1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines an instruction selector for the NVPTX target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "NVPTXISelDAGToDAG.h" 15 #include "llvm/IR/GlobalValue.h" 16 #include "llvm/IR/Instructions.h" 17 #include "llvm/Support/CommandLine.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/ErrorHandling.h" 20 #include "llvm/Support/raw_ostream.h" 21 #include "llvm/Target/TargetIntrinsicInfo.h" 22 23 using namespace llvm; 24 25 #define DEBUG_TYPE "nvptx-isel" 26 27 static cl::opt<int> UsePrecDivF32( 28 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, 29 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" 30 " IEEE Compliant F32 div.rnd if available."), 31 cl::init(2)); 32 33 static cl::opt<bool> 34 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden, 35 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), 36 cl::init(true)); 37 38 static cl::opt<bool> 39 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, 40 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), 41 cl::init(false)); 42 43 44 /// createNVPTXISelDag - This pass converts a legalized DAG into a 45 /// NVPTX-specific DAG, ready for instruction scheduling. 
46 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, 47 llvm::CodeGenOpt::Level OptLevel) { 48 return new NVPTXDAGToDAGISel(TM, OptLevel); 49 } 50 51 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, 52 CodeGenOpt::Level OptLevel) 53 : SelectionDAGISel(tm, OptLevel), TM(tm) { 54 doMulWide = (OptLevel > 0); 55 } 56 57 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 58 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget()); 59 return SelectionDAGISel::runOnMachineFunction(MF); 60 } 61 62 int NVPTXDAGToDAGISel::getDivF32Level() const { 63 if (UsePrecDivF32.getNumOccurrences() > 0) { 64 // If nvptx-prec-div32=N is used on the command-line, always honor it 65 return UsePrecDivF32; 66 } else { 67 // Otherwise, use div.approx if fast math is enabled 68 if (TM.Options.UnsafeFPMath) 69 return 0; 70 else 71 return 2; 72 } 73 } 74 75 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const { 76 if (UsePrecSqrtF32.getNumOccurrences() > 0) { 77 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it 78 return UsePrecSqrtF32; 79 } else { 80 // Otherwise, use sqrt.approx if fast math is enabled 81 return !TM.Options.UnsafeFPMath; 82 } 83 } 84 85 bool NVPTXDAGToDAGISel::useF32FTZ() const { 86 if (FtzEnabled.getNumOccurrences() > 0) { 87 // If nvptx-f32ftz is used on the command-line, always honor it 88 return FtzEnabled; 89 } else { 90 const Function *F = MF->getFunction(); 91 // Otherwise, check for an nvptx-f32ftz attribute on the function 92 if (F->hasFnAttribute("nvptx-f32ftz")) 93 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true"; 94 else 95 return false; 96 } 97 } 98 99 bool NVPTXDAGToDAGISel::allowFMA() const { 100 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); 101 return TL->allowFMA(*MF, OptLevel); 102 } 103 104 /// Select - Select instructions not customized! Used for 105 /// expanded, promoted and normal instructions. 
106 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { 107 108 if (N->isMachineOpcode()) { 109 N->setNodeId(-1); 110 return nullptr; // Already selected. 111 } 112 113 SDNode *ResNode = nullptr; 114 switch (N->getOpcode()) { 115 case ISD::LOAD: 116 ResNode = SelectLoad(N); 117 break; 118 case ISD::STORE: 119 ResNode = SelectStore(N); 120 break; 121 case NVPTXISD::LoadV2: 122 case NVPTXISD::LoadV4: 123 ResNode = SelectLoadVector(N); 124 break; 125 case NVPTXISD::LDGV2: 126 case NVPTXISD::LDGV4: 127 case NVPTXISD::LDUV2: 128 case NVPTXISD::LDUV4: 129 ResNode = SelectLDGLDU(N); 130 break; 131 case NVPTXISD::StoreV2: 132 case NVPTXISD::StoreV4: 133 ResNode = SelectStoreVector(N); 134 break; 135 case NVPTXISD::LoadParam: 136 case NVPTXISD::LoadParamV2: 137 case NVPTXISD::LoadParamV4: 138 ResNode = SelectLoadParam(N); 139 break; 140 case NVPTXISD::StoreRetval: 141 case NVPTXISD::StoreRetvalV2: 142 case NVPTXISD::StoreRetvalV4: 143 ResNode = SelectStoreRetval(N); 144 break; 145 case NVPTXISD::StoreParam: 146 case NVPTXISD::StoreParamV2: 147 case NVPTXISD::StoreParamV4: 148 case NVPTXISD::StoreParamS32: 149 case NVPTXISD::StoreParamU32: 150 ResNode = SelectStoreParam(N); 151 break; 152 case ISD::INTRINSIC_WO_CHAIN: 153 ResNode = SelectIntrinsicNoChain(N); 154 break; 155 case ISD::INTRINSIC_W_CHAIN: 156 ResNode = SelectIntrinsicChain(N); 157 break; 158 case NVPTXISD::Tex1DFloatS32: 159 case NVPTXISD::Tex1DFloatFloat: 160 case NVPTXISD::Tex1DFloatFloatLevel: 161 case NVPTXISD::Tex1DFloatFloatGrad: 162 case NVPTXISD::Tex1DS32S32: 163 case NVPTXISD::Tex1DS32Float: 164 case NVPTXISD::Tex1DS32FloatLevel: 165 case NVPTXISD::Tex1DS32FloatGrad: 166 case NVPTXISD::Tex1DU32S32: 167 case NVPTXISD::Tex1DU32Float: 168 case NVPTXISD::Tex1DU32FloatLevel: 169 case NVPTXISD::Tex1DU32FloatGrad: 170 case NVPTXISD::Tex1DArrayFloatS32: 171 case NVPTXISD::Tex1DArrayFloatFloat: 172 case NVPTXISD::Tex1DArrayFloatFloatLevel: 173 case NVPTXISD::Tex1DArrayFloatFloatGrad: 174 case 
NVPTXISD::Tex1DArrayS32S32: 175 case NVPTXISD::Tex1DArrayS32Float: 176 case NVPTXISD::Tex1DArrayS32FloatLevel: 177 case NVPTXISD::Tex1DArrayS32FloatGrad: 178 case NVPTXISD::Tex1DArrayU32S32: 179 case NVPTXISD::Tex1DArrayU32Float: 180 case NVPTXISD::Tex1DArrayU32FloatLevel: 181 case NVPTXISD::Tex1DArrayU32FloatGrad: 182 case NVPTXISD::Tex2DFloatS32: 183 case NVPTXISD::Tex2DFloatFloat: 184 case NVPTXISD::Tex2DFloatFloatLevel: 185 case NVPTXISD::Tex2DFloatFloatGrad: 186 case NVPTXISD::Tex2DS32S32: 187 case NVPTXISD::Tex2DS32Float: 188 case NVPTXISD::Tex2DS32FloatLevel: 189 case NVPTXISD::Tex2DS32FloatGrad: 190 case NVPTXISD::Tex2DU32S32: 191 case NVPTXISD::Tex2DU32Float: 192 case NVPTXISD::Tex2DU32FloatLevel: 193 case NVPTXISD::Tex2DU32FloatGrad: 194 case NVPTXISD::Tex2DArrayFloatS32: 195 case NVPTXISD::Tex2DArrayFloatFloat: 196 case NVPTXISD::Tex2DArrayFloatFloatLevel: 197 case NVPTXISD::Tex2DArrayFloatFloatGrad: 198 case NVPTXISD::Tex2DArrayS32S32: 199 case NVPTXISD::Tex2DArrayS32Float: 200 case NVPTXISD::Tex2DArrayS32FloatLevel: 201 case NVPTXISD::Tex2DArrayS32FloatGrad: 202 case NVPTXISD::Tex2DArrayU32S32: 203 case NVPTXISD::Tex2DArrayU32Float: 204 case NVPTXISD::Tex2DArrayU32FloatLevel: 205 case NVPTXISD::Tex2DArrayU32FloatGrad: 206 case NVPTXISD::Tex3DFloatS32: 207 case NVPTXISD::Tex3DFloatFloat: 208 case NVPTXISD::Tex3DFloatFloatLevel: 209 case NVPTXISD::Tex3DFloatFloatGrad: 210 case NVPTXISD::Tex3DS32S32: 211 case NVPTXISD::Tex3DS32Float: 212 case NVPTXISD::Tex3DS32FloatLevel: 213 case NVPTXISD::Tex3DS32FloatGrad: 214 case NVPTXISD::Tex3DU32S32: 215 case NVPTXISD::Tex3DU32Float: 216 case NVPTXISD::Tex3DU32FloatLevel: 217 case NVPTXISD::Tex3DU32FloatGrad: 218 case NVPTXISD::TexCubeFloatFloat: 219 case NVPTXISD::TexCubeFloatFloatLevel: 220 case NVPTXISD::TexCubeS32Float: 221 case NVPTXISD::TexCubeS32FloatLevel: 222 case NVPTXISD::TexCubeU32Float: 223 case NVPTXISD::TexCubeU32FloatLevel: 224 case NVPTXISD::TexCubeArrayFloatFloat: 225 case 
NVPTXISD::TexCubeArrayFloatFloatLevel: 226 case NVPTXISD::TexCubeArrayS32Float: 227 case NVPTXISD::TexCubeArrayS32FloatLevel: 228 case NVPTXISD::TexCubeArrayU32Float: 229 case NVPTXISD::TexCubeArrayU32FloatLevel: 230 case NVPTXISD::Tld4R2DFloatFloat: 231 case NVPTXISD::Tld4G2DFloatFloat: 232 case NVPTXISD::Tld4B2DFloatFloat: 233 case NVPTXISD::Tld4A2DFloatFloat: 234 case NVPTXISD::Tld4R2DS64Float: 235 case NVPTXISD::Tld4G2DS64Float: 236 case NVPTXISD::Tld4B2DS64Float: 237 case NVPTXISD::Tld4A2DS64Float: 238 case NVPTXISD::Tld4R2DU64Float: 239 case NVPTXISD::Tld4G2DU64Float: 240 case NVPTXISD::Tld4B2DU64Float: 241 case NVPTXISD::Tld4A2DU64Float: 242 case NVPTXISD::TexUnified1DFloatS32: 243 case NVPTXISD::TexUnified1DFloatFloat: 244 case NVPTXISD::TexUnified1DFloatFloatLevel: 245 case NVPTXISD::TexUnified1DFloatFloatGrad: 246 case NVPTXISD::TexUnified1DS32S32: 247 case NVPTXISD::TexUnified1DS32Float: 248 case NVPTXISD::TexUnified1DS32FloatLevel: 249 case NVPTXISD::TexUnified1DS32FloatGrad: 250 case NVPTXISD::TexUnified1DU32S32: 251 case NVPTXISD::TexUnified1DU32Float: 252 case NVPTXISD::TexUnified1DU32FloatLevel: 253 case NVPTXISD::TexUnified1DU32FloatGrad: 254 case NVPTXISD::TexUnified1DArrayFloatS32: 255 case NVPTXISD::TexUnified1DArrayFloatFloat: 256 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 257 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 258 case NVPTXISD::TexUnified1DArrayS32S32: 259 case NVPTXISD::TexUnified1DArrayS32Float: 260 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 261 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 262 case NVPTXISD::TexUnified1DArrayU32S32: 263 case NVPTXISD::TexUnified1DArrayU32Float: 264 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 265 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 266 case NVPTXISD::TexUnified2DFloatS32: 267 case NVPTXISD::TexUnified2DFloatFloat: 268 case NVPTXISD::TexUnified2DFloatFloatLevel: 269 case NVPTXISD::TexUnified2DFloatFloatGrad: 270 case NVPTXISD::TexUnified2DS32S32: 271 case 
NVPTXISD::TexUnified2DS32Float: 272 case NVPTXISD::TexUnified2DS32FloatLevel: 273 case NVPTXISD::TexUnified2DS32FloatGrad: 274 case NVPTXISD::TexUnified2DU32S32: 275 case NVPTXISD::TexUnified2DU32Float: 276 case NVPTXISD::TexUnified2DU32FloatLevel: 277 case NVPTXISD::TexUnified2DU32FloatGrad: 278 case NVPTXISD::TexUnified2DArrayFloatS32: 279 case NVPTXISD::TexUnified2DArrayFloatFloat: 280 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 281 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 282 case NVPTXISD::TexUnified2DArrayS32S32: 283 case NVPTXISD::TexUnified2DArrayS32Float: 284 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 285 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 286 case NVPTXISD::TexUnified2DArrayU32S32: 287 case NVPTXISD::TexUnified2DArrayU32Float: 288 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 289 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 290 case NVPTXISD::TexUnified3DFloatS32: 291 case NVPTXISD::TexUnified3DFloatFloat: 292 case NVPTXISD::TexUnified3DFloatFloatLevel: 293 case NVPTXISD::TexUnified3DFloatFloatGrad: 294 case NVPTXISD::TexUnified3DS32S32: 295 case NVPTXISD::TexUnified3DS32Float: 296 case NVPTXISD::TexUnified3DS32FloatLevel: 297 case NVPTXISD::TexUnified3DS32FloatGrad: 298 case NVPTXISD::TexUnified3DU32S32: 299 case NVPTXISD::TexUnified3DU32Float: 300 case NVPTXISD::TexUnified3DU32FloatLevel: 301 case NVPTXISD::TexUnified3DU32FloatGrad: 302 case NVPTXISD::TexUnifiedCubeFloatFloat: 303 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 304 case NVPTXISD::TexUnifiedCubeS32Float: 305 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 306 case NVPTXISD::TexUnifiedCubeU32Float: 307 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 308 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 309 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 310 case NVPTXISD::TexUnifiedCubeArrayS32Float: 311 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 312 case NVPTXISD::TexUnifiedCubeArrayU32Float: 313 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 314 case 
NVPTXISD::Tld4UnifiedR2DFloatFloat: 315 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 316 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 317 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 318 case NVPTXISD::Tld4UnifiedR2DS64Float: 319 case NVPTXISD::Tld4UnifiedG2DS64Float: 320 case NVPTXISD::Tld4UnifiedB2DS64Float: 321 case NVPTXISD::Tld4UnifiedA2DS64Float: 322 case NVPTXISD::Tld4UnifiedR2DU64Float: 323 case NVPTXISD::Tld4UnifiedG2DU64Float: 324 case NVPTXISD::Tld4UnifiedB2DU64Float: 325 case NVPTXISD::Tld4UnifiedA2DU64Float: 326 ResNode = SelectTextureIntrinsic(N); 327 break; 328 case NVPTXISD::Suld1DI8Clamp: 329 case NVPTXISD::Suld1DI16Clamp: 330 case NVPTXISD::Suld1DI32Clamp: 331 case NVPTXISD::Suld1DI64Clamp: 332 case NVPTXISD::Suld1DV2I8Clamp: 333 case NVPTXISD::Suld1DV2I16Clamp: 334 case NVPTXISD::Suld1DV2I32Clamp: 335 case NVPTXISD::Suld1DV2I64Clamp: 336 case NVPTXISD::Suld1DV4I8Clamp: 337 case NVPTXISD::Suld1DV4I16Clamp: 338 case NVPTXISD::Suld1DV4I32Clamp: 339 case NVPTXISD::Suld1DArrayI8Clamp: 340 case NVPTXISD::Suld1DArrayI16Clamp: 341 case NVPTXISD::Suld1DArrayI32Clamp: 342 case NVPTXISD::Suld1DArrayI64Clamp: 343 case NVPTXISD::Suld1DArrayV2I8Clamp: 344 case NVPTXISD::Suld1DArrayV2I16Clamp: 345 case NVPTXISD::Suld1DArrayV2I32Clamp: 346 case NVPTXISD::Suld1DArrayV2I64Clamp: 347 case NVPTXISD::Suld1DArrayV4I8Clamp: 348 case NVPTXISD::Suld1DArrayV4I16Clamp: 349 case NVPTXISD::Suld1DArrayV4I32Clamp: 350 case NVPTXISD::Suld2DI8Clamp: 351 case NVPTXISD::Suld2DI16Clamp: 352 case NVPTXISD::Suld2DI32Clamp: 353 case NVPTXISD::Suld2DI64Clamp: 354 case NVPTXISD::Suld2DV2I8Clamp: 355 case NVPTXISD::Suld2DV2I16Clamp: 356 case NVPTXISD::Suld2DV2I32Clamp: 357 case NVPTXISD::Suld2DV2I64Clamp: 358 case NVPTXISD::Suld2DV4I8Clamp: 359 case NVPTXISD::Suld2DV4I16Clamp: 360 case NVPTXISD::Suld2DV4I32Clamp: 361 case NVPTXISD::Suld2DArrayI8Clamp: 362 case NVPTXISD::Suld2DArrayI16Clamp: 363 case NVPTXISD::Suld2DArrayI32Clamp: 364 case NVPTXISD::Suld2DArrayI64Clamp: 365 case 
NVPTXISD::Suld2DArrayV2I8Clamp: 366 case NVPTXISD::Suld2DArrayV2I16Clamp: 367 case NVPTXISD::Suld2DArrayV2I32Clamp: 368 case NVPTXISD::Suld2DArrayV2I64Clamp: 369 case NVPTXISD::Suld2DArrayV4I8Clamp: 370 case NVPTXISD::Suld2DArrayV4I16Clamp: 371 case NVPTXISD::Suld2DArrayV4I32Clamp: 372 case NVPTXISD::Suld3DI8Clamp: 373 case NVPTXISD::Suld3DI16Clamp: 374 case NVPTXISD::Suld3DI32Clamp: 375 case NVPTXISD::Suld3DI64Clamp: 376 case NVPTXISD::Suld3DV2I8Clamp: 377 case NVPTXISD::Suld3DV2I16Clamp: 378 case NVPTXISD::Suld3DV2I32Clamp: 379 case NVPTXISD::Suld3DV2I64Clamp: 380 case NVPTXISD::Suld3DV4I8Clamp: 381 case NVPTXISD::Suld3DV4I16Clamp: 382 case NVPTXISD::Suld3DV4I32Clamp: 383 case NVPTXISD::Suld1DI8Trap: 384 case NVPTXISD::Suld1DI16Trap: 385 case NVPTXISD::Suld1DI32Trap: 386 case NVPTXISD::Suld1DI64Trap: 387 case NVPTXISD::Suld1DV2I8Trap: 388 case NVPTXISD::Suld1DV2I16Trap: 389 case NVPTXISD::Suld1DV2I32Trap: 390 case NVPTXISD::Suld1DV2I64Trap: 391 case NVPTXISD::Suld1DV4I8Trap: 392 case NVPTXISD::Suld1DV4I16Trap: 393 case NVPTXISD::Suld1DV4I32Trap: 394 case NVPTXISD::Suld1DArrayI8Trap: 395 case NVPTXISD::Suld1DArrayI16Trap: 396 case NVPTXISD::Suld1DArrayI32Trap: 397 case NVPTXISD::Suld1DArrayI64Trap: 398 case NVPTXISD::Suld1DArrayV2I8Trap: 399 case NVPTXISD::Suld1DArrayV2I16Trap: 400 case NVPTXISD::Suld1DArrayV2I32Trap: 401 case NVPTXISD::Suld1DArrayV2I64Trap: 402 case NVPTXISD::Suld1DArrayV4I8Trap: 403 case NVPTXISD::Suld1DArrayV4I16Trap: 404 case NVPTXISD::Suld1DArrayV4I32Trap: 405 case NVPTXISD::Suld2DI8Trap: 406 case NVPTXISD::Suld2DI16Trap: 407 case NVPTXISD::Suld2DI32Trap: 408 case NVPTXISD::Suld2DI64Trap: 409 case NVPTXISD::Suld2DV2I8Trap: 410 case NVPTXISD::Suld2DV2I16Trap: 411 case NVPTXISD::Suld2DV2I32Trap: 412 case NVPTXISD::Suld2DV2I64Trap: 413 case NVPTXISD::Suld2DV4I8Trap: 414 case NVPTXISD::Suld2DV4I16Trap: 415 case NVPTXISD::Suld2DV4I32Trap: 416 case NVPTXISD::Suld2DArrayI8Trap: 417 case NVPTXISD::Suld2DArrayI16Trap: 418 case 
NVPTXISD::Suld2DArrayI32Trap: 419 case NVPTXISD::Suld2DArrayI64Trap: 420 case NVPTXISD::Suld2DArrayV2I8Trap: 421 case NVPTXISD::Suld2DArrayV2I16Trap: 422 case NVPTXISD::Suld2DArrayV2I32Trap: 423 case NVPTXISD::Suld2DArrayV2I64Trap: 424 case NVPTXISD::Suld2DArrayV4I8Trap: 425 case NVPTXISD::Suld2DArrayV4I16Trap: 426 case NVPTXISD::Suld2DArrayV4I32Trap: 427 case NVPTXISD::Suld3DI8Trap: 428 case NVPTXISD::Suld3DI16Trap: 429 case NVPTXISD::Suld3DI32Trap: 430 case NVPTXISD::Suld3DI64Trap: 431 case NVPTXISD::Suld3DV2I8Trap: 432 case NVPTXISD::Suld3DV2I16Trap: 433 case NVPTXISD::Suld3DV2I32Trap: 434 case NVPTXISD::Suld3DV2I64Trap: 435 case NVPTXISD::Suld3DV4I8Trap: 436 case NVPTXISD::Suld3DV4I16Trap: 437 case NVPTXISD::Suld3DV4I32Trap: 438 case NVPTXISD::Suld1DI8Zero: 439 case NVPTXISD::Suld1DI16Zero: 440 case NVPTXISD::Suld1DI32Zero: 441 case NVPTXISD::Suld1DI64Zero: 442 case NVPTXISD::Suld1DV2I8Zero: 443 case NVPTXISD::Suld1DV2I16Zero: 444 case NVPTXISD::Suld1DV2I32Zero: 445 case NVPTXISD::Suld1DV2I64Zero: 446 case NVPTXISD::Suld1DV4I8Zero: 447 case NVPTXISD::Suld1DV4I16Zero: 448 case NVPTXISD::Suld1DV4I32Zero: 449 case NVPTXISD::Suld1DArrayI8Zero: 450 case NVPTXISD::Suld1DArrayI16Zero: 451 case NVPTXISD::Suld1DArrayI32Zero: 452 case NVPTXISD::Suld1DArrayI64Zero: 453 case NVPTXISD::Suld1DArrayV2I8Zero: 454 case NVPTXISD::Suld1DArrayV2I16Zero: 455 case NVPTXISD::Suld1DArrayV2I32Zero: 456 case NVPTXISD::Suld1DArrayV2I64Zero: 457 case NVPTXISD::Suld1DArrayV4I8Zero: 458 case NVPTXISD::Suld1DArrayV4I16Zero: 459 case NVPTXISD::Suld1DArrayV4I32Zero: 460 case NVPTXISD::Suld2DI8Zero: 461 case NVPTXISD::Suld2DI16Zero: 462 case NVPTXISD::Suld2DI32Zero: 463 case NVPTXISD::Suld2DI64Zero: 464 case NVPTXISD::Suld2DV2I8Zero: 465 case NVPTXISD::Suld2DV2I16Zero: 466 case NVPTXISD::Suld2DV2I32Zero: 467 case NVPTXISD::Suld2DV2I64Zero: 468 case NVPTXISD::Suld2DV4I8Zero: 469 case NVPTXISD::Suld2DV4I16Zero: 470 case NVPTXISD::Suld2DV4I32Zero: 471 case NVPTXISD::Suld2DArrayI8Zero: 472 case 
NVPTXISD::Suld2DArrayI16Zero: 473 case NVPTXISD::Suld2DArrayI32Zero: 474 case NVPTXISD::Suld2DArrayI64Zero: 475 case NVPTXISD::Suld2DArrayV2I8Zero: 476 case NVPTXISD::Suld2DArrayV2I16Zero: 477 case NVPTXISD::Suld2DArrayV2I32Zero: 478 case NVPTXISD::Suld2DArrayV2I64Zero: 479 case NVPTXISD::Suld2DArrayV4I8Zero: 480 case NVPTXISD::Suld2DArrayV4I16Zero: 481 case NVPTXISD::Suld2DArrayV4I32Zero: 482 case NVPTXISD::Suld3DI8Zero: 483 case NVPTXISD::Suld3DI16Zero: 484 case NVPTXISD::Suld3DI32Zero: 485 case NVPTXISD::Suld3DI64Zero: 486 case NVPTXISD::Suld3DV2I8Zero: 487 case NVPTXISD::Suld3DV2I16Zero: 488 case NVPTXISD::Suld3DV2I32Zero: 489 case NVPTXISD::Suld3DV2I64Zero: 490 case NVPTXISD::Suld3DV4I8Zero: 491 case NVPTXISD::Suld3DV4I16Zero: 492 case NVPTXISD::Suld3DV4I32Zero: 493 ResNode = SelectSurfaceIntrinsic(N); 494 break; 495 case ISD::AND: 496 case ISD::SRA: 497 case ISD::SRL: 498 // Try to select BFE 499 ResNode = SelectBFE(N); 500 break; 501 case ISD::ADDRSPACECAST: 502 ResNode = SelectAddrSpaceCast(N); 503 break; 504 default: 505 break; 506 } 507 if (ResNode) 508 return ResNode; 509 return SelectCode(N); 510 } 511 512 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) { 513 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 514 switch (IID) { 515 default: 516 return NULL; 517 case Intrinsic::nvvm_ldg_global_f: 518 case Intrinsic::nvvm_ldg_global_i: 519 case Intrinsic::nvvm_ldg_global_p: 520 case Intrinsic::nvvm_ldu_global_f: 521 case Intrinsic::nvvm_ldu_global_i: 522 case Intrinsic::nvvm_ldu_global_p: 523 return SelectLDGLDU(N); 524 } 525 } 526 527 static unsigned int getCodeAddrSpace(MemSDNode *N) { 528 const Value *Src = N->getMemOperand()->getValue(); 529 530 if (!Src) 531 return NVPTX::PTXLdStInstCode::GENERIC; 532 533 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) { 534 switch (PT->getAddressSpace()) { 535 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; 536 case llvm::ADDRESS_SPACE_GLOBAL: 
return NVPTX::PTXLdStInstCode::GLOBAL; 537 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; 538 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; 539 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; 540 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT; 541 default: break; 542 } 543 } 544 return NVPTX::PTXLdStInstCode::GENERIC; 545 } 546 547 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) { 548 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 549 switch (IID) { 550 default: 551 return nullptr; 552 case Intrinsic::nvvm_texsurf_handle_internal: 553 return SelectTexSurfHandle(N); 554 } 555 } 556 557 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { 558 // Op 0 is the intrinsic ID 559 SDValue Wrapper = N->getOperand(1); 560 SDValue GlobalVal = Wrapper.getOperand(0); 561 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64, 562 GlobalVal); 563 } 564 565 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { 566 SDValue Src = N->getOperand(0); 567 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N); 568 unsigned SrcAddrSpace = CastN->getSrcAddressSpace(); 569 unsigned DstAddrSpace = CastN->getDestAddressSpace(); 570 571 assert(SrcAddrSpace != DstAddrSpace && 572 "addrspacecast must be between different address spaces"); 573 574 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) { 575 // Specific to generic 576 unsigned Opc; 577 switch (SrcAddrSpace) { 578 default: report_fatal_error("Bad address space in addrspacecast"); 579 case ADDRESS_SPACE_GLOBAL: 580 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes; 581 break; 582 case ADDRESS_SPACE_SHARED: 583 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes; 584 break; 585 case ADDRESS_SPACE_CONST: 586 Opc = TM.is64Bit() ? 
NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes; 587 break; 588 case ADDRESS_SPACE_LOCAL: 589 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes; 590 break; 591 } 592 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src); 593 } else { 594 // Generic to specific 595 if (SrcAddrSpace != 0) 596 report_fatal_error("Cannot cast between two non-generic address spaces"); 597 unsigned Opc; 598 switch (DstAddrSpace) { 599 default: report_fatal_error("Bad address space in addrspacecast"); 600 case ADDRESS_SPACE_GLOBAL: 601 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64 602 : NVPTX::cvta_to_global_yes; 603 break; 604 case ADDRESS_SPACE_SHARED: 605 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64 606 : NVPTX::cvta_to_shared_yes; 607 break; 608 case ADDRESS_SPACE_CONST: 609 Opc = 610 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes; 611 break; 612 case ADDRESS_SPACE_LOCAL: 613 Opc = 614 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes; 615 break; 616 } 617 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src); 618 } 619 } 620 621 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { 622 SDLoc dl(N); 623 LoadSDNode *LD = cast<LoadSDNode>(N); 624 EVT LoadedVT = LD->getMemoryVT(); 625 SDNode *NVPTXLD = nullptr; 626 627 // do not support pre/post inc/dec 628 if (LD->isIndexed()) 629 return nullptr; 630 631 if (!LoadedVT.isSimple()) 632 return nullptr; 633 634 // Address Space Setting 635 unsigned int codeAddrSpace = getCodeAddrSpace(LD); 636 637 // Volatile Setting 638 // - .volatile is only availalble for .global and .shared 639 bool isVolatile = LD->isVolatile(); 640 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 641 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 642 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 643 isVolatile = false; 644 645 // Vector Setting 646 MVT SimpleVT = LoadedVT.getSimpleVT(); 647 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; 648 if 
(SimpleVT.isVector()) { 649 unsigned num = SimpleVT.getVectorNumElements(); 650 if (num == 2) 651 vecType = NVPTX::PTXLdStInstCode::V2; 652 else if (num == 4) 653 vecType = NVPTX::PTXLdStInstCode::V4; 654 else 655 return nullptr; 656 } 657 658 // Type Setting: fromType + fromTypeWidth 659 // 660 // Sign : ISD::SEXTLOAD 661 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the 662 // type is integer 663 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float 664 MVT ScalarVT = SimpleVT.getScalarType(); 665 // Read at least 8 bits (predicates are stored as 8-bit values) 666 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits()); 667 unsigned int fromType; 668 if ((LD->getExtensionType() == ISD::SEXTLOAD)) 669 fromType = NVPTX::PTXLdStInstCode::Signed; 670 else if (ScalarVT.isFloatingPoint()) 671 fromType = NVPTX::PTXLdStInstCode::Float; 672 else 673 fromType = NVPTX::PTXLdStInstCode::Unsigned; 674 675 // Create the machine instruction DAG 676 SDValue Chain = N->getOperand(0); 677 SDValue N1 = N->getOperand(1); 678 SDValue Addr; 679 SDValue Offset, Base; 680 unsigned Opcode; 681 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; 682 683 if (SelectDirectAddr(N1, Addr)) { 684 switch (TargetVT) { 685 case MVT::i8: 686 Opcode = NVPTX::LD_i8_avar; 687 break; 688 case MVT::i16: 689 Opcode = NVPTX::LD_i16_avar; 690 break; 691 case MVT::i32: 692 Opcode = NVPTX::LD_i32_avar; 693 break; 694 case MVT::i64: 695 Opcode = NVPTX::LD_i64_avar; 696 break; 697 case MVT::f32: 698 Opcode = NVPTX::LD_f32_avar; 699 break; 700 case MVT::f64: 701 Opcode = NVPTX::LD_f64_avar; 702 break; 703 default: 704 return nullptr; 705 } 706 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), 707 getI32Imm(vecType), getI32Imm(fromType), 708 getI32Imm(fromTypeWidth), Addr, Chain }; 709 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 710 } else if (TM.is64Bit() ? 
SelectADDRsi64(N1.getNode(), N1, Base, Offset) 711 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { 712 switch (TargetVT) { 713 case MVT::i8: 714 Opcode = NVPTX::LD_i8_asi; 715 break; 716 case MVT::i16: 717 Opcode = NVPTX::LD_i16_asi; 718 break; 719 case MVT::i32: 720 Opcode = NVPTX::LD_i32_asi; 721 break; 722 case MVT::i64: 723 Opcode = NVPTX::LD_i64_asi; 724 break; 725 case MVT::f32: 726 Opcode = NVPTX::LD_f32_asi; 727 break; 728 case MVT::f64: 729 Opcode = NVPTX::LD_f64_asi; 730 break; 731 default: 732 return nullptr; 733 } 734 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), 735 getI32Imm(vecType), getI32Imm(fromType), 736 getI32Imm(fromTypeWidth), Base, Offset, Chain }; 737 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 738 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset) 739 : SelectADDRri(N1.getNode(), N1, Base, Offset)) { 740 if (TM.is64Bit()) { 741 switch (TargetVT) { 742 case MVT::i8: 743 Opcode = NVPTX::LD_i8_ari_64; 744 break; 745 case MVT::i16: 746 Opcode = NVPTX::LD_i16_ari_64; 747 break; 748 case MVT::i32: 749 Opcode = NVPTX::LD_i32_ari_64; 750 break; 751 case MVT::i64: 752 Opcode = NVPTX::LD_i64_ari_64; 753 break; 754 case MVT::f32: 755 Opcode = NVPTX::LD_f32_ari_64; 756 break; 757 case MVT::f64: 758 Opcode = NVPTX::LD_f64_ari_64; 759 break; 760 default: 761 return nullptr; 762 } 763 } else { 764 switch (TargetVT) { 765 case MVT::i8: 766 Opcode = NVPTX::LD_i8_ari; 767 break; 768 case MVT::i16: 769 Opcode = NVPTX::LD_i16_ari; 770 break; 771 case MVT::i32: 772 Opcode = NVPTX::LD_i32_ari; 773 break; 774 case MVT::i64: 775 Opcode = NVPTX::LD_i64_ari; 776 break; 777 case MVT::f32: 778 Opcode = NVPTX::LD_f32_ari; 779 break; 780 case MVT::f64: 781 Opcode = NVPTX::LD_f64_ari; 782 break; 783 default: 784 return nullptr; 785 } 786 } 787 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), 788 getI32Imm(vecType), getI32Imm(fromType), 789 getI32Imm(fromTypeWidth), Base, Offset, 
Chain }; 790 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 791 } else { 792 if (TM.is64Bit()) { 793 switch (TargetVT) { 794 case MVT::i8: 795 Opcode = NVPTX::LD_i8_areg_64; 796 break; 797 case MVT::i16: 798 Opcode = NVPTX::LD_i16_areg_64; 799 break; 800 case MVT::i32: 801 Opcode = NVPTX::LD_i32_areg_64; 802 break; 803 case MVT::i64: 804 Opcode = NVPTX::LD_i64_areg_64; 805 break; 806 case MVT::f32: 807 Opcode = NVPTX::LD_f32_areg_64; 808 break; 809 case MVT::f64: 810 Opcode = NVPTX::LD_f64_areg_64; 811 break; 812 default: 813 return nullptr; 814 } 815 } else { 816 switch (TargetVT) { 817 case MVT::i8: 818 Opcode = NVPTX::LD_i8_areg; 819 break; 820 case MVT::i16: 821 Opcode = NVPTX::LD_i16_areg; 822 break; 823 case MVT::i32: 824 Opcode = NVPTX::LD_i32_areg; 825 break; 826 case MVT::i64: 827 Opcode = NVPTX::LD_i64_areg; 828 break; 829 case MVT::f32: 830 Opcode = NVPTX::LD_f32_areg; 831 break; 832 case MVT::f64: 833 Opcode = NVPTX::LD_f64_areg; 834 break; 835 default: 836 return nullptr; 837 } 838 } 839 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), 840 getI32Imm(vecType), getI32Imm(fromType), 841 getI32Imm(fromTypeWidth), N1, Chain }; 842 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 843 } 844 845 if (NVPTXLD) { 846 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 847 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 848 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1); 849 } 850 851 return NVPTXLD; 852 } 853 854 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { 855 856 SDValue Chain = N->getOperand(0); 857 SDValue Op1 = N->getOperand(1); 858 SDValue Addr, Offset, Base; 859 unsigned Opcode; 860 SDLoc DL(N); 861 SDNode *LD; 862 MemSDNode *MemSD = cast<MemSDNode>(N); 863 EVT LoadedVT = MemSD->getMemoryVT(); 864 865 if (!LoadedVT.isSimple()) 866 return nullptr; 867 868 // Address Space Setting 869 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD); 870 
871 // Volatile Setting 872 // - .volatile is only availalble for .global and .shared 873 bool IsVolatile = MemSD->isVolatile(); 874 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 875 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 876 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 877 IsVolatile = false; 878 879 // Vector Setting 880 MVT SimpleVT = LoadedVT.getSimpleVT(); 881 882 // Type Setting: fromType + fromTypeWidth 883 // 884 // Sign : ISD::SEXTLOAD 885 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the 886 // type is integer 887 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float 888 MVT ScalarVT = SimpleVT.getScalarType(); 889 // Read at least 8 bits (predicates are stored as 8-bit values) 890 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits()); 891 unsigned int FromType; 892 // The last operand holds the original LoadSDNode::getExtensionType() value 893 unsigned ExtensionType = cast<ConstantSDNode>( 894 N->getOperand(N->getNumOperands() - 1))->getZExtValue(); 895 if (ExtensionType == ISD::SEXTLOAD) 896 FromType = NVPTX::PTXLdStInstCode::Signed; 897 else if (ScalarVT.isFloatingPoint()) 898 FromType = NVPTX::PTXLdStInstCode::Float; 899 else 900 FromType = NVPTX::PTXLdStInstCode::Unsigned; 901 902 unsigned VecType; 903 904 switch (N->getOpcode()) { 905 case NVPTXISD::LoadV2: 906 VecType = NVPTX::PTXLdStInstCode::V2; 907 break; 908 case NVPTXISD::LoadV4: 909 VecType = NVPTX::PTXLdStInstCode::V4; 910 break; 911 default: 912 return nullptr; 913 } 914 915 EVT EltVT = N->getValueType(0); 916 917 if (SelectDirectAddr(Op1, Addr)) { 918 switch (N->getOpcode()) { 919 default: 920 return nullptr; 921 case NVPTXISD::LoadV2: 922 switch (EltVT.getSimpleVT().SimpleTy) { 923 default: 924 return nullptr; 925 case MVT::i8: 926 Opcode = NVPTX::LDV_i8_v2_avar; 927 break; 928 case MVT::i16: 929 Opcode = NVPTX::LDV_i16_v2_avar; 930 break; 931 case MVT::i32: 932 Opcode = NVPTX::LDV_i32_v2_avar; 933 break; 934 case MVT::i64: 
935 Opcode = NVPTX::LDV_i64_v2_avar; 936 break; 937 case MVT::f32: 938 Opcode = NVPTX::LDV_f32_v2_avar; 939 break; 940 case MVT::f64: 941 Opcode = NVPTX::LDV_f64_v2_avar; 942 break; 943 } 944 break; 945 case NVPTXISD::LoadV4: 946 switch (EltVT.getSimpleVT().SimpleTy) { 947 default: 948 return nullptr; 949 case MVT::i8: 950 Opcode = NVPTX::LDV_i8_v4_avar; 951 break; 952 case MVT::i16: 953 Opcode = NVPTX::LDV_i16_v4_avar; 954 break; 955 case MVT::i32: 956 Opcode = NVPTX::LDV_i32_v4_avar; 957 break; 958 case MVT::f32: 959 Opcode = NVPTX::LDV_f32_v4_avar; 960 break; 961 } 962 break; 963 } 964 965 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), 966 getI32Imm(VecType), getI32Imm(FromType), 967 getI32Imm(FromTypeWidth), Addr, Chain }; 968 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 969 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) 970 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { 971 switch (N->getOpcode()) { 972 default: 973 return nullptr; 974 case NVPTXISD::LoadV2: 975 switch (EltVT.getSimpleVT().SimpleTy) { 976 default: 977 return nullptr; 978 case MVT::i8: 979 Opcode = NVPTX::LDV_i8_v2_asi; 980 break; 981 case MVT::i16: 982 Opcode = NVPTX::LDV_i16_v2_asi; 983 break; 984 case MVT::i32: 985 Opcode = NVPTX::LDV_i32_v2_asi; 986 break; 987 case MVT::i64: 988 Opcode = NVPTX::LDV_i64_v2_asi; 989 break; 990 case MVT::f32: 991 Opcode = NVPTX::LDV_f32_v2_asi; 992 break; 993 case MVT::f64: 994 Opcode = NVPTX::LDV_f64_v2_asi; 995 break; 996 } 997 break; 998 case NVPTXISD::LoadV4: 999 switch (EltVT.getSimpleVT().SimpleTy) { 1000 default: 1001 return nullptr; 1002 case MVT::i8: 1003 Opcode = NVPTX::LDV_i8_v4_asi; 1004 break; 1005 case MVT::i16: 1006 Opcode = NVPTX::LDV_i16_v4_asi; 1007 break; 1008 case MVT::i32: 1009 Opcode = NVPTX::LDV_i32_v4_asi; 1010 break; 1011 case MVT::f32: 1012 Opcode = NVPTX::LDV_f32_v4_asi; 1013 break; 1014 } 1015 break; 1016 } 1017 1018 SDValue Ops[] = { getI32Imm(IsVolatile), 
getI32Imm(CodeAddrSpace), 1019 getI32Imm(VecType), getI32Imm(FromType), 1020 getI32Imm(FromTypeWidth), Base, Offset, Chain }; 1021 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1022 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) 1023 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { 1024 if (TM.is64Bit()) { 1025 switch (N->getOpcode()) { 1026 default: 1027 return nullptr; 1028 case NVPTXISD::LoadV2: 1029 switch (EltVT.getSimpleVT().SimpleTy) { 1030 default: 1031 return nullptr; 1032 case MVT::i8: 1033 Opcode = NVPTX::LDV_i8_v2_ari_64; 1034 break; 1035 case MVT::i16: 1036 Opcode = NVPTX::LDV_i16_v2_ari_64; 1037 break; 1038 case MVT::i32: 1039 Opcode = NVPTX::LDV_i32_v2_ari_64; 1040 break; 1041 case MVT::i64: 1042 Opcode = NVPTX::LDV_i64_v2_ari_64; 1043 break; 1044 case MVT::f32: 1045 Opcode = NVPTX::LDV_f32_v2_ari_64; 1046 break; 1047 case MVT::f64: 1048 Opcode = NVPTX::LDV_f64_v2_ari_64; 1049 break; 1050 } 1051 break; 1052 case NVPTXISD::LoadV4: 1053 switch (EltVT.getSimpleVT().SimpleTy) { 1054 default: 1055 return nullptr; 1056 case MVT::i8: 1057 Opcode = NVPTX::LDV_i8_v4_ari_64; 1058 break; 1059 case MVT::i16: 1060 Opcode = NVPTX::LDV_i16_v4_ari_64; 1061 break; 1062 case MVT::i32: 1063 Opcode = NVPTX::LDV_i32_v4_ari_64; 1064 break; 1065 case MVT::f32: 1066 Opcode = NVPTX::LDV_f32_v4_ari_64; 1067 break; 1068 } 1069 break; 1070 } 1071 } else { 1072 switch (N->getOpcode()) { 1073 default: 1074 return nullptr; 1075 case NVPTXISD::LoadV2: 1076 switch (EltVT.getSimpleVT().SimpleTy) { 1077 default: 1078 return nullptr; 1079 case MVT::i8: 1080 Opcode = NVPTX::LDV_i8_v2_ari; 1081 break; 1082 case MVT::i16: 1083 Opcode = NVPTX::LDV_i16_v2_ari; 1084 break; 1085 case MVT::i32: 1086 Opcode = NVPTX::LDV_i32_v2_ari; 1087 break; 1088 case MVT::i64: 1089 Opcode = NVPTX::LDV_i64_v2_ari; 1090 break; 1091 case MVT::f32: 1092 Opcode = NVPTX::LDV_f32_v2_ari; 1093 break; 1094 case MVT::f64: 1095 Opcode = NVPTX::LDV_f64_v2_ari; 1096 break; 
1097 } 1098 break; 1099 case NVPTXISD::LoadV4: 1100 switch (EltVT.getSimpleVT().SimpleTy) { 1101 default: 1102 return nullptr; 1103 case MVT::i8: 1104 Opcode = NVPTX::LDV_i8_v4_ari; 1105 break; 1106 case MVT::i16: 1107 Opcode = NVPTX::LDV_i16_v4_ari; 1108 break; 1109 case MVT::i32: 1110 Opcode = NVPTX::LDV_i32_v4_ari; 1111 break; 1112 case MVT::f32: 1113 Opcode = NVPTX::LDV_f32_v4_ari; 1114 break; 1115 } 1116 break; 1117 } 1118 } 1119 1120 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), 1121 getI32Imm(VecType), getI32Imm(FromType), 1122 getI32Imm(FromTypeWidth), Base, Offset, Chain }; 1123 1124 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1125 } else { 1126 if (TM.is64Bit()) { 1127 switch (N->getOpcode()) { 1128 default: 1129 return nullptr; 1130 case NVPTXISD::LoadV2: 1131 switch (EltVT.getSimpleVT().SimpleTy) { 1132 default: 1133 return nullptr; 1134 case MVT::i8: 1135 Opcode = NVPTX::LDV_i8_v2_areg_64; 1136 break; 1137 case MVT::i16: 1138 Opcode = NVPTX::LDV_i16_v2_areg_64; 1139 break; 1140 case MVT::i32: 1141 Opcode = NVPTX::LDV_i32_v2_areg_64; 1142 break; 1143 case MVT::i64: 1144 Opcode = NVPTX::LDV_i64_v2_areg_64; 1145 break; 1146 case MVT::f32: 1147 Opcode = NVPTX::LDV_f32_v2_areg_64; 1148 break; 1149 case MVT::f64: 1150 Opcode = NVPTX::LDV_f64_v2_areg_64; 1151 break; 1152 } 1153 break; 1154 case NVPTXISD::LoadV4: 1155 switch (EltVT.getSimpleVT().SimpleTy) { 1156 default: 1157 return nullptr; 1158 case MVT::i8: 1159 Opcode = NVPTX::LDV_i8_v4_areg_64; 1160 break; 1161 case MVT::i16: 1162 Opcode = NVPTX::LDV_i16_v4_areg_64; 1163 break; 1164 case MVT::i32: 1165 Opcode = NVPTX::LDV_i32_v4_areg_64; 1166 break; 1167 case MVT::f32: 1168 Opcode = NVPTX::LDV_f32_v4_areg_64; 1169 break; 1170 } 1171 break; 1172 } 1173 } else { 1174 switch (N->getOpcode()) { 1175 default: 1176 return nullptr; 1177 case NVPTXISD::LoadV2: 1178 switch (EltVT.getSimpleVT().SimpleTy) { 1179 default: 1180 return nullptr; 1181 case MVT::i8: 1182 Opcode = 
NVPTX::LDV_i8_v2_areg; 1183 break; 1184 case MVT::i16: 1185 Opcode = NVPTX::LDV_i16_v2_areg; 1186 break; 1187 case MVT::i32: 1188 Opcode = NVPTX::LDV_i32_v2_areg; 1189 break; 1190 case MVT::i64: 1191 Opcode = NVPTX::LDV_i64_v2_areg; 1192 break; 1193 case MVT::f32: 1194 Opcode = NVPTX::LDV_f32_v2_areg; 1195 break; 1196 case MVT::f64: 1197 Opcode = NVPTX::LDV_f64_v2_areg; 1198 break; 1199 } 1200 break; 1201 case NVPTXISD::LoadV4: 1202 switch (EltVT.getSimpleVT().SimpleTy) { 1203 default: 1204 return nullptr; 1205 case MVT::i8: 1206 Opcode = NVPTX::LDV_i8_v4_areg; 1207 break; 1208 case MVT::i16: 1209 Opcode = NVPTX::LDV_i16_v4_areg; 1210 break; 1211 case MVT::i32: 1212 Opcode = NVPTX::LDV_i32_v4_areg; 1213 break; 1214 case MVT::f32: 1215 Opcode = NVPTX::LDV_f32_v4_areg; 1216 break; 1217 } 1218 break; 1219 } 1220 } 1221 1222 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), 1223 getI32Imm(VecType), getI32Imm(FromType), 1224 getI32Imm(FromTypeWidth), Op1, Chain }; 1225 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1226 } 1227 1228 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 1229 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 1230 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); 1231 1232 return LD; 1233 }

namespace {

/// Marks an opcode-table slot for which no instruction exists (the V4 rows
/// have no i64/f64 entries).
const unsigned NoOpcode = ~0U;

/// Addressing forms distinguished by SelectLDGLDU; used as the outermost
/// index of its opcode table.
enum LdgLduAddrForm {
  LdgLduAVar,   // address accepted by SelectDirectAddr
  LdgLduARI64,  // base + offset accepted by SelectADDRri64
  LdgLduARI32,  // base + offset accepted by SelectADDRri
  LdgLduAReg64, // fallback: raw address operand, TM.is64Bit()
  LdgLduAReg32  // fallback: raw address operand, !TM.is64Bit()
};

/// Addressing forms distinguished by SelectStore; used as the outermost index
/// of its opcode table.
enum StoreAddrForm {
  StoreAVar,   // address accepted by SelectDirectAddr
  StoreASI,    // base + offset accepted by SelectADDRsi / SelectADDRsi64
  StoreARI64,  // base + offset accepted by SelectADDRri64
  StoreARI32,  // base + offset accepted by SelectADDRri
  StoreAReg64, // fallback: raw address operand, TM.is64Bit()
  StoreAReg32  // fallback: raw address operand, !TM.is64Bit()
};

} // end anonymous namespace

/// Translates one of the six scalar types that have load/store opcodes into a
/// column index (i8, i16, i32, i64, f32, f64 -> 0..5).
///
/// \returns false, leaving \p Col untouched, for any other type.
static bool getEltTypeColumn(MVT::SimpleValueType VT, unsigned &Col) {
  switch (VT) {
  default:
    return false;
  case MVT::i8:
    Col = 0;
    break;
  case MVT::i16:
    Col = 1;
    break;
  case MVT::i32:
    Col = 2;
    break;
  case MVT::i64:
    Col = 3;
    break;
  case MVT::f32:
    Col = 4;
    break;
  case MVT::f64:
    Col = 5;
    break;
  }
  return true;
}

/// Looks up the machine opcode for an LDG/LDU node.
///
/// \param Form     addressing form chosen by the caller.
/// \param NodeOpc  N->getOpcode(): ISD::INTRINSIC_W_CHAIN or one of
///                 NVPTXISD::LDGV2 / LDUV2 / LDGV4 / LDUV4.
/// \param IsLDG    for ISD::INTRINSIC_W_CHAIN only: true selects the LDG
///                 opcode, false the LDU opcode.
/// \param EltVT    element type of the memory access.
/// \param Opcode   receives the opcode on success.
/// \returns false when the node kind or element type has no opcode.
static bool pickLdgLduOpcode(LdgLduAddrForm Form, unsigned NodeOpc, bool IsLDG,
                             EVT EltVT, unsigned &Opcode) {
  // Row order: scalar LDG, scalar LDU, LDGV2, LDUV2, LDGV4, LDUV4.
  unsigned Row;
  switch (NodeOpc) {
  default:
    return false;
  case ISD::INTRINSIC_W_CHAIN:
    Row = IsLDG ? 0 : 1;
    break;
  case NVPTXISD::LDGV2:
    Row = 2;
    break;
  case NVPTXISD::LDUV2:
    Row = 3;
    break;
  case NVPTXISD::LDGV4:
    Row = 4;
    break;
  case NVPTXISD::LDUV4:
    Row = 5;
    break;
  }

  // The element type is only inspected once the node kind is known to be
  // supported.
  unsigned Col;
  if (!getEltTypeColumn(EltVT.getSimpleVT().SimpleTy, Col))
    return false;

  // Columns: i8, i16, i32, i64, f32, f64.
  static const unsigned Table[5][6][6] = {
    { // LdgLduAVar
      { NVPTX::INT_PTX_LDG_GLOBAL_i8avar, NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
        NVPTX::INT_PTX_LDG_GLOBAL_i32avar, NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
        NVPTX::INT_PTX_LDG_GLOBAL_f32avar, NVPTX::INT_PTX_LDG_GLOBAL_f64avar },
      { NVPTX::INT_PTX_LDU_GLOBAL_i8avar, NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
        NVPTX::INT_PTX_LDU_GLOBAL_i32avar, NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
        NVPTX::INT_PTX_LDU_GLOBAL_f32avar, NVPTX::INT_PTX_LDU_GLOBAL_f64avar },
      { NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar, NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
        NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar, NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
        NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
        NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar },
      { NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar, NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
        NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar, NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
        NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
        NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar },
      { NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar, NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
        NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, NoOpcode,
        NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, NoOpcode },
      { NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar, NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
        NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, NoOpcode,
        NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, NoOpcode }
    },
    { // LdgLduARI64
      { NVPTX::INT_PTX_LDG_GLOBAL_i8ari64, NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
        NVPTX::INT_PTX_LDG_GLOBAL_i32ari64, NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
        NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
        NVPTX::INT_PTX_LDG_GLOBAL_f64ari64 },
      { NVPTX::INT_PTX_LDU_GLOBAL_i8ari64, NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
        NVPTX::INT_PTX_LDU_GLOBAL_i32ari64, NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
        NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
        NVPTX::INT_PTX_LDU_GLOBAL_f64ari64 },
      { NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
        NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
        NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
        NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
        NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
        NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64 },
      { NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
        NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
        NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
        NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
        NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
        NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64 },
      { NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
        NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
        NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, NoOpcode,
        NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, NoOpcode },
      { NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
        NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
        NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, NoOpcode,
        NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, NoOpcode }
    },
    { // LdgLduARI32
      { NVPTX::INT_PTX_LDG_GLOBAL_i8ari, NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
        NVPTX::INT_PTX_LDG_GLOBAL_i32ari, NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
        NVPTX::INT_PTX_LDG_GLOBAL_f32ari, NVPTX::INT_PTX_LDG_GLOBAL_f64ari },
      { NVPTX::INT_PTX_LDU_GLOBAL_i8ari, NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
        NVPTX::INT_PTX_LDU_GLOBAL_i32ari, NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
        NVPTX::INT_PTX_LDU_GLOBAL_f32ari, NVPTX::INT_PTX_LDU_GLOBAL_f64ari },
      { NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
        NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
        NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
        NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
        NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
        NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32 },
      { NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
        NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
        NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
        NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
        NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
        NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32 },
      { NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
        NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
        NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, NoOpcode,
        NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, NoOpcode },
      { NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
        NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
        NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, NoOpcode,
        NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, NoOpcode }
    },
    { // LdgLduAReg64
      { NVPTX::INT_PTX_LDG_GLOBAL_i8areg64, NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
        NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
        NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
        NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
        NVPTX::INT_PTX_LDG_GLOBAL_f64areg64 },
      { NVPTX::INT_PTX_LDU_GLOBAL_i8areg64, NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
        NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
        NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
        NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
        NVPTX::INT_PTX_LDU_GLOBAL_f64areg64 },
      { NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
        NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
        NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
        NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
        NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
        NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64 },
      { NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
        NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
        NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
        NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
        NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
        NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64 },
      { NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
        NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
        NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, NoOpcode,
        NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, NoOpcode },
      { NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
        NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
        NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, NoOpcode,
        NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, NoOpcode }
    },
    { // LdgLduAReg32
      { NVPTX::INT_PTX_LDG_GLOBAL_i8areg, NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
        NVPTX::INT_PTX_LDG_GLOBAL_i32areg, NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
        NVPTX::INT_PTX_LDG_GLOBAL_f32areg, NVPTX::INT_PTX_LDG_GLOBAL_f64areg },
      { NVPTX::INT_PTX_LDU_GLOBAL_i8areg, NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
        NVPTX::INT_PTX_LDU_GLOBAL_i32areg, NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
        NVPTX::INT_PTX_LDU_GLOBAL_f32areg, NVPTX::INT_PTX_LDU_GLOBAL_f64areg },
      { NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
        NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
        NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
        NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
        NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
        NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32 },
      { NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
        NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
        NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
        NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
        NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
        NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32 },
      { NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
        NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
        NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, NoOpcode,
        NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, NoOpcode },
      { NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
        NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
        NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, NoOpcode,
        NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, NoOpcode }
    }
  };

  const unsigned Picked = Table[Form][Row][Col];
  if (Picked == NoOpcode)
    return false;
  Opcode = Picked;
  return true;
}

/// Looks up the scalar store opcode for the given addressing form and the
/// type of the value being stored.
///
/// \returns false, leaving \p Opcode untouched, when \p SourceVT is not one of
/// i8/i16/i32/i64/f32/f64.
static bool pickStoreOpcode(StoreAddrForm Form, MVT::SimpleValueType SourceVT,
                            unsigned &Opcode) {
  unsigned Col;
  if (!getEltTypeColumn(SourceVT, Col))
    return false;

  // Columns: i8, i16, i32, i64, f32, f64.
  static const unsigned Table[6][6] = {
    // StoreAVar
    { NVPTX::ST_i8_avar, NVPTX::ST_i16_avar, NVPTX::ST_i32_avar,
      NVPTX::ST_i64_avar, NVPTX::ST_f32_avar, NVPTX::ST_f64_avar },
    // StoreASI
    { NVPTX::ST_i8_asi, NVPTX::ST_i16_asi, NVPTX::ST_i32_asi,
      NVPTX::ST_i64_asi, NVPTX::ST_f32_asi, NVPTX::ST_f64_asi },
    // StoreARI64
    { NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64, NVPTX::ST_i32_ari_64,
      NVPTX::ST_i64_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64 },
    // StoreARI32
    { NVPTX::ST_i8_ari, NVPTX::ST_i16_ari, NVPTX::ST_i32_ari,
      NVPTX::ST_i64_ari, NVPTX::ST_f32_ari, NVPTX::ST_f64_ari },
    // StoreAReg64
    { NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64, NVPTX::ST_i32_areg_64,
      NVPTX::ST_i64_areg_64, NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64 },
    // StoreAReg32
    { NVPTX::ST_i8_areg, NVPTX::ST_i16_areg, NVPTX::ST_i32_areg,
      NVPTX::ST_i64_areg, NVPTX::ST_f32_areg, NVPTX::ST_f64_areg }
  };

  Opcode = Table[Form][Col];
  return true;
}

/// Selects the machine node for an ldg/ldu load.
///
/// \p N is either an ISD::INTRINSIC_W_CHAIN node for one of the
/// nvvm_ldg_global_{f,i,p} / nvvm_ldu_global_{f,i,p} intrinsics, or an
/// NVPTXISD::LDGV2 / LDUV2 / LDGV4 / LDUV4 node.
///
/// \returns the new machine node with N's memory operand attached, or nullptr
/// when the intrinsic, node kind or element type is not handled here.
SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue Op1;
  MemSDNode *Mem;
  bool IsLDG = true;

  // If this is an LDG/LDU intrinsic, the address is the third operand. If it
  // is an LDG/LDU SD node (from custom vector handling), then it is the second
  // operand.
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    Op1 = N->getOperand(2);
    Mem = cast<MemIntrinsicSDNode>(N);
    unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IID) {
    default:
      return nullptr;
    case Intrinsic::nvvm_ldg_global_f:
    case Intrinsic::nvvm_ldg_global_i:
    case Intrinsic::nvvm_ldg_global_p:
      IsLDG = true;
      break;
    case Intrinsic::nvvm_ldu_global_f:
    case Intrinsic::nvvm_ldu_global_i:
    case Intrinsic::nvvm_ldu_global_p:
      IsLDG = false;
      break;
    }
  } else {
    Op1 = N->getOperand(1);
    Mem = cast<MemSDNode>(N);
  }

  unsigned Opcode;
  SDLoc DL(N);
  SDNode *LD;
  SDValue Base, Offset, Addr;

  // Opcodes are keyed on the element type, also for vector accesses.
  EVT EltVT = Mem->getMemoryVT();
  if (EltVT.isVector())
    EltVT = EltVT.getVectorElementType();

  const bool Is64Bit = TM.is64Bit();

  // Try the addressing forms in order: direct address, base + offset, and
  // finally the address operand as-is.
  if (SelectDirectAddr(Op1, Addr)) {
    if (!pickLdgLduOpcode(LdgLduAVar, N->getOpcode(), IsLDG, EltVT, Opcode))
      return nullptr;

    SDValue Ops[] = { Addr, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  } else if (Is64Bit ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                     : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
    if (!pickLdgLduOpcode(Is64Bit ? LdgLduARI64 : LdgLduARI32, N->getOpcode(),
                          IsLDG, EltVT, Opcode))
      return nullptr;

    SDValue Ops[] = { Base, Offset, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  } else {
    if (!pickLdgLduOpcode(Is64Bit ? LdgLduAReg64 : LdgLduAReg32, N->getOpcode(),
                          IsLDG, EltVT, Opcode))
      return nullptr;

    SDValue Ops[] = { Op1, Chain };
    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
  }

  // Carry the memory operand over to the selected machine node.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = Mem->getMemOperand();
  cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return LD;
}

/// Selects the machine node for a StoreSDNode.
///
/// \returns the new machine node with N's memory operand attached, or nullptr
/// when the store is indexed, its memory type is not simple, it is a vector
/// of other than 2 or 4 elements, or the stored value's type has no opcode.
SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
  SDLoc dl(N);
  StoreSDNode *ST = cast<StoreSDNode>(N);
  EVT StoreVT = ST->getMemoryVT();

  // Pre/post increment/decrement stores are not supported.
  if (ST->isIndexed())
    return nullptr;

  if (!StoreVT.isSimple())
    return nullptr;

  // Address Space Setting
  unsigned int codeAddrSpace = getCodeAddrSpace(ST);

  // Volatile Setting
  // - the volatile flag is only kept for the global, shared and generic
  //   address spaces; it is dropped for every other space.
  bool isVolatile = ST->isVolatile();
  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
    isVolatile = false;

  // Vector Setting
  MVT SimpleVT = StoreVT.getSimpleVT();
  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
  if (SimpleVT.isVector()) {
    switch (SimpleVT.getVectorNumElements()) {
    case 2:
      vecType = NVPTX::PTXLdStInstCode::V2;
      break;
    case 4:
      vecType = NVPTX::PTXLdStInstCode::V4;
      break;
    default:
      return nullptr;
    }
  }

  // Type Setting: toType + toTypeWidth
  // - for integer type, always use 'u'
  MVT ScalarVT = SimpleVT.getScalarType();
  unsigned toTypeWidth = ScalarVT.getSizeInBits();
  unsigned int toType = ScalarVT.isFloatingPoint()
                            ? NVPTX::PTXLdStInstCode::Float
                            : NVPTX::PTXLdStInstCode::Unsigned;

  // Create the machine instruction DAG
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1); // value being stored
  SDValue N2 = N->getOperand(2); // address
  SDValue Addr;
  SDValue Offset, Base;
  unsigned Opcode;
  // The opcode is keyed on the type of the stored value's node (result 0).
  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;

  const bool Is64Bit = TM.is64Bit();
  SDNode *NVPTXST;

  // Try the addressing forms in order: direct address, symbol + immediate,
  // register + immediate, and finally the address operand as-is.
  if (SelectDirectAddr(N2, Addr)) {
    if (!pickStoreOpcode(StoreAVar, SourceVT, Opcode))
      return nullptr;

    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Addr, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  } else if (Is64Bit ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                     : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
    if (!pickStoreOpcode(StoreASI, SourceVT, Opcode))
      return nullptr;

    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  } else if (Is64Bit ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                     : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
    if (!pickStoreOpcode(Is64Bit ? StoreARI64 : StoreARI32, SourceVT, Opcode))
      return nullptr;

    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), Base, Offset, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  } else {
    if (!pickStoreOpcode(Is64Bit ? StoreAReg64 : StoreAReg32, SourceVT, Opcode))
      return nullptr;

    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                      getI32Imm(vecType), getI32Imm(toType),
                      getI32Imm(toTypeWidth), N2, Chain };
    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
  }

  // Carry the memory operand over to the selected machine node.
  MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
  MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);

  return NVPTXST;
}

2219 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { 2220 SDValue Chain = N->getOperand(0); 2221 SDValue Op1 = N->getOperand(1); 2222 SDValue Addr, Offset, Base; 2223 unsigned Opcode; 2224 SDLoc DL(N); 2225 SDNode *ST; 2226 EVT EltVT = Op1.getValueType(); 2227 MemSDNode *MemSD = cast<MemSDNode>(N); 2228 EVT StoreVT =
MemSD->getMemoryVT(); 2229 2230 // Address Space Setting 2231 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD); 2232 2233 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) { 2234 report_fatal_error("Cannot store to pointer that points to constant " 2235 "memory space"); 2236 } 2237 2238 // Volatile Setting 2239 // - .volatile is only availalble for .global and .shared 2240 bool IsVolatile = MemSD->isVolatile(); 2241 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 2242 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 2243 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 2244 IsVolatile = false; 2245 2246 // Type Setting: toType + toTypeWidth 2247 // - for integer type, always use 'u' 2248 assert(StoreVT.isSimple() && "Store value is not simple"); 2249 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); 2250 unsigned ToTypeWidth = ScalarVT.getSizeInBits(); 2251 unsigned ToType; 2252 if (ScalarVT.isFloatingPoint()) 2253 ToType = NVPTX::PTXLdStInstCode::Float; 2254 else 2255 ToType = NVPTX::PTXLdStInstCode::Unsigned; 2256 2257 SmallVector<SDValue, 12> StOps; 2258 SDValue N2; 2259 unsigned VecType; 2260 2261 switch (N->getOpcode()) { 2262 case NVPTXISD::StoreV2: 2263 VecType = NVPTX::PTXLdStInstCode::V2; 2264 StOps.push_back(N->getOperand(1)); 2265 StOps.push_back(N->getOperand(2)); 2266 N2 = N->getOperand(3); 2267 break; 2268 case NVPTXISD::StoreV4: 2269 VecType = NVPTX::PTXLdStInstCode::V4; 2270 StOps.push_back(N->getOperand(1)); 2271 StOps.push_back(N->getOperand(2)); 2272 StOps.push_back(N->getOperand(3)); 2273 StOps.push_back(N->getOperand(4)); 2274 N2 = N->getOperand(5); 2275 break; 2276 default: 2277 return nullptr; 2278 } 2279 2280 StOps.push_back(getI32Imm(IsVolatile)); 2281 StOps.push_back(getI32Imm(CodeAddrSpace)); 2282 StOps.push_back(getI32Imm(VecType)); 2283 StOps.push_back(getI32Imm(ToType)); 2284 StOps.push_back(getI32Imm(ToTypeWidth)); 2285 2286 if (SelectDirectAddr(N2, Addr)) { 2287 switch (N->getOpcode()) { 2288 default: 2289 return 
nullptr; 2290 case NVPTXISD::StoreV2: 2291 switch (EltVT.getSimpleVT().SimpleTy) { 2292 default: 2293 return nullptr; 2294 case MVT::i8: 2295 Opcode = NVPTX::STV_i8_v2_avar; 2296 break; 2297 case MVT::i16: 2298 Opcode = NVPTX::STV_i16_v2_avar; 2299 break; 2300 case MVT::i32: 2301 Opcode = NVPTX::STV_i32_v2_avar; 2302 break; 2303 case MVT::i64: 2304 Opcode = NVPTX::STV_i64_v2_avar; 2305 break; 2306 case MVT::f32: 2307 Opcode = NVPTX::STV_f32_v2_avar; 2308 break; 2309 case MVT::f64: 2310 Opcode = NVPTX::STV_f64_v2_avar; 2311 break; 2312 } 2313 break; 2314 case NVPTXISD::StoreV4: 2315 switch (EltVT.getSimpleVT().SimpleTy) { 2316 default: 2317 return nullptr; 2318 case MVT::i8: 2319 Opcode = NVPTX::STV_i8_v4_avar; 2320 break; 2321 case MVT::i16: 2322 Opcode = NVPTX::STV_i16_v4_avar; 2323 break; 2324 case MVT::i32: 2325 Opcode = NVPTX::STV_i32_v4_avar; 2326 break; 2327 case MVT::f32: 2328 Opcode = NVPTX::STV_f32_v4_avar; 2329 break; 2330 } 2331 break; 2332 } 2333 StOps.push_back(Addr); 2334 } else if (TM.is64Bit() ? 
SelectADDRsi64(N2.getNode(), N2, Base, Offset) 2335 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { 2336 switch (N->getOpcode()) { 2337 default: 2338 return nullptr; 2339 case NVPTXISD::StoreV2: 2340 switch (EltVT.getSimpleVT().SimpleTy) { 2341 default: 2342 return nullptr; 2343 case MVT::i8: 2344 Opcode = NVPTX::STV_i8_v2_asi; 2345 break; 2346 case MVT::i16: 2347 Opcode = NVPTX::STV_i16_v2_asi; 2348 break; 2349 case MVT::i32: 2350 Opcode = NVPTX::STV_i32_v2_asi; 2351 break; 2352 case MVT::i64: 2353 Opcode = NVPTX::STV_i64_v2_asi; 2354 break; 2355 case MVT::f32: 2356 Opcode = NVPTX::STV_f32_v2_asi; 2357 break; 2358 case MVT::f64: 2359 Opcode = NVPTX::STV_f64_v2_asi; 2360 break; 2361 } 2362 break; 2363 case NVPTXISD::StoreV4: 2364 switch (EltVT.getSimpleVT().SimpleTy) { 2365 default: 2366 return nullptr; 2367 case MVT::i8: 2368 Opcode = NVPTX::STV_i8_v4_asi; 2369 break; 2370 case MVT::i16: 2371 Opcode = NVPTX::STV_i16_v4_asi; 2372 break; 2373 case MVT::i32: 2374 Opcode = NVPTX::STV_i32_v4_asi; 2375 break; 2376 case MVT::f32: 2377 Opcode = NVPTX::STV_f32_v4_asi; 2378 break; 2379 } 2380 break; 2381 } 2382 StOps.push_back(Base); 2383 StOps.push_back(Offset); 2384 } else if (TM.is64Bit() ? 
SelectADDRri64(N2.getNode(), N2, Base, Offset) 2385 : SelectADDRri(N2.getNode(), N2, Base, Offset)) { 2386 if (TM.is64Bit()) { 2387 switch (N->getOpcode()) { 2388 default: 2389 return nullptr; 2390 case NVPTXISD::StoreV2: 2391 switch (EltVT.getSimpleVT().SimpleTy) { 2392 default: 2393 return nullptr; 2394 case MVT::i8: 2395 Opcode = NVPTX::STV_i8_v2_ari_64; 2396 break; 2397 case MVT::i16: 2398 Opcode = NVPTX::STV_i16_v2_ari_64; 2399 break; 2400 case MVT::i32: 2401 Opcode = NVPTX::STV_i32_v2_ari_64; 2402 break; 2403 case MVT::i64: 2404 Opcode = NVPTX::STV_i64_v2_ari_64; 2405 break; 2406 case MVT::f32: 2407 Opcode = NVPTX::STV_f32_v2_ari_64; 2408 break; 2409 case MVT::f64: 2410 Opcode = NVPTX::STV_f64_v2_ari_64; 2411 break; 2412 } 2413 break; 2414 case NVPTXISD::StoreV4: 2415 switch (EltVT.getSimpleVT().SimpleTy) { 2416 default: 2417 return nullptr; 2418 case MVT::i8: 2419 Opcode = NVPTX::STV_i8_v4_ari_64; 2420 break; 2421 case MVT::i16: 2422 Opcode = NVPTX::STV_i16_v4_ari_64; 2423 break; 2424 case MVT::i32: 2425 Opcode = NVPTX::STV_i32_v4_ari_64; 2426 break; 2427 case MVT::f32: 2428 Opcode = NVPTX::STV_f32_v4_ari_64; 2429 break; 2430 } 2431 break; 2432 } 2433 } else { 2434 switch (N->getOpcode()) { 2435 default: 2436 return nullptr; 2437 case NVPTXISD::StoreV2: 2438 switch (EltVT.getSimpleVT().SimpleTy) { 2439 default: 2440 return nullptr; 2441 case MVT::i8: 2442 Opcode = NVPTX::STV_i8_v2_ari; 2443 break; 2444 case MVT::i16: 2445 Opcode = NVPTX::STV_i16_v2_ari; 2446 break; 2447 case MVT::i32: 2448 Opcode = NVPTX::STV_i32_v2_ari; 2449 break; 2450 case MVT::i64: 2451 Opcode = NVPTX::STV_i64_v2_ari; 2452 break; 2453 case MVT::f32: 2454 Opcode = NVPTX::STV_f32_v2_ari; 2455 break; 2456 case MVT::f64: 2457 Opcode = NVPTX::STV_f64_v2_ari; 2458 break; 2459 } 2460 break; 2461 case NVPTXISD::StoreV4: 2462 switch (EltVT.getSimpleVT().SimpleTy) { 2463 default: 2464 return nullptr; 2465 case MVT::i8: 2466 Opcode = NVPTX::STV_i8_v4_ari; 2467 break; 2468 case MVT::i16: 2469 Opcode 
= NVPTX::STV_i16_v4_ari; 2470 break; 2471 case MVT::i32: 2472 Opcode = NVPTX::STV_i32_v4_ari; 2473 break; 2474 case MVT::f32: 2475 Opcode = NVPTX::STV_f32_v4_ari; 2476 break; 2477 } 2478 break; 2479 } 2480 } 2481 StOps.push_back(Base); 2482 StOps.push_back(Offset); 2483 } else { 2484 if (TM.is64Bit()) { 2485 switch (N->getOpcode()) { 2486 default: 2487 return nullptr; 2488 case NVPTXISD::StoreV2: 2489 switch (EltVT.getSimpleVT().SimpleTy) { 2490 default: 2491 return nullptr; 2492 case MVT::i8: 2493 Opcode = NVPTX::STV_i8_v2_areg_64; 2494 break; 2495 case MVT::i16: 2496 Opcode = NVPTX::STV_i16_v2_areg_64; 2497 break; 2498 case MVT::i32: 2499 Opcode = NVPTX::STV_i32_v2_areg_64; 2500 break; 2501 case MVT::i64: 2502 Opcode = NVPTX::STV_i64_v2_areg_64; 2503 break; 2504 case MVT::f32: 2505 Opcode = NVPTX::STV_f32_v2_areg_64; 2506 break; 2507 case MVT::f64: 2508 Opcode = NVPTX::STV_f64_v2_areg_64; 2509 break; 2510 } 2511 break; 2512 case NVPTXISD::StoreV4: 2513 switch (EltVT.getSimpleVT().SimpleTy) { 2514 default: 2515 return nullptr; 2516 case MVT::i8: 2517 Opcode = NVPTX::STV_i8_v4_areg_64; 2518 break; 2519 case MVT::i16: 2520 Opcode = NVPTX::STV_i16_v4_areg_64; 2521 break; 2522 case MVT::i32: 2523 Opcode = NVPTX::STV_i32_v4_areg_64; 2524 break; 2525 case MVT::f32: 2526 Opcode = NVPTX::STV_f32_v4_areg_64; 2527 break; 2528 } 2529 break; 2530 } 2531 } else { 2532 switch (N->getOpcode()) { 2533 default: 2534 return nullptr; 2535 case NVPTXISD::StoreV2: 2536 switch (EltVT.getSimpleVT().SimpleTy) { 2537 default: 2538 return nullptr; 2539 case MVT::i8: 2540 Opcode = NVPTX::STV_i8_v2_areg; 2541 break; 2542 case MVT::i16: 2543 Opcode = NVPTX::STV_i16_v2_areg; 2544 break; 2545 case MVT::i32: 2546 Opcode = NVPTX::STV_i32_v2_areg; 2547 break; 2548 case MVT::i64: 2549 Opcode = NVPTX::STV_i64_v2_areg; 2550 break; 2551 case MVT::f32: 2552 Opcode = NVPTX::STV_f32_v2_areg; 2553 break; 2554 case MVT::f64: 2555 Opcode = NVPTX::STV_f64_v2_areg; 2556 break; 2557 } 2558 break; 2559 case 
NVPTXISD::StoreV4: 2560 switch (EltVT.getSimpleVT().SimpleTy) { 2561 default: 2562 return nullptr; 2563 case MVT::i8: 2564 Opcode = NVPTX::STV_i8_v4_areg; 2565 break; 2566 case MVT::i16: 2567 Opcode = NVPTX::STV_i16_v4_areg; 2568 break; 2569 case MVT::i32: 2570 Opcode = NVPTX::STV_i32_v4_areg; 2571 break; 2572 case MVT::f32: 2573 Opcode = NVPTX::STV_f32_v4_areg; 2574 break; 2575 } 2576 break; 2577 } 2578 } 2579 StOps.push_back(N2); 2580 } 2581 2582 StOps.push_back(Chain); 2583 2584 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps); 2585 2586 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 2587 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 2588 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1); 2589 2590 return ST; 2591 } 2592 2593 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { 2594 SDValue Chain = Node->getOperand(0); 2595 SDValue Offset = Node->getOperand(2); 2596 SDValue Flag = Node->getOperand(3); 2597 SDLoc DL(Node); 2598 MemSDNode *Mem = cast<MemSDNode>(Node); 2599 2600 unsigned VecSize; 2601 switch (Node->getOpcode()) { 2602 default: 2603 return nullptr; 2604 case NVPTXISD::LoadParam: 2605 VecSize = 1; 2606 break; 2607 case NVPTXISD::LoadParamV2: 2608 VecSize = 2; 2609 break; 2610 case NVPTXISD::LoadParamV4: 2611 VecSize = 4; 2612 break; 2613 } 2614 2615 EVT EltVT = Node->getValueType(0); 2616 EVT MemVT = Mem->getMemoryVT(); 2617 2618 unsigned Opc = 0; 2619 2620 switch (VecSize) { 2621 default: 2622 return nullptr; 2623 case 1: 2624 switch (MemVT.getSimpleVT().SimpleTy) { 2625 default: 2626 return nullptr; 2627 case MVT::i1: 2628 Opc = NVPTX::LoadParamMemI8; 2629 break; 2630 case MVT::i8: 2631 Opc = NVPTX::LoadParamMemI8; 2632 break; 2633 case MVT::i16: 2634 Opc = NVPTX::LoadParamMemI16; 2635 break; 2636 case MVT::i32: 2637 Opc = NVPTX::LoadParamMemI32; 2638 break; 2639 case MVT::i64: 2640 Opc = NVPTX::LoadParamMemI64; 2641 break; 2642 case MVT::f32: 2643 Opc = NVPTX::LoadParamMemF32; 2644 break; 2645 
case MVT::f64: 2646 Opc = NVPTX::LoadParamMemF64; 2647 break; 2648 } 2649 break; 2650 case 2: 2651 switch (MemVT.getSimpleVT().SimpleTy) { 2652 default: 2653 return nullptr; 2654 case MVT::i1: 2655 Opc = NVPTX::LoadParamMemV2I8; 2656 break; 2657 case MVT::i8: 2658 Opc = NVPTX::LoadParamMemV2I8; 2659 break; 2660 case MVT::i16: 2661 Opc = NVPTX::LoadParamMemV2I16; 2662 break; 2663 case MVT::i32: 2664 Opc = NVPTX::LoadParamMemV2I32; 2665 break; 2666 case MVT::i64: 2667 Opc = NVPTX::LoadParamMemV2I64; 2668 break; 2669 case MVT::f32: 2670 Opc = NVPTX::LoadParamMemV2F32; 2671 break; 2672 case MVT::f64: 2673 Opc = NVPTX::LoadParamMemV2F64; 2674 break; 2675 } 2676 break; 2677 case 4: 2678 switch (MemVT.getSimpleVT().SimpleTy) { 2679 default: 2680 return nullptr; 2681 case MVT::i1: 2682 Opc = NVPTX::LoadParamMemV4I8; 2683 break; 2684 case MVT::i8: 2685 Opc = NVPTX::LoadParamMemV4I8; 2686 break; 2687 case MVT::i16: 2688 Opc = NVPTX::LoadParamMemV4I16; 2689 break; 2690 case MVT::i32: 2691 Opc = NVPTX::LoadParamMemV4I32; 2692 break; 2693 case MVT::f32: 2694 Opc = NVPTX::LoadParamMemV4F32; 2695 break; 2696 } 2697 break; 2698 } 2699 2700 SDVTList VTs; 2701 if (VecSize == 1) { 2702 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue); 2703 } else if (VecSize == 2) { 2704 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue); 2705 } else { 2706 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue }; 2707 VTs = CurDAG->getVTList(EVTs); 2708 } 2709 2710 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2711 2712 SmallVector<SDValue, 2> Ops; 2713 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32)); 2714 Ops.push_back(Chain); 2715 Ops.push_back(Flag); 2716 2717 SDNode *Ret = 2718 CurDAG->getMachineNode(Opc, DL, VTs, Ops); 2719 return Ret; 2720 } 2721 2722 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) { 2723 SDLoc DL(N); 2724 SDValue Chain = N->getOperand(0); 2725 SDValue Offset = N->getOperand(1); 2726 unsigned OffsetVal = 
cast<ConstantSDNode>(Offset)->getZExtValue(); 2727 MemSDNode *Mem = cast<MemSDNode>(N); 2728 2729 // How many elements do we have? 2730 unsigned NumElts = 1; 2731 switch (N->getOpcode()) { 2732 default: 2733 return nullptr; 2734 case NVPTXISD::StoreRetval: 2735 NumElts = 1; 2736 break; 2737 case NVPTXISD::StoreRetvalV2: 2738 NumElts = 2; 2739 break; 2740 case NVPTXISD::StoreRetvalV4: 2741 NumElts = 4; 2742 break; 2743 } 2744 2745 // Build vector of operands 2746 SmallVector<SDValue, 6> Ops; 2747 for (unsigned i = 0; i < NumElts; ++i) 2748 Ops.push_back(N->getOperand(i + 2)); 2749 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32)); 2750 Ops.push_back(Chain); 2751 2752 // Determine target opcode 2753 // If we have an i1, use an 8-bit store. The lowering code in 2754 // NVPTXISelLowering will have already emitted an upcast. 2755 unsigned Opcode = 0; 2756 switch (NumElts) { 2757 default: 2758 return nullptr; 2759 case 1: 2760 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2761 default: 2762 return nullptr; 2763 case MVT::i1: 2764 Opcode = NVPTX::StoreRetvalI8; 2765 break; 2766 case MVT::i8: 2767 Opcode = NVPTX::StoreRetvalI8; 2768 break; 2769 case MVT::i16: 2770 Opcode = NVPTX::StoreRetvalI16; 2771 break; 2772 case MVT::i32: 2773 Opcode = NVPTX::StoreRetvalI32; 2774 break; 2775 case MVT::i64: 2776 Opcode = NVPTX::StoreRetvalI64; 2777 break; 2778 case MVT::f32: 2779 Opcode = NVPTX::StoreRetvalF32; 2780 break; 2781 case MVT::f64: 2782 Opcode = NVPTX::StoreRetvalF64; 2783 break; 2784 } 2785 break; 2786 case 2: 2787 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2788 default: 2789 return nullptr; 2790 case MVT::i1: 2791 Opcode = NVPTX::StoreRetvalV2I8; 2792 break; 2793 case MVT::i8: 2794 Opcode = NVPTX::StoreRetvalV2I8; 2795 break; 2796 case MVT::i16: 2797 Opcode = NVPTX::StoreRetvalV2I16; 2798 break; 2799 case MVT::i32: 2800 Opcode = NVPTX::StoreRetvalV2I32; 2801 break; 2802 case MVT::i64: 2803 Opcode = NVPTX::StoreRetvalV2I64; 2804 break; 2805 case 
MVT::f32: 2806 Opcode = NVPTX::StoreRetvalV2F32; 2807 break; 2808 case MVT::f64: 2809 Opcode = NVPTX::StoreRetvalV2F64; 2810 break; 2811 } 2812 break; 2813 case 4: 2814 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2815 default: 2816 return nullptr; 2817 case MVT::i1: 2818 Opcode = NVPTX::StoreRetvalV4I8; 2819 break; 2820 case MVT::i8: 2821 Opcode = NVPTX::StoreRetvalV4I8; 2822 break; 2823 case MVT::i16: 2824 Opcode = NVPTX::StoreRetvalV4I16; 2825 break; 2826 case MVT::i32: 2827 Opcode = NVPTX::StoreRetvalV4I32; 2828 break; 2829 case MVT::f32: 2830 Opcode = NVPTX::StoreRetvalV4F32; 2831 break; 2832 } 2833 break; 2834 } 2835 2836 SDNode *Ret = 2837 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); 2838 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 2839 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 2840 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); 2841 2842 return Ret; 2843 } 2844 2845 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { 2846 SDLoc DL(N); 2847 SDValue Chain = N->getOperand(0); 2848 SDValue Param = N->getOperand(1); 2849 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue(); 2850 SDValue Offset = N->getOperand(2); 2851 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2852 MemSDNode *Mem = cast<MemSDNode>(N); 2853 SDValue Flag = N->getOperand(N->getNumOperands() - 1); 2854 2855 // How many elements do we have? 
2856 unsigned NumElts = 1; 2857 switch (N->getOpcode()) { 2858 default: 2859 return nullptr; 2860 case NVPTXISD::StoreParamU32: 2861 case NVPTXISD::StoreParamS32: 2862 case NVPTXISD::StoreParam: 2863 NumElts = 1; 2864 break; 2865 case NVPTXISD::StoreParamV2: 2866 NumElts = 2; 2867 break; 2868 case NVPTXISD::StoreParamV4: 2869 NumElts = 4; 2870 break; 2871 } 2872 2873 // Build vector of operands 2874 SmallVector<SDValue, 8> Ops; 2875 for (unsigned i = 0; i < NumElts; ++i) 2876 Ops.push_back(N->getOperand(i + 3)); 2877 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32)); 2878 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32)); 2879 Ops.push_back(Chain); 2880 Ops.push_back(Flag); 2881 2882 // Determine target opcode 2883 // If we have an i1, use an 8-bit store. The lowering code in 2884 // NVPTXISelLowering will have already emitted an upcast. 2885 unsigned Opcode = 0; 2886 switch (N->getOpcode()) { 2887 default: 2888 switch (NumElts) { 2889 default: 2890 return nullptr; 2891 case 1: 2892 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2893 default: 2894 return nullptr; 2895 case MVT::i1: 2896 Opcode = NVPTX::StoreParamI8; 2897 break; 2898 case MVT::i8: 2899 Opcode = NVPTX::StoreParamI8; 2900 break; 2901 case MVT::i16: 2902 Opcode = NVPTX::StoreParamI16; 2903 break; 2904 case MVT::i32: 2905 Opcode = NVPTX::StoreParamI32; 2906 break; 2907 case MVT::i64: 2908 Opcode = NVPTX::StoreParamI64; 2909 break; 2910 case MVT::f32: 2911 Opcode = NVPTX::StoreParamF32; 2912 break; 2913 case MVT::f64: 2914 Opcode = NVPTX::StoreParamF64; 2915 break; 2916 } 2917 break; 2918 case 2: 2919 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2920 default: 2921 return nullptr; 2922 case MVT::i1: 2923 Opcode = NVPTX::StoreParamV2I8; 2924 break; 2925 case MVT::i8: 2926 Opcode = NVPTX::StoreParamV2I8; 2927 break; 2928 case MVT::i16: 2929 Opcode = NVPTX::StoreParamV2I16; 2930 break; 2931 case MVT::i32: 2932 Opcode = NVPTX::StoreParamV2I32; 2933 break; 2934 case 
MVT::i64: 2935 Opcode = NVPTX::StoreParamV2I64; 2936 break; 2937 case MVT::f32: 2938 Opcode = NVPTX::StoreParamV2F32; 2939 break; 2940 case MVT::f64: 2941 Opcode = NVPTX::StoreParamV2F64; 2942 break; 2943 } 2944 break; 2945 case 4: 2946 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2947 default: 2948 return nullptr; 2949 case MVT::i1: 2950 Opcode = NVPTX::StoreParamV4I8; 2951 break; 2952 case MVT::i8: 2953 Opcode = NVPTX::StoreParamV4I8; 2954 break; 2955 case MVT::i16: 2956 Opcode = NVPTX::StoreParamV4I16; 2957 break; 2958 case MVT::i32: 2959 Opcode = NVPTX::StoreParamV4I32; 2960 break; 2961 case MVT::f32: 2962 Opcode = NVPTX::StoreParamV4F32; 2963 break; 2964 } 2965 break; 2966 } 2967 break; 2968 // Special case: if we have a sign-extend/zero-extend node, insert the 2969 // conversion instruction first, and use that as the value operand to 2970 // the selected StoreParam node. 2971 case NVPTXISD::StoreParamU32: { 2972 Opcode = NVPTX::StoreParamI32; 2973 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, 2974 MVT::i32); 2975 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL, 2976 MVT::i32, Ops[0], CvtNone); 2977 Ops[0] = SDValue(Cvt, 0); 2978 break; 2979 } 2980 case NVPTXISD::StoreParamS32: { 2981 Opcode = NVPTX::StoreParamI32; 2982 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, 2983 MVT::i32); 2984 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL, 2985 MVT::i32, Ops[0], CvtNone); 2986 Ops[0] = SDValue(Cvt, 0); 2987 break; 2988 } 2989 } 2990 2991 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue); 2992 SDNode *Ret = 2993 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops); 2994 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 2995 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 2996 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); 2997 2998 return Ret; 2999 } 3000 3001 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { 3002 SDValue Chain = 
N->getOperand(0); 3003 SDNode *Ret = nullptr; 3004 unsigned Opc = 0; 3005 SmallVector<SDValue, 8> Ops; 3006 3007 switch (N->getOpcode()) { 3008 default: return nullptr; 3009 case NVPTXISD::Tex1DFloatS32: 3010 Opc = NVPTX::TEX_1D_F32_S32; 3011 break; 3012 case NVPTXISD::Tex1DFloatFloat: 3013 Opc = NVPTX::TEX_1D_F32_F32; 3014 break; 3015 case NVPTXISD::Tex1DFloatFloatLevel: 3016 Opc = NVPTX::TEX_1D_F32_F32_LEVEL; 3017 break; 3018 case NVPTXISD::Tex1DFloatFloatGrad: 3019 Opc = NVPTX::TEX_1D_F32_F32_GRAD; 3020 break; 3021 case NVPTXISD::Tex1DS32S32: 3022 Opc = NVPTX::TEX_1D_S32_S32; 3023 break; 3024 case NVPTXISD::Tex1DS32Float: 3025 Opc = NVPTX::TEX_1D_S32_F32; 3026 break; 3027 case NVPTXISD::Tex1DS32FloatLevel: 3028 Opc = NVPTX::TEX_1D_S32_F32_LEVEL; 3029 break; 3030 case NVPTXISD::Tex1DS32FloatGrad: 3031 Opc = NVPTX::TEX_1D_S32_F32_GRAD; 3032 break; 3033 case NVPTXISD::Tex1DU32S32: 3034 Opc = NVPTX::TEX_1D_U32_S32; 3035 break; 3036 case NVPTXISD::Tex1DU32Float: 3037 Opc = NVPTX::TEX_1D_U32_F32; 3038 break; 3039 case NVPTXISD::Tex1DU32FloatLevel: 3040 Opc = NVPTX::TEX_1D_U32_F32_LEVEL; 3041 break; 3042 case NVPTXISD::Tex1DU32FloatGrad: 3043 Opc = NVPTX::TEX_1D_U32_F32_GRAD; 3044 break; 3045 case NVPTXISD::Tex1DArrayFloatS32: 3046 Opc = NVPTX::TEX_1D_ARRAY_F32_S32; 3047 break; 3048 case NVPTXISD::Tex1DArrayFloatFloat: 3049 Opc = NVPTX::TEX_1D_ARRAY_F32_F32; 3050 break; 3051 case NVPTXISD::Tex1DArrayFloatFloatLevel: 3052 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; 3053 break; 3054 case NVPTXISD::Tex1DArrayFloatFloatGrad: 3055 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; 3056 break; 3057 case NVPTXISD::Tex1DArrayS32S32: 3058 Opc = NVPTX::TEX_1D_ARRAY_S32_S32; 3059 break; 3060 case NVPTXISD::Tex1DArrayS32Float: 3061 Opc = NVPTX::TEX_1D_ARRAY_S32_F32; 3062 break; 3063 case NVPTXISD::Tex1DArrayS32FloatLevel: 3064 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL; 3065 break; 3066 case NVPTXISD::Tex1DArrayS32FloatGrad: 3067 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD; 3068 break; 3069 case 
NVPTXISD::Tex1DArrayU32S32: 3070 Opc = NVPTX::TEX_1D_ARRAY_U32_S32; 3071 break; 3072 case NVPTXISD::Tex1DArrayU32Float: 3073 Opc = NVPTX::TEX_1D_ARRAY_U32_F32; 3074 break; 3075 case NVPTXISD::Tex1DArrayU32FloatLevel: 3076 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL; 3077 break; 3078 case NVPTXISD::Tex1DArrayU32FloatGrad: 3079 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD; 3080 break; 3081 case NVPTXISD::Tex2DFloatS32: 3082 Opc = NVPTX::TEX_2D_F32_S32; 3083 break; 3084 case NVPTXISD::Tex2DFloatFloat: 3085 Opc = NVPTX::TEX_2D_F32_F32; 3086 break; 3087 case NVPTXISD::Tex2DFloatFloatLevel: 3088 Opc = NVPTX::TEX_2D_F32_F32_LEVEL; 3089 break; 3090 case NVPTXISD::Tex2DFloatFloatGrad: 3091 Opc = NVPTX::TEX_2D_F32_F32_GRAD; 3092 break; 3093 case NVPTXISD::Tex2DS32S32: 3094 Opc = NVPTX::TEX_2D_S32_S32; 3095 break; 3096 case NVPTXISD::Tex2DS32Float: 3097 Opc = NVPTX::TEX_2D_S32_F32; 3098 break; 3099 case NVPTXISD::Tex2DS32FloatLevel: 3100 Opc = NVPTX::TEX_2D_S32_F32_LEVEL; 3101 break; 3102 case NVPTXISD::Tex2DS32FloatGrad: 3103 Opc = NVPTX::TEX_2D_S32_F32_GRAD; 3104 break; 3105 case NVPTXISD::Tex2DU32S32: 3106 Opc = NVPTX::TEX_2D_U32_S32; 3107 break; 3108 case NVPTXISD::Tex2DU32Float: 3109 Opc = NVPTX::TEX_2D_U32_F32; 3110 break; 3111 case NVPTXISD::Tex2DU32FloatLevel: 3112 Opc = NVPTX::TEX_2D_U32_F32_LEVEL; 3113 break; 3114 case NVPTXISD::Tex2DU32FloatGrad: 3115 Opc = NVPTX::TEX_2D_U32_F32_GRAD; 3116 break; 3117 case NVPTXISD::Tex2DArrayFloatS32: 3118 Opc = NVPTX::TEX_2D_ARRAY_F32_S32; 3119 break; 3120 case NVPTXISD::Tex2DArrayFloatFloat: 3121 Opc = NVPTX::TEX_2D_ARRAY_F32_F32; 3122 break; 3123 case NVPTXISD::Tex2DArrayFloatFloatLevel: 3124 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; 3125 break; 3126 case NVPTXISD::Tex2DArrayFloatFloatGrad: 3127 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; 3128 break; 3129 case NVPTXISD::Tex2DArrayS32S32: 3130 Opc = NVPTX::TEX_2D_ARRAY_S32_S32; 3131 break; 3132 case NVPTXISD::Tex2DArrayS32Float: 3133 Opc = NVPTX::TEX_2D_ARRAY_S32_F32; 3134 break; 3135 case 
NVPTXISD::Tex2DArrayS32FloatLevel: 3136 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL; 3137 break; 3138 case NVPTXISD::Tex2DArrayS32FloatGrad: 3139 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD; 3140 break; 3141 case NVPTXISD::Tex2DArrayU32S32: 3142 Opc = NVPTX::TEX_2D_ARRAY_U32_S32; 3143 break; 3144 case NVPTXISD::Tex2DArrayU32Float: 3145 Opc = NVPTX::TEX_2D_ARRAY_U32_F32; 3146 break; 3147 case NVPTXISD::Tex2DArrayU32FloatLevel: 3148 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL; 3149 break; 3150 case NVPTXISD::Tex2DArrayU32FloatGrad: 3151 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD; 3152 break; 3153 case NVPTXISD::Tex3DFloatS32: 3154 Opc = NVPTX::TEX_3D_F32_S32; 3155 break; 3156 case NVPTXISD::Tex3DFloatFloat: 3157 Opc = NVPTX::TEX_3D_F32_F32; 3158 break; 3159 case NVPTXISD::Tex3DFloatFloatLevel: 3160 Opc = NVPTX::TEX_3D_F32_F32_LEVEL; 3161 break; 3162 case NVPTXISD::Tex3DFloatFloatGrad: 3163 Opc = NVPTX::TEX_3D_F32_F32_GRAD; 3164 break; 3165 case NVPTXISD::Tex3DS32S32: 3166 Opc = NVPTX::TEX_3D_S32_S32; 3167 break; 3168 case NVPTXISD::Tex3DS32Float: 3169 Opc = NVPTX::TEX_3D_S32_F32; 3170 break; 3171 case NVPTXISD::Tex3DS32FloatLevel: 3172 Opc = NVPTX::TEX_3D_S32_F32_LEVEL; 3173 break; 3174 case NVPTXISD::Tex3DS32FloatGrad: 3175 Opc = NVPTX::TEX_3D_S32_F32_GRAD; 3176 break; 3177 case NVPTXISD::Tex3DU32S32: 3178 Opc = NVPTX::TEX_3D_U32_S32; 3179 break; 3180 case NVPTXISD::Tex3DU32Float: 3181 Opc = NVPTX::TEX_3D_U32_F32; 3182 break; 3183 case NVPTXISD::Tex3DU32FloatLevel: 3184 Opc = NVPTX::TEX_3D_U32_F32_LEVEL; 3185 break; 3186 case NVPTXISD::Tex3DU32FloatGrad: 3187 Opc = NVPTX::TEX_3D_U32_F32_GRAD; 3188 break; 3189 case NVPTXISD::TexCubeFloatFloat: 3190 Opc = NVPTX::TEX_CUBE_F32_F32; 3191 break; 3192 case NVPTXISD::TexCubeFloatFloatLevel: 3193 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL; 3194 break; 3195 case NVPTXISD::TexCubeS32Float: 3196 Opc = NVPTX::TEX_CUBE_S32_F32; 3197 break; 3198 case NVPTXISD::TexCubeS32FloatLevel: 3199 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL; 3200 break; 3201 case 
NVPTXISD::TexCubeU32Float: 3202 Opc = NVPTX::TEX_CUBE_U32_F32; 3203 break; 3204 case NVPTXISD::TexCubeU32FloatLevel: 3205 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL; 3206 break; 3207 case NVPTXISD::TexCubeArrayFloatFloat: 3208 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32; 3209 break; 3210 case NVPTXISD::TexCubeArrayFloatFloatLevel: 3211 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL; 3212 break; 3213 case NVPTXISD::TexCubeArrayS32Float: 3214 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32; 3215 break; 3216 case NVPTXISD::TexCubeArrayS32FloatLevel: 3217 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL; 3218 break; 3219 case NVPTXISD::TexCubeArrayU32Float: 3220 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32; 3221 break; 3222 case NVPTXISD::TexCubeArrayU32FloatLevel: 3223 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL; 3224 break; 3225 case NVPTXISD::Tld4R2DFloatFloat: 3226 Opc = NVPTX::TLD4_R_2D_F32_F32; 3227 break; 3228 case NVPTXISD::Tld4G2DFloatFloat: 3229 Opc = NVPTX::TLD4_G_2D_F32_F32; 3230 break; 3231 case NVPTXISD::Tld4B2DFloatFloat: 3232 Opc = NVPTX::TLD4_B_2D_F32_F32; 3233 break; 3234 case NVPTXISD::Tld4A2DFloatFloat: 3235 Opc = NVPTX::TLD4_A_2D_F32_F32; 3236 break; 3237 case NVPTXISD::Tld4R2DS64Float: 3238 Opc = NVPTX::TLD4_R_2D_S32_F32; 3239 break; 3240 case NVPTXISD::Tld4G2DS64Float: 3241 Opc = NVPTX::TLD4_G_2D_S32_F32; 3242 break; 3243 case NVPTXISD::Tld4B2DS64Float: 3244 Opc = NVPTX::TLD4_B_2D_S32_F32; 3245 break; 3246 case NVPTXISD::Tld4A2DS64Float: 3247 Opc = NVPTX::TLD4_A_2D_S32_F32; 3248 break; 3249 case NVPTXISD::Tld4R2DU64Float: 3250 Opc = NVPTX::TLD4_R_2D_U32_F32; 3251 break; 3252 case NVPTXISD::Tld4G2DU64Float: 3253 Opc = NVPTX::TLD4_G_2D_U32_F32; 3254 break; 3255 case NVPTXISD::Tld4B2DU64Float: 3256 Opc = NVPTX::TLD4_B_2D_U32_F32; 3257 break; 3258 case NVPTXISD::Tld4A2DU64Float: 3259 Opc = NVPTX::TLD4_A_2D_U32_F32; 3260 break; 3261 case NVPTXISD::TexUnified1DFloatS32: 3262 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32; 3263 break; 3264 case NVPTXISD::TexUnified1DFloatFloat: 3265 Opc = 
NVPTX::TEX_UNIFIED_1D_F32_F32; 3266 break; 3267 case NVPTXISD::TexUnified1DFloatFloatLevel: 3268 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL; 3269 break; 3270 case NVPTXISD::TexUnified1DFloatFloatGrad: 3271 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD; 3272 break; 3273 case NVPTXISD::TexUnified1DS32S32: 3274 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32; 3275 break; 3276 case NVPTXISD::TexUnified1DS32Float: 3277 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32; 3278 break; 3279 case NVPTXISD::TexUnified1DS32FloatLevel: 3280 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL; 3281 break; 3282 case NVPTXISD::TexUnified1DS32FloatGrad: 3283 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD; 3284 break; 3285 case NVPTXISD::TexUnified1DU32S32: 3286 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32; 3287 break; 3288 case NVPTXISD::TexUnified1DU32Float: 3289 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32; 3290 break; 3291 case NVPTXISD::TexUnified1DU32FloatLevel: 3292 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL; 3293 break; 3294 case NVPTXISD::TexUnified1DU32FloatGrad: 3295 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD; 3296 break; 3297 case NVPTXISD::TexUnified1DArrayFloatS32: 3298 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32; 3299 break; 3300 case NVPTXISD::TexUnified1DArrayFloatFloat: 3301 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32; 3302 break; 3303 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 3304 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL; 3305 break; 3306 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 3307 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD; 3308 break; 3309 case NVPTXISD::TexUnified1DArrayS32S32: 3310 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32; 3311 break; 3312 case NVPTXISD::TexUnified1DArrayS32Float: 3313 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32; 3314 break; 3315 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 3316 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL; 3317 break; 3318 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 3319 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD; 3320 break; 3321 case 
NVPTXISD::TexUnified1DArrayU32S32: 3322 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32; 3323 break; 3324 case NVPTXISD::TexUnified1DArrayU32Float: 3325 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32; 3326 break; 3327 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 3328 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL; 3329 break; 3330 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 3331 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD; 3332 break; 3333 case NVPTXISD::TexUnified2DFloatS32: 3334 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32; 3335 break; 3336 case NVPTXISD::TexUnified2DFloatFloat: 3337 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32; 3338 break; 3339 case NVPTXISD::TexUnified2DFloatFloatLevel: 3340 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL; 3341 break; 3342 case NVPTXISD::TexUnified2DFloatFloatGrad: 3343 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD; 3344 break; 3345 case NVPTXISD::TexUnified2DS32S32: 3346 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32; 3347 break; 3348 case NVPTXISD::TexUnified2DS32Float: 3349 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32; 3350 break; 3351 case NVPTXISD::TexUnified2DS32FloatLevel: 3352 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL; 3353 break; 3354 case NVPTXISD::TexUnified2DS32FloatGrad: 3355 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD; 3356 break; 3357 case NVPTXISD::TexUnified2DU32S32: 3358 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32; 3359 break; 3360 case NVPTXISD::TexUnified2DU32Float: 3361 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32; 3362 break; 3363 case NVPTXISD::TexUnified2DU32FloatLevel: 3364 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL; 3365 break; 3366 case NVPTXISD::TexUnified2DU32FloatGrad: 3367 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD; 3368 break; 3369 case NVPTXISD::TexUnified2DArrayFloatS32: 3370 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32; 3371 break; 3372 case NVPTXISD::TexUnified2DArrayFloatFloat: 3373 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32; 3374 break; 3375 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 3376 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL; 3377 break; 3378 case 
NVPTXISD::TexUnified2DArrayFloatFloatGrad: 3379 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD; 3380 break; 3381 case NVPTXISD::TexUnified2DArrayS32S32: 3382 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32; 3383 break; 3384 case NVPTXISD::TexUnified2DArrayS32Float: 3385 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32; 3386 break; 3387 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 3388 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL; 3389 break; 3390 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 3391 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD; 3392 break; 3393 case NVPTXISD::TexUnified2DArrayU32S32: 3394 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32; 3395 break; 3396 case NVPTXISD::TexUnified2DArrayU32Float: 3397 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32; 3398 break; 3399 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 3400 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL; 3401 break; 3402 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 3403 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD; 3404 break; 3405 case NVPTXISD::TexUnified3DFloatS32: 3406 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32; 3407 break; 3408 case NVPTXISD::TexUnified3DFloatFloat: 3409 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32; 3410 break; 3411 case NVPTXISD::TexUnified3DFloatFloatLevel: 3412 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL; 3413 break; 3414 case NVPTXISD::TexUnified3DFloatFloatGrad: 3415 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD; 3416 break; 3417 case NVPTXISD::TexUnified3DS32S32: 3418 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32; 3419 break; 3420 case NVPTXISD::TexUnified3DS32Float: 3421 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32; 3422 break; 3423 case NVPTXISD::TexUnified3DS32FloatLevel: 3424 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL; 3425 break; 3426 case NVPTXISD::TexUnified3DS32FloatGrad: 3427 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD; 3428 break; 3429 case NVPTXISD::TexUnified3DU32S32: 3430 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32; 3431 break; 3432 case NVPTXISD::TexUnified3DU32Float: 3433 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32; 3434 
break; 3435 case NVPTXISD::TexUnified3DU32FloatLevel: 3436 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL; 3437 break; 3438 case NVPTXISD::TexUnified3DU32FloatGrad: 3439 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD; 3440 break; 3441 case NVPTXISD::TexUnifiedCubeFloatFloat: 3442 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32; 3443 break; 3444 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 3445 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL; 3446 break; 3447 case NVPTXISD::TexUnifiedCubeS32Float: 3448 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32; 3449 break; 3450 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 3451 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL; 3452 break; 3453 case NVPTXISD::TexUnifiedCubeU32Float: 3454 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32; 3455 break; 3456 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 3457 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL; 3458 break; 3459 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 3460 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32; 3461 break; 3462 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 3463 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL; 3464 break; 3465 case NVPTXISD::TexUnifiedCubeArrayS32Float: 3466 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32; 3467 break; 3468 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 3469 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL; 3470 break; 3471 case NVPTXISD::TexUnifiedCubeArrayU32Float: 3472 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32; 3473 break; 3474 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 3475 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL; 3476 break; 3477 case NVPTXISD::Tld4UnifiedR2DFloatFloat: 3478 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32; 3479 break; 3480 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 3481 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32; 3482 break; 3483 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 3484 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32; 3485 break; 3486 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 3487 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32; 3488 break; 3489 case 
NVPTXISD::Tld4UnifiedR2DS64Float: 3490 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32; 3491 break; 3492 case NVPTXISD::Tld4UnifiedG2DS64Float: 3493 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32; 3494 break; 3495 case NVPTXISD::Tld4UnifiedB2DS64Float: 3496 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32; 3497 break; 3498 case NVPTXISD::Tld4UnifiedA2DS64Float: 3499 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32; 3500 break; 3501 case NVPTXISD::Tld4UnifiedR2DU64Float: 3502 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32; 3503 break; 3504 case NVPTXISD::Tld4UnifiedG2DU64Float: 3505 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32; 3506 break; 3507 case NVPTXISD::Tld4UnifiedB2DU64Float: 3508 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32; 3509 break; 3510 case NVPTXISD::Tld4UnifiedA2DU64Float: 3511 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32; 3512 break; 3513 } 3514 3515 // Copy over operands 3516 for (unsigned i = 1; i < N->getNumOperands(); ++i) { 3517 Ops.push_back(N->getOperand(i)); 3518 } 3519 3520 Ops.push_back(Chain); 3521 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 3522 return Ret; 3523 } 3524 3525 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { 3526 SDValue Chain = N->getOperand(0); 3527 SDValue TexHandle = N->getOperand(1); 3528 SDNode *Ret = nullptr; 3529 unsigned Opc = 0; 3530 SmallVector<SDValue, 8> Ops; 3531 switch (N->getOpcode()) { 3532 default: return nullptr; 3533 case NVPTXISD::Suld1DI8Clamp: 3534 Opc = NVPTX::SULD_1D_I8_CLAMP; 3535 Ops.push_back(TexHandle); 3536 Ops.push_back(N->getOperand(2)); 3537 Ops.push_back(Chain); 3538 break; 3539 case NVPTXISD::Suld1DI16Clamp: 3540 Opc = NVPTX::SULD_1D_I16_CLAMP; 3541 Ops.push_back(TexHandle); 3542 Ops.push_back(N->getOperand(2)); 3543 Ops.push_back(Chain); 3544 break; 3545 case NVPTXISD::Suld1DI32Clamp: 3546 Opc = NVPTX::SULD_1D_I32_CLAMP; 3547 Ops.push_back(TexHandle); 3548 Ops.push_back(N->getOperand(2)); 3549 Ops.push_back(Chain); 3550 break; 3551 case NVPTXISD::Suld1DI64Clamp: 3552 Opc = NVPTX::SULD_1D_I64_CLAMP; 3553 
Ops.push_back(TexHandle); 3554 Ops.push_back(N->getOperand(2)); 3555 Ops.push_back(Chain); 3556 break; 3557 case NVPTXISD::Suld1DV2I8Clamp: 3558 Opc = NVPTX::SULD_1D_V2I8_CLAMP; 3559 Ops.push_back(TexHandle); 3560 Ops.push_back(N->getOperand(2)); 3561 Ops.push_back(Chain); 3562 break; 3563 case NVPTXISD::Suld1DV2I16Clamp: 3564 Opc = NVPTX::SULD_1D_V2I16_CLAMP; 3565 Ops.push_back(TexHandle); 3566 Ops.push_back(N->getOperand(2)); 3567 Ops.push_back(Chain); 3568 break; 3569 case NVPTXISD::Suld1DV2I32Clamp: 3570 Opc = NVPTX::SULD_1D_V2I32_CLAMP; 3571 Ops.push_back(TexHandle); 3572 Ops.push_back(N->getOperand(2)); 3573 Ops.push_back(Chain); 3574 break; 3575 case NVPTXISD::Suld1DV2I64Clamp: 3576 Opc = NVPTX::SULD_1D_V2I64_CLAMP; 3577 Ops.push_back(TexHandle); 3578 Ops.push_back(N->getOperand(2)); 3579 Ops.push_back(Chain); 3580 break; 3581 case NVPTXISD::Suld1DV4I8Clamp: 3582 Opc = NVPTX::SULD_1D_V4I8_CLAMP; 3583 Ops.push_back(TexHandle); 3584 Ops.push_back(N->getOperand(2)); 3585 Ops.push_back(Chain); 3586 break; 3587 case NVPTXISD::Suld1DV4I16Clamp: 3588 Opc = NVPTX::SULD_1D_V4I16_CLAMP; 3589 Ops.push_back(TexHandle); 3590 Ops.push_back(N->getOperand(2)); 3591 Ops.push_back(Chain); 3592 break; 3593 case NVPTXISD::Suld1DV4I32Clamp: 3594 Opc = NVPTX::SULD_1D_V4I32_CLAMP; 3595 Ops.push_back(TexHandle); 3596 Ops.push_back(N->getOperand(2)); 3597 Ops.push_back(Chain); 3598 break; 3599 case NVPTXISD::Suld1DArrayI8Clamp: 3600 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP; 3601 Ops.push_back(TexHandle); 3602 Ops.push_back(N->getOperand(2)); 3603 Ops.push_back(N->getOperand(3)); 3604 Ops.push_back(Chain); 3605 break; 3606 case NVPTXISD::Suld1DArrayI16Clamp: 3607 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP; 3608 Ops.push_back(TexHandle); 3609 Ops.push_back(N->getOperand(2)); 3610 Ops.push_back(N->getOperand(3)); 3611 Ops.push_back(Chain); 3612 break; 3613 case NVPTXISD::Suld1DArrayI32Clamp: 3614 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP; 3615 Ops.push_back(TexHandle); 3616 
Ops.push_back(N->getOperand(2)); 3617 Ops.push_back(N->getOperand(3)); 3618 Ops.push_back(Chain); 3619 break; 3620 case NVPTXISD::Suld1DArrayI64Clamp: 3621 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP; 3622 Ops.push_back(TexHandle); 3623 Ops.push_back(N->getOperand(2)); 3624 Ops.push_back(N->getOperand(3)); 3625 Ops.push_back(Chain); 3626 break; 3627 case NVPTXISD::Suld1DArrayV2I8Clamp: 3628 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP; 3629 Ops.push_back(TexHandle); 3630 Ops.push_back(N->getOperand(2)); 3631 Ops.push_back(N->getOperand(3)); 3632 Ops.push_back(Chain); 3633 break; 3634 case NVPTXISD::Suld1DArrayV2I16Clamp: 3635 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP; 3636 Ops.push_back(TexHandle); 3637 Ops.push_back(N->getOperand(2)); 3638 Ops.push_back(N->getOperand(3)); 3639 Ops.push_back(Chain); 3640 break; 3641 case NVPTXISD::Suld1DArrayV2I32Clamp: 3642 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP; 3643 Ops.push_back(TexHandle); 3644 Ops.push_back(N->getOperand(2)); 3645 Ops.push_back(N->getOperand(3)); 3646 Ops.push_back(Chain); 3647 break; 3648 case NVPTXISD::Suld1DArrayV2I64Clamp: 3649 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP; 3650 Ops.push_back(TexHandle); 3651 Ops.push_back(N->getOperand(2)); 3652 Ops.push_back(N->getOperand(3)); 3653 Ops.push_back(Chain); 3654 break; 3655 case NVPTXISD::Suld1DArrayV4I8Clamp: 3656 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP; 3657 Ops.push_back(TexHandle); 3658 Ops.push_back(N->getOperand(2)); 3659 Ops.push_back(N->getOperand(3)); 3660 Ops.push_back(Chain); 3661 break; 3662 case NVPTXISD::Suld1DArrayV4I16Clamp: 3663 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP; 3664 Ops.push_back(TexHandle); 3665 Ops.push_back(N->getOperand(2)); 3666 Ops.push_back(N->getOperand(3)); 3667 Ops.push_back(Chain); 3668 break; 3669 case NVPTXISD::Suld1DArrayV4I32Clamp: 3670 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP; 3671 Ops.push_back(TexHandle); 3672 Ops.push_back(N->getOperand(2)); 3673 Ops.push_back(N->getOperand(3)); 3674 Ops.push_back(Chain); 3675 break; 3676 case 
NVPTXISD::Suld2DI8Clamp: 3677 Opc = NVPTX::SULD_2D_I8_CLAMP; 3678 Ops.push_back(TexHandle); 3679 Ops.push_back(N->getOperand(2)); 3680 Ops.push_back(N->getOperand(3)); 3681 Ops.push_back(Chain); 3682 break; 3683 case NVPTXISD::Suld2DI16Clamp: 3684 Opc = NVPTX::SULD_2D_I16_CLAMP; 3685 Ops.push_back(TexHandle); 3686 Ops.push_back(N->getOperand(2)); 3687 Ops.push_back(N->getOperand(3)); 3688 Ops.push_back(Chain); 3689 break; 3690 case NVPTXISD::Suld2DI32Clamp: 3691 Opc = NVPTX::SULD_2D_I32_CLAMP; 3692 Ops.push_back(TexHandle); 3693 Ops.push_back(N->getOperand(2)); 3694 Ops.push_back(N->getOperand(3)); 3695 Ops.push_back(Chain); 3696 break; 3697 case NVPTXISD::Suld2DI64Clamp: 3698 Opc = NVPTX::SULD_2D_I64_CLAMP; 3699 Ops.push_back(TexHandle); 3700 Ops.push_back(N->getOperand(2)); 3701 Ops.push_back(N->getOperand(3)); 3702 Ops.push_back(Chain); 3703 break; 3704 case NVPTXISD::Suld2DV2I8Clamp: 3705 Opc = NVPTX::SULD_2D_V2I8_CLAMP; 3706 Ops.push_back(TexHandle); 3707 Ops.push_back(N->getOperand(2)); 3708 Ops.push_back(N->getOperand(3)); 3709 Ops.push_back(Chain); 3710 break; 3711 case NVPTXISD::Suld2DV2I16Clamp: 3712 Opc = NVPTX::SULD_2D_V2I16_CLAMP; 3713 Ops.push_back(TexHandle); 3714 Ops.push_back(N->getOperand(2)); 3715 Ops.push_back(N->getOperand(3)); 3716 Ops.push_back(Chain); 3717 break; 3718 case NVPTXISD::Suld2DV2I32Clamp: 3719 Opc = NVPTX::SULD_2D_V2I32_CLAMP; 3720 Ops.push_back(TexHandle); 3721 Ops.push_back(N->getOperand(2)); 3722 Ops.push_back(N->getOperand(3)); 3723 Ops.push_back(Chain); 3724 break; 3725 case NVPTXISD::Suld2DV2I64Clamp: 3726 Opc = NVPTX::SULD_2D_V2I64_CLAMP; 3727 Ops.push_back(TexHandle); 3728 Ops.push_back(N->getOperand(2)); 3729 Ops.push_back(N->getOperand(3)); 3730 Ops.push_back(Chain); 3731 break; 3732 case NVPTXISD::Suld2DV4I8Clamp: 3733 Opc = NVPTX::SULD_2D_V4I8_CLAMP; 3734 Ops.push_back(TexHandle); 3735 Ops.push_back(N->getOperand(2)); 3736 Ops.push_back(N->getOperand(3)); 3737 Ops.push_back(Chain); 3738 break; 3739 case 
NVPTXISD::Suld2DV4I16Clamp: 3740 Opc = NVPTX::SULD_2D_V4I16_CLAMP; 3741 Ops.push_back(TexHandle); 3742 Ops.push_back(N->getOperand(2)); 3743 Ops.push_back(N->getOperand(3)); 3744 Ops.push_back(Chain); 3745 break; 3746 case NVPTXISD::Suld2DV4I32Clamp: 3747 Opc = NVPTX::SULD_2D_V4I32_CLAMP; 3748 Ops.push_back(TexHandle); 3749 Ops.push_back(N->getOperand(2)); 3750 Ops.push_back(N->getOperand(3)); 3751 Ops.push_back(Chain); 3752 break; 3753 case NVPTXISD::Suld2DArrayI8Clamp: 3754 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP; 3755 Ops.push_back(TexHandle); 3756 Ops.push_back(N->getOperand(2)); 3757 Ops.push_back(N->getOperand(3)); 3758 Ops.push_back(N->getOperand(4)); 3759 Ops.push_back(Chain); 3760 break; 3761 case NVPTXISD::Suld2DArrayI16Clamp: 3762 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP; 3763 Ops.push_back(TexHandle); 3764 Ops.push_back(N->getOperand(2)); 3765 Ops.push_back(N->getOperand(3)); 3766 Ops.push_back(N->getOperand(4)); 3767 Ops.push_back(Chain); 3768 break; 3769 case NVPTXISD::Suld2DArrayI32Clamp: 3770 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP; 3771 Ops.push_back(TexHandle); 3772 Ops.push_back(N->getOperand(2)); 3773 Ops.push_back(N->getOperand(3)); 3774 Ops.push_back(N->getOperand(4)); 3775 Ops.push_back(Chain); 3776 break; 3777 case NVPTXISD::Suld2DArrayI64Clamp: 3778 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP; 3779 Ops.push_back(TexHandle); 3780 Ops.push_back(N->getOperand(2)); 3781 Ops.push_back(N->getOperand(3)); 3782 Ops.push_back(N->getOperand(4)); 3783 Ops.push_back(Chain); 3784 break; 3785 case NVPTXISD::Suld2DArrayV2I8Clamp: 3786 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP; 3787 Ops.push_back(TexHandle); 3788 Ops.push_back(N->getOperand(2)); 3789 Ops.push_back(N->getOperand(3)); 3790 Ops.push_back(N->getOperand(4)); 3791 Ops.push_back(Chain); 3792 break; 3793 case NVPTXISD::Suld2DArrayV2I16Clamp: 3794 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP; 3795 Ops.push_back(TexHandle); 3796 Ops.push_back(N->getOperand(2)); 3797 Ops.push_back(N->getOperand(3)); 3798 
Ops.push_back(N->getOperand(4)); 3799 Ops.push_back(Chain); 3800 break; 3801 case NVPTXISD::Suld2DArrayV2I32Clamp: 3802 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP; 3803 Ops.push_back(TexHandle); 3804 Ops.push_back(N->getOperand(2)); 3805 Ops.push_back(N->getOperand(3)); 3806 Ops.push_back(N->getOperand(4)); 3807 Ops.push_back(Chain); 3808 break; 3809 case NVPTXISD::Suld2DArrayV2I64Clamp: 3810 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP; 3811 Ops.push_back(TexHandle); 3812 Ops.push_back(N->getOperand(2)); 3813 Ops.push_back(N->getOperand(3)); 3814 Ops.push_back(N->getOperand(4)); 3815 Ops.push_back(Chain); 3816 break; 3817 case NVPTXISD::Suld2DArrayV4I8Clamp: 3818 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP; 3819 Ops.push_back(TexHandle); 3820 Ops.push_back(N->getOperand(2)); 3821 Ops.push_back(N->getOperand(3)); 3822 Ops.push_back(N->getOperand(4)); 3823 Ops.push_back(Chain); 3824 break; 3825 case NVPTXISD::Suld2DArrayV4I16Clamp: 3826 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP; 3827 Ops.push_back(TexHandle); 3828 Ops.push_back(N->getOperand(2)); 3829 Ops.push_back(N->getOperand(3)); 3830 Ops.push_back(N->getOperand(4)); 3831 Ops.push_back(Chain); 3832 break; 3833 case NVPTXISD::Suld2DArrayV4I32Clamp: 3834 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP; 3835 Ops.push_back(TexHandle); 3836 Ops.push_back(N->getOperand(2)); 3837 Ops.push_back(N->getOperand(3)); 3838 Ops.push_back(N->getOperand(4)); 3839 Ops.push_back(Chain); 3840 break; 3841 case NVPTXISD::Suld3DI8Clamp: 3842 Opc = NVPTX::SULD_3D_I8_CLAMP; 3843 Ops.push_back(TexHandle); 3844 Ops.push_back(N->getOperand(2)); 3845 Ops.push_back(N->getOperand(3)); 3846 Ops.push_back(N->getOperand(4)); 3847 Ops.push_back(Chain); 3848 break; 3849 case NVPTXISD::Suld3DI16Clamp: 3850 Opc = NVPTX::SULD_3D_I16_CLAMP; 3851 Ops.push_back(TexHandle); 3852 Ops.push_back(N->getOperand(2)); 3853 Ops.push_back(N->getOperand(3)); 3854 Ops.push_back(N->getOperand(4)); 3855 Ops.push_back(Chain); 3856 break; 3857 case NVPTXISD::Suld3DI32Clamp: 3858 Opc = 
NVPTX::SULD_3D_I32_CLAMP; 3859 Ops.push_back(TexHandle); 3860 Ops.push_back(N->getOperand(2)); 3861 Ops.push_back(N->getOperand(3)); 3862 Ops.push_back(N->getOperand(4)); 3863 Ops.push_back(Chain); 3864 break; 3865 case NVPTXISD::Suld3DI64Clamp: 3866 Opc = NVPTX::SULD_3D_I64_CLAMP; 3867 Ops.push_back(TexHandle); 3868 Ops.push_back(N->getOperand(2)); 3869 Ops.push_back(N->getOperand(3)); 3870 Ops.push_back(N->getOperand(4)); 3871 Ops.push_back(Chain); 3872 break; 3873 case NVPTXISD::Suld3DV2I8Clamp: 3874 Opc = NVPTX::SULD_3D_V2I8_CLAMP; 3875 Ops.push_back(TexHandle); 3876 Ops.push_back(N->getOperand(2)); 3877 Ops.push_back(N->getOperand(3)); 3878 Ops.push_back(N->getOperand(4)); 3879 Ops.push_back(Chain); 3880 break; 3881 case NVPTXISD::Suld3DV2I16Clamp: 3882 Opc = NVPTX::SULD_3D_V2I16_CLAMP; 3883 Ops.push_back(TexHandle); 3884 Ops.push_back(N->getOperand(2)); 3885 Ops.push_back(N->getOperand(3)); 3886 Ops.push_back(N->getOperand(4)); 3887 Ops.push_back(Chain); 3888 break; 3889 case NVPTXISD::Suld3DV2I32Clamp: 3890 Opc = NVPTX::SULD_3D_V2I32_CLAMP; 3891 Ops.push_back(TexHandle); 3892 Ops.push_back(N->getOperand(2)); 3893 Ops.push_back(N->getOperand(3)); 3894 Ops.push_back(N->getOperand(4)); 3895 Ops.push_back(Chain); 3896 break; 3897 case NVPTXISD::Suld3DV2I64Clamp: 3898 Opc = NVPTX::SULD_3D_V2I64_CLAMP; 3899 Ops.push_back(TexHandle); 3900 Ops.push_back(N->getOperand(2)); 3901 Ops.push_back(N->getOperand(3)); 3902 Ops.push_back(N->getOperand(4)); 3903 Ops.push_back(Chain); 3904 break; 3905 case NVPTXISD::Suld3DV4I8Clamp: 3906 Opc = NVPTX::SULD_3D_V4I8_CLAMP; 3907 Ops.push_back(TexHandle); 3908 Ops.push_back(N->getOperand(2)); 3909 Ops.push_back(N->getOperand(3)); 3910 Ops.push_back(N->getOperand(4)); 3911 Ops.push_back(Chain); 3912 break; 3913 case NVPTXISD::Suld3DV4I16Clamp: 3914 Opc = NVPTX::SULD_3D_V4I16_CLAMP; 3915 Ops.push_back(TexHandle); 3916 Ops.push_back(N->getOperand(2)); 3917 Ops.push_back(N->getOperand(3)); 3918 Ops.push_back(N->getOperand(4)); 3919 
Ops.push_back(Chain); 3920 break; 3921 case NVPTXISD::Suld3DV4I32Clamp: 3922 Opc = NVPTX::SULD_3D_V4I32_CLAMP; 3923 Ops.push_back(TexHandle); 3924 Ops.push_back(N->getOperand(2)); 3925 Ops.push_back(N->getOperand(3)); 3926 Ops.push_back(N->getOperand(4)); 3927 Ops.push_back(Chain); 3928 break; 3929 case NVPTXISD::Suld1DI8Trap: 3930 Opc = NVPTX::SULD_1D_I8_TRAP; 3931 Ops.push_back(TexHandle); 3932 Ops.push_back(N->getOperand(2)); 3933 Ops.push_back(Chain); 3934 break; 3935 case NVPTXISD::Suld1DI16Trap: 3936 Opc = NVPTX::SULD_1D_I16_TRAP; 3937 Ops.push_back(TexHandle); 3938 Ops.push_back(N->getOperand(2)); 3939 Ops.push_back(Chain); 3940 break; 3941 case NVPTXISD::Suld1DI32Trap: 3942 Opc = NVPTX::SULD_1D_I32_TRAP; 3943 Ops.push_back(TexHandle); 3944 Ops.push_back(N->getOperand(2)); 3945 Ops.push_back(Chain); 3946 break; 3947 case NVPTXISD::Suld1DI64Trap: 3948 Opc = NVPTX::SULD_1D_I64_TRAP; 3949 Ops.push_back(TexHandle); 3950 Ops.push_back(N->getOperand(2)); 3951 Ops.push_back(Chain); 3952 break; 3953 case NVPTXISD::Suld1DV2I8Trap: 3954 Opc = NVPTX::SULD_1D_V2I8_TRAP; 3955 Ops.push_back(TexHandle); 3956 Ops.push_back(N->getOperand(2)); 3957 Ops.push_back(Chain); 3958 break; 3959 case NVPTXISD::Suld1DV2I16Trap: 3960 Opc = NVPTX::SULD_1D_V2I16_TRAP; 3961 Ops.push_back(TexHandle); 3962 Ops.push_back(N->getOperand(2)); 3963 Ops.push_back(Chain); 3964 break; 3965 case NVPTXISD::Suld1DV2I32Trap: 3966 Opc = NVPTX::SULD_1D_V2I32_TRAP; 3967 Ops.push_back(TexHandle); 3968 Ops.push_back(N->getOperand(2)); 3969 Ops.push_back(Chain); 3970 break; 3971 case NVPTXISD::Suld1DV2I64Trap: 3972 Opc = NVPTX::SULD_1D_V2I64_TRAP; 3973 Ops.push_back(TexHandle); 3974 Ops.push_back(N->getOperand(2)); 3975 Ops.push_back(Chain); 3976 break; 3977 case NVPTXISD::Suld1DV4I8Trap: 3978 Opc = NVPTX::SULD_1D_V4I8_TRAP; 3979 Ops.push_back(TexHandle); 3980 Ops.push_back(N->getOperand(2)); 3981 Ops.push_back(Chain); 3982 break; 3983 case NVPTXISD::Suld1DV4I16Trap: 3984 Opc = NVPTX::SULD_1D_V4I16_TRAP; 3985 
Ops.push_back(TexHandle); 3986 Ops.push_back(N->getOperand(2)); 3987 Ops.push_back(Chain); 3988 break; 3989 case NVPTXISD::Suld1DV4I32Trap: 3990 Opc = NVPTX::SULD_1D_V4I32_TRAP; 3991 Ops.push_back(TexHandle); 3992 Ops.push_back(N->getOperand(2)); 3993 Ops.push_back(Chain); 3994 break; 3995 case NVPTXISD::Suld1DArrayI8Trap: 3996 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; 3997 Ops.push_back(TexHandle); 3998 Ops.push_back(N->getOperand(2)); 3999 Ops.push_back(N->getOperand(3)); 4000 Ops.push_back(Chain); 4001 break; 4002 case NVPTXISD::Suld1DArrayI16Trap: 4003 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; 4004 Ops.push_back(TexHandle); 4005 Ops.push_back(N->getOperand(2)); 4006 Ops.push_back(N->getOperand(3)); 4007 Ops.push_back(Chain); 4008 break; 4009 case NVPTXISD::Suld1DArrayI32Trap: 4010 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; 4011 Ops.push_back(TexHandle); 4012 Ops.push_back(N->getOperand(2)); 4013 Ops.push_back(N->getOperand(3)); 4014 Ops.push_back(Chain); 4015 break; 4016 case NVPTXISD::Suld1DArrayI64Trap: 4017 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP; 4018 Ops.push_back(TexHandle); 4019 Ops.push_back(N->getOperand(2)); 4020 Ops.push_back(N->getOperand(3)); 4021 Ops.push_back(Chain); 4022 break; 4023 case NVPTXISD::Suld1DArrayV2I8Trap: 4024 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; 4025 Ops.push_back(TexHandle); 4026 Ops.push_back(N->getOperand(2)); 4027 Ops.push_back(N->getOperand(3)); 4028 Ops.push_back(Chain); 4029 break; 4030 case NVPTXISD::Suld1DArrayV2I16Trap: 4031 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; 4032 Ops.push_back(TexHandle); 4033 Ops.push_back(N->getOperand(2)); 4034 Ops.push_back(N->getOperand(3)); 4035 Ops.push_back(Chain); 4036 break; 4037 case NVPTXISD::Suld1DArrayV2I32Trap: 4038 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; 4039 Ops.push_back(TexHandle); 4040 Ops.push_back(N->getOperand(2)); 4041 Ops.push_back(N->getOperand(3)); 4042 Ops.push_back(Chain); 4043 break; 4044 case NVPTXISD::Suld1DArrayV2I64Trap: 4045 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP; 4046 
Ops.push_back(TexHandle); 4047 Ops.push_back(N->getOperand(2)); 4048 Ops.push_back(N->getOperand(3)); 4049 Ops.push_back(Chain); 4050 break; 4051 case NVPTXISD::Suld1DArrayV4I8Trap: 4052 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; 4053 Ops.push_back(TexHandle); 4054 Ops.push_back(N->getOperand(2)); 4055 Ops.push_back(N->getOperand(3)); 4056 Ops.push_back(Chain); 4057 break; 4058 case NVPTXISD::Suld1DArrayV4I16Trap: 4059 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; 4060 Ops.push_back(TexHandle); 4061 Ops.push_back(N->getOperand(2)); 4062 Ops.push_back(N->getOperand(3)); 4063 Ops.push_back(Chain); 4064 break; 4065 case NVPTXISD::Suld1DArrayV4I32Trap: 4066 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; 4067 Ops.push_back(TexHandle); 4068 Ops.push_back(N->getOperand(2)); 4069 Ops.push_back(N->getOperand(3)); 4070 Ops.push_back(Chain); 4071 break; 4072 case NVPTXISD::Suld2DI8Trap: 4073 Opc = NVPTX::SULD_2D_I8_TRAP; 4074 Ops.push_back(TexHandle); 4075 Ops.push_back(N->getOperand(2)); 4076 Ops.push_back(N->getOperand(3)); 4077 Ops.push_back(Chain); 4078 break; 4079 case NVPTXISD::Suld2DI16Trap: 4080 Opc = NVPTX::SULD_2D_I16_TRAP; 4081 Ops.push_back(TexHandle); 4082 Ops.push_back(N->getOperand(2)); 4083 Ops.push_back(N->getOperand(3)); 4084 Ops.push_back(Chain); 4085 break; 4086 case NVPTXISD::Suld2DI32Trap: 4087 Opc = NVPTX::SULD_2D_I32_TRAP; 4088 Ops.push_back(TexHandle); 4089 Ops.push_back(N->getOperand(2)); 4090 Ops.push_back(N->getOperand(3)); 4091 Ops.push_back(Chain); 4092 break; 4093 case NVPTXISD::Suld2DI64Trap: 4094 Opc = NVPTX::SULD_2D_I64_TRAP; 4095 Ops.push_back(TexHandle); 4096 Ops.push_back(N->getOperand(2)); 4097 Ops.push_back(N->getOperand(3)); 4098 Ops.push_back(Chain); 4099 break; 4100 case NVPTXISD::Suld2DV2I8Trap: 4101 Opc = NVPTX::SULD_2D_V2I8_TRAP; 4102 Ops.push_back(TexHandle); 4103 Ops.push_back(N->getOperand(2)); 4104 Ops.push_back(N->getOperand(3)); 4105 Ops.push_back(Chain); 4106 break; 4107 case NVPTXISD::Suld2DV2I16Trap: 4108 Opc = NVPTX::SULD_2D_V2I16_TRAP; 4109 
Ops.push_back(TexHandle); 4110 Ops.push_back(N->getOperand(2)); 4111 Ops.push_back(N->getOperand(3)); 4112 Ops.push_back(Chain); 4113 break; 4114 case NVPTXISD::Suld2DV2I32Trap: 4115 Opc = NVPTX::SULD_2D_V2I32_TRAP; 4116 Ops.push_back(TexHandle); 4117 Ops.push_back(N->getOperand(2)); 4118 Ops.push_back(N->getOperand(3)); 4119 Ops.push_back(Chain); 4120 break; 4121 case NVPTXISD::Suld2DV2I64Trap: 4122 Opc = NVPTX::SULD_2D_V2I64_TRAP; 4123 Ops.push_back(TexHandle); 4124 Ops.push_back(N->getOperand(2)); 4125 Ops.push_back(N->getOperand(3)); 4126 Ops.push_back(Chain); 4127 break; 4128 case NVPTXISD::Suld2DV4I8Trap: 4129 Opc = NVPTX::SULD_2D_V4I8_TRAP; 4130 Ops.push_back(TexHandle); 4131 Ops.push_back(N->getOperand(2)); 4132 Ops.push_back(N->getOperand(3)); 4133 Ops.push_back(Chain); 4134 break; 4135 case NVPTXISD::Suld2DV4I16Trap: 4136 Opc = NVPTX::SULD_2D_V4I16_TRAP; 4137 Ops.push_back(TexHandle); 4138 Ops.push_back(N->getOperand(2)); 4139 Ops.push_back(N->getOperand(3)); 4140 Ops.push_back(Chain); 4141 break; 4142 case NVPTXISD::Suld2DV4I32Trap: 4143 Opc = NVPTX::SULD_2D_V4I32_TRAP; 4144 Ops.push_back(TexHandle); 4145 Ops.push_back(N->getOperand(2)); 4146 Ops.push_back(N->getOperand(3)); 4147 Ops.push_back(Chain); 4148 break; 4149 case NVPTXISD::Suld2DArrayI8Trap: 4150 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; 4151 Ops.push_back(TexHandle); 4152 Ops.push_back(N->getOperand(2)); 4153 Ops.push_back(N->getOperand(3)); 4154 Ops.push_back(N->getOperand(4)); 4155 Ops.push_back(Chain); 4156 break; 4157 case NVPTXISD::Suld2DArrayI16Trap: 4158 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; 4159 Ops.push_back(TexHandle); 4160 Ops.push_back(N->getOperand(2)); 4161 Ops.push_back(N->getOperand(3)); 4162 Ops.push_back(N->getOperand(4)); 4163 Ops.push_back(Chain); 4164 break; 4165 case NVPTXISD::Suld2DArrayI32Trap: 4166 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; 4167 Ops.push_back(TexHandle); 4168 Ops.push_back(N->getOperand(2)); 4169 Ops.push_back(N->getOperand(3)); 4170 
Ops.push_back(N->getOperand(4)); 4171 Ops.push_back(Chain); 4172 break; 4173 case NVPTXISD::Suld2DArrayI64Trap: 4174 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP; 4175 Ops.push_back(TexHandle); 4176 Ops.push_back(N->getOperand(2)); 4177 Ops.push_back(N->getOperand(3)); 4178 Ops.push_back(N->getOperand(4)); 4179 Ops.push_back(Chain); 4180 break; 4181 case NVPTXISD::Suld2DArrayV2I8Trap: 4182 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; 4183 Ops.push_back(TexHandle); 4184 Ops.push_back(N->getOperand(2)); 4185 Ops.push_back(N->getOperand(3)); 4186 Ops.push_back(N->getOperand(4)); 4187 Ops.push_back(Chain); 4188 break; 4189 case NVPTXISD::Suld2DArrayV2I16Trap: 4190 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; 4191 Ops.push_back(TexHandle); 4192 Ops.push_back(N->getOperand(2)); 4193 Ops.push_back(N->getOperand(3)); 4194 Ops.push_back(N->getOperand(4)); 4195 Ops.push_back(Chain); 4196 break; 4197 case NVPTXISD::Suld2DArrayV2I32Trap: 4198 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; 4199 Ops.push_back(TexHandle); 4200 Ops.push_back(N->getOperand(2)); 4201 Ops.push_back(N->getOperand(3)); 4202 Ops.push_back(N->getOperand(4)); 4203 Ops.push_back(Chain); 4204 break; 4205 case NVPTXISD::Suld2DArrayV2I64Trap: 4206 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP; 4207 Ops.push_back(TexHandle); 4208 Ops.push_back(N->getOperand(2)); 4209 Ops.push_back(N->getOperand(3)); 4210 Ops.push_back(N->getOperand(4)); 4211 Ops.push_back(Chain); 4212 break; 4213 case NVPTXISD::Suld2DArrayV4I8Trap: 4214 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; 4215 Ops.push_back(TexHandle); 4216 Ops.push_back(N->getOperand(2)); 4217 Ops.push_back(N->getOperand(3)); 4218 Ops.push_back(N->getOperand(4)); 4219 Ops.push_back(Chain); 4220 break; 4221 case NVPTXISD::Suld2DArrayV4I16Trap: 4222 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; 4223 Ops.push_back(TexHandle); 4224 Ops.push_back(N->getOperand(2)); 4225 Ops.push_back(N->getOperand(3)); 4226 Ops.push_back(N->getOperand(4)); 4227 Ops.push_back(Chain); 4228 break; 4229 case NVPTXISD::Suld2DArrayV4I32Trap: 
4230 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; 4231 Ops.push_back(TexHandle); 4232 Ops.push_back(N->getOperand(2)); 4233 Ops.push_back(N->getOperand(3)); 4234 Ops.push_back(N->getOperand(4)); 4235 Ops.push_back(Chain); 4236 break; 4237 case NVPTXISD::Suld3DI8Trap: 4238 Opc = NVPTX::SULD_3D_I8_TRAP; 4239 Ops.push_back(TexHandle); 4240 Ops.push_back(N->getOperand(2)); 4241 Ops.push_back(N->getOperand(3)); 4242 Ops.push_back(N->getOperand(4)); 4243 Ops.push_back(Chain); 4244 break; 4245 case NVPTXISD::Suld3DI16Trap: 4246 Opc = NVPTX::SULD_3D_I16_TRAP; 4247 Ops.push_back(TexHandle); 4248 Ops.push_back(N->getOperand(2)); 4249 Ops.push_back(N->getOperand(3)); 4250 Ops.push_back(N->getOperand(4)); 4251 Ops.push_back(Chain); 4252 break; 4253 case NVPTXISD::Suld3DI32Trap: 4254 Opc = NVPTX::SULD_3D_I32_TRAP; 4255 Ops.push_back(TexHandle); 4256 Ops.push_back(N->getOperand(2)); 4257 Ops.push_back(N->getOperand(3)); 4258 Ops.push_back(N->getOperand(4)); 4259 Ops.push_back(Chain); 4260 break; 4261 case NVPTXISD::Suld3DI64Trap: 4262 Opc = NVPTX::SULD_3D_I64_TRAP; 4263 Ops.push_back(TexHandle); 4264 Ops.push_back(N->getOperand(2)); 4265 Ops.push_back(N->getOperand(3)); 4266 Ops.push_back(N->getOperand(4)); 4267 Ops.push_back(Chain); 4268 break; 4269 case NVPTXISD::Suld3DV2I8Trap: 4270 Opc = NVPTX::SULD_3D_V2I8_TRAP; 4271 Ops.push_back(TexHandle); 4272 Ops.push_back(N->getOperand(2)); 4273 Ops.push_back(N->getOperand(3)); 4274 Ops.push_back(N->getOperand(4)); 4275 Ops.push_back(Chain); 4276 break; 4277 case NVPTXISD::Suld3DV2I16Trap: 4278 Opc = NVPTX::SULD_3D_V2I16_TRAP; 4279 Ops.push_back(TexHandle); 4280 Ops.push_back(N->getOperand(2)); 4281 Ops.push_back(N->getOperand(3)); 4282 Ops.push_back(N->getOperand(4)); 4283 Ops.push_back(Chain); 4284 break; 4285 case NVPTXISD::Suld3DV2I32Trap: 4286 Opc = NVPTX::SULD_3D_V2I32_TRAP; 4287 Ops.push_back(TexHandle); 4288 Ops.push_back(N->getOperand(2)); 4289 Ops.push_back(N->getOperand(3)); 4290 Ops.push_back(N->getOperand(4)); 4291 
Ops.push_back(Chain); 4292 break; 4293 case NVPTXISD::Suld3DV2I64Trap: 4294 Opc = NVPTX::SULD_3D_V2I64_TRAP; 4295 Ops.push_back(TexHandle); 4296 Ops.push_back(N->getOperand(2)); 4297 Ops.push_back(N->getOperand(3)); 4298 Ops.push_back(N->getOperand(4)); 4299 Ops.push_back(Chain); 4300 break; 4301 case NVPTXISD::Suld3DV4I8Trap: 4302 Opc = NVPTX::SULD_3D_V4I8_TRAP; 4303 Ops.push_back(TexHandle); 4304 Ops.push_back(N->getOperand(2)); 4305 Ops.push_back(N->getOperand(3)); 4306 Ops.push_back(N->getOperand(4)); 4307 Ops.push_back(Chain); 4308 break; 4309 case NVPTXISD::Suld3DV4I16Trap: 4310 Opc = NVPTX::SULD_3D_V4I16_TRAP; 4311 Ops.push_back(TexHandle); 4312 Ops.push_back(N->getOperand(2)); 4313 Ops.push_back(N->getOperand(3)); 4314 Ops.push_back(N->getOperand(4)); 4315 Ops.push_back(Chain); 4316 break; 4317 case NVPTXISD::Suld3DV4I32Trap: 4318 Opc = NVPTX::SULD_3D_V4I32_TRAP; 4319 Ops.push_back(TexHandle); 4320 Ops.push_back(N->getOperand(2)); 4321 Ops.push_back(N->getOperand(3)); 4322 Ops.push_back(N->getOperand(4)); 4323 Ops.push_back(Chain); 4324 break; 4325 case NVPTXISD::Suld1DI8Zero: 4326 Opc = NVPTX::SULD_1D_I8_ZERO; 4327 Ops.push_back(TexHandle); 4328 Ops.push_back(N->getOperand(2)); 4329 Ops.push_back(Chain); 4330 break; 4331 case NVPTXISD::Suld1DI16Zero: 4332 Opc = NVPTX::SULD_1D_I16_ZERO; 4333 Ops.push_back(TexHandle); 4334 Ops.push_back(N->getOperand(2)); 4335 Ops.push_back(Chain); 4336 break; 4337 case NVPTXISD::Suld1DI32Zero: 4338 Opc = NVPTX::SULD_1D_I32_ZERO; 4339 Ops.push_back(TexHandle); 4340 Ops.push_back(N->getOperand(2)); 4341 Ops.push_back(Chain); 4342 break; 4343 case NVPTXISD::Suld1DI64Zero: 4344 Opc = NVPTX::SULD_1D_I64_ZERO; 4345 Ops.push_back(TexHandle); 4346 Ops.push_back(N->getOperand(2)); 4347 Ops.push_back(Chain); 4348 break; 4349 case NVPTXISD::Suld1DV2I8Zero: 4350 Opc = NVPTX::SULD_1D_V2I8_ZERO; 4351 Ops.push_back(TexHandle); 4352 Ops.push_back(N->getOperand(2)); 4353 Ops.push_back(Chain); 4354 break; 4355 case NVPTXISD::Suld1DV2I16Zero: 
4356 Opc = NVPTX::SULD_1D_V2I16_ZERO; 4357 Ops.push_back(TexHandle); 4358 Ops.push_back(N->getOperand(2)); 4359 Ops.push_back(Chain); 4360 break; 4361 case NVPTXISD::Suld1DV2I32Zero: 4362 Opc = NVPTX::SULD_1D_V2I32_ZERO; 4363 Ops.push_back(TexHandle); 4364 Ops.push_back(N->getOperand(2)); 4365 Ops.push_back(Chain); 4366 break; 4367 case NVPTXISD::Suld1DV2I64Zero: 4368 Opc = NVPTX::SULD_1D_V2I64_ZERO; 4369 Ops.push_back(TexHandle); 4370 Ops.push_back(N->getOperand(2)); 4371 Ops.push_back(Chain); 4372 break; 4373 case NVPTXISD::Suld1DV4I8Zero: 4374 Opc = NVPTX::SULD_1D_V4I8_ZERO; 4375 Ops.push_back(TexHandle); 4376 Ops.push_back(N->getOperand(2)); 4377 Ops.push_back(Chain); 4378 break; 4379 case NVPTXISD::Suld1DV4I16Zero: 4380 Opc = NVPTX::SULD_1D_V4I16_ZERO; 4381 Ops.push_back(TexHandle); 4382 Ops.push_back(N->getOperand(2)); 4383 Ops.push_back(Chain); 4384 break; 4385 case NVPTXISD::Suld1DV4I32Zero: 4386 Opc = NVPTX::SULD_1D_V4I32_ZERO; 4387 Ops.push_back(TexHandle); 4388 Ops.push_back(N->getOperand(2)); 4389 Ops.push_back(Chain); 4390 break; 4391 case NVPTXISD::Suld1DArrayI8Zero: 4392 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO; 4393 Ops.push_back(TexHandle); 4394 Ops.push_back(N->getOperand(2)); 4395 Ops.push_back(N->getOperand(3)); 4396 Ops.push_back(Chain); 4397 break; 4398 case NVPTXISD::Suld1DArrayI16Zero: 4399 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO; 4400 Ops.push_back(TexHandle); 4401 Ops.push_back(N->getOperand(2)); 4402 Ops.push_back(N->getOperand(3)); 4403 Ops.push_back(Chain); 4404 break; 4405 case NVPTXISD::Suld1DArrayI32Zero: 4406 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO; 4407 Ops.push_back(TexHandle); 4408 Ops.push_back(N->getOperand(2)); 4409 Ops.push_back(N->getOperand(3)); 4410 Ops.push_back(Chain); 4411 break; 4412 case NVPTXISD::Suld1DArrayI64Zero: 4413 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO; 4414 Ops.push_back(TexHandle); 4415 Ops.push_back(N->getOperand(2)); 4416 Ops.push_back(N->getOperand(3)); 4417 Ops.push_back(Chain); 4418 break; 4419 case 
NVPTXISD::Suld1DArrayV2I8Zero: 4420 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO; 4421 Ops.push_back(TexHandle); 4422 Ops.push_back(N->getOperand(2)); 4423 Ops.push_back(N->getOperand(3)); 4424 Ops.push_back(Chain); 4425 break; 4426 case NVPTXISD::Suld1DArrayV2I16Zero: 4427 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO; 4428 Ops.push_back(TexHandle); 4429 Ops.push_back(N->getOperand(2)); 4430 Ops.push_back(N->getOperand(3)); 4431 Ops.push_back(Chain); 4432 break; 4433 case NVPTXISD::Suld1DArrayV2I32Zero: 4434 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO; 4435 Ops.push_back(TexHandle); 4436 Ops.push_back(N->getOperand(2)); 4437 Ops.push_back(N->getOperand(3)); 4438 Ops.push_back(Chain); 4439 break; 4440 case NVPTXISD::Suld1DArrayV2I64Zero: 4441 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO; 4442 Ops.push_back(TexHandle); 4443 Ops.push_back(N->getOperand(2)); 4444 Ops.push_back(N->getOperand(3)); 4445 Ops.push_back(Chain); 4446 break; 4447 case NVPTXISD::Suld1DArrayV4I8Zero: 4448 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO; 4449 Ops.push_back(TexHandle); 4450 Ops.push_back(N->getOperand(2)); 4451 Ops.push_back(N->getOperand(3)); 4452 Ops.push_back(Chain); 4453 break; 4454 case NVPTXISD::Suld1DArrayV4I16Zero: 4455 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO; 4456 Ops.push_back(TexHandle); 4457 Ops.push_back(N->getOperand(2)); 4458 Ops.push_back(N->getOperand(3)); 4459 Ops.push_back(Chain); 4460 break; 4461 case NVPTXISD::Suld1DArrayV4I32Zero: 4462 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO; 4463 Ops.push_back(TexHandle); 4464 Ops.push_back(N->getOperand(2)); 4465 Ops.push_back(N->getOperand(3)); 4466 Ops.push_back(Chain); 4467 break; 4468 case NVPTXISD::Suld2DI8Zero: 4469 Opc = NVPTX::SULD_2D_I8_ZERO; 4470 Ops.push_back(TexHandle); 4471 Ops.push_back(N->getOperand(2)); 4472 Ops.push_back(N->getOperand(3)); 4473 Ops.push_back(Chain); 4474 break; 4475 case NVPTXISD::Suld2DI16Zero: 4476 Opc = NVPTX::SULD_2D_I16_ZERO; 4477 Ops.push_back(TexHandle); 4478 Ops.push_back(N->getOperand(2)); 4479 
Ops.push_back(N->getOperand(3)); 4480 Ops.push_back(Chain); 4481 break; 4482 case NVPTXISD::Suld2DI32Zero: 4483 Opc = NVPTX::SULD_2D_I32_ZERO; 4484 Ops.push_back(TexHandle); 4485 Ops.push_back(N->getOperand(2)); 4486 Ops.push_back(N->getOperand(3)); 4487 Ops.push_back(Chain); 4488 break; 4489 case NVPTXISD::Suld2DI64Zero: 4490 Opc = NVPTX::SULD_2D_I64_ZERO; 4491 Ops.push_back(TexHandle); 4492 Ops.push_back(N->getOperand(2)); 4493 Ops.push_back(N->getOperand(3)); 4494 Ops.push_back(Chain); 4495 break; 4496 case NVPTXISD::Suld2DV2I8Zero: 4497 Opc = NVPTX::SULD_2D_V2I8_ZERO; 4498 Ops.push_back(TexHandle); 4499 Ops.push_back(N->getOperand(2)); 4500 Ops.push_back(N->getOperand(3)); 4501 Ops.push_back(Chain); 4502 break; 4503 case NVPTXISD::Suld2DV2I16Zero: 4504 Opc = NVPTX::SULD_2D_V2I16_ZERO; 4505 Ops.push_back(TexHandle); 4506 Ops.push_back(N->getOperand(2)); 4507 Ops.push_back(N->getOperand(3)); 4508 Ops.push_back(Chain); 4509 break; 4510 case NVPTXISD::Suld2DV2I32Zero: 4511 Opc = NVPTX::SULD_2D_V2I32_ZERO; 4512 Ops.push_back(TexHandle); 4513 Ops.push_back(N->getOperand(2)); 4514 Ops.push_back(N->getOperand(3)); 4515 Ops.push_back(Chain); 4516 break; 4517 case NVPTXISD::Suld2DV2I64Zero: 4518 Opc = NVPTX::SULD_2D_V2I64_ZERO; 4519 Ops.push_back(TexHandle); 4520 Ops.push_back(N->getOperand(2)); 4521 Ops.push_back(N->getOperand(3)); 4522 Ops.push_back(Chain); 4523 break; 4524 case NVPTXISD::Suld2DV4I8Zero: 4525 Opc = NVPTX::SULD_2D_V4I8_ZERO; 4526 Ops.push_back(TexHandle); 4527 Ops.push_back(N->getOperand(2)); 4528 Ops.push_back(N->getOperand(3)); 4529 Ops.push_back(Chain); 4530 break; 4531 case NVPTXISD::Suld2DV4I16Zero: 4532 Opc = NVPTX::SULD_2D_V4I16_ZERO; 4533 Ops.push_back(TexHandle); 4534 Ops.push_back(N->getOperand(2)); 4535 Ops.push_back(N->getOperand(3)); 4536 Ops.push_back(Chain); 4537 break; 4538 case NVPTXISD::Suld2DV4I32Zero: 4539 Opc = NVPTX::SULD_2D_V4I32_ZERO; 4540 Ops.push_back(TexHandle); 4541 Ops.push_back(N->getOperand(2)); 4542 
Ops.push_back(N->getOperand(3)); 4543 Ops.push_back(Chain); 4544 break; 4545 case NVPTXISD::Suld2DArrayI8Zero: 4546 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO; 4547 Ops.push_back(TexHandle); 4548 Ops.push_back(N->getOperand(2)); 4549 Ops.push_back(N->getOperand(3)); 4550 Ops.push_back(N->getOperand(4)); 4551 Ops.push_back(Chain); 4552 break; 4553 case NVPTXISD::Suld2DArrayI16Zero: 4554 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO; 4555 Ops.push_back(TexHandle); 4556 Ops.push_back(N->getOperand(2)); 4557 Ops.push_back(N->getOperand(3)); 4558 Ops.push_back(N->getOperand(4)); 4559 Ops.push_back(Chain); 4560 break; 4561 case NVPTXISD::Suld2DArrayI32Zero: 4562 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO; 4563 Ops.push_back(TexHandle); 4564 Ops.push_back(N->getOperand(2)); 4565 Ops.push_back(N->getOperand(3)); 4566 Ops.push_back(N->getOperand(4)); 4567 Ops.push_back(Chain); 4568 break; 4569 case NVPTXISD::Suld2DArrayI64Zero: 4570 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO; 4571 Ops.push_back(TexHandle); 4572 Ops.push_back(N->getOperand(2)); 4573 Ops.push_back(N->getOperand(3)); 4574 Ops.push_back(N->getOperand(4)); 4575 Ops.push_back(Chain); 4576 break; 4577 case NVPTXISD::Suld2DArrayV2I8Zero: 4578 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO; 4579 Ops.push_back(TexHandle); 4580 Ops.push_back(N->getOperand(2)); 4581 Ops.push_back(N->getOperand(3)); 4582 Ops.push_back(N->getOperand(4)); 4583 Ops.push_back(Chain); 4584 break; 4585 case NVPTXISD::Suld2DArrayV2I16Zero: 4586 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO; 4587 Ops.push_back(TexHandle); 4588 Ops.push_back(N->getOperand(2)); 4589 Ops.push_back(N->getOperand(3)); 4590 Ops.push_back(N->getOperand(4)); 4591 Ops.push_back(Chain); 4592 break; 4593 case NVPTXISD::Suld2DArrayV2I32Zero: 4594 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO; 4595 Ops.push_back(TexHandle); 4596 Ops.push_back(N->getOperand(2)); 4597 Ops.push_back(N->getOperand(3)); 4598 Ops.push_back(N->getOperand(4)); 4599 Ops.push_back(Chain); 4600 break; 4601 case NVPTXISD::Suld2DArrayV2I64Zero: 4602 Opc = 
NVPTX::SULD_2D_ARRAY_V2I64_ZERO; 4603 Ops.push_back(TexHandle); 4604 Ops.push_back(N->getOperand(2)); 4605 Ops.push_back(N->getOperand(3)); 4606 Ops.push_back(N->getOperand(4)); 4607 Ops.push_back(Chain); 4608 break; 4609 case NVPTXISD::Suld2DArrayV4I8Zero: 4610 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO; 4611 Ops.push_back(TexHandle); 4612 Ops.push_back(N->getOperand(2)); 4613 Ops.push_back(N->getOperand(3)); 4614 Ops.push_back(N->getOperand(4)); 4615 Ops.push_back(Chain); 4616 break; 4617 case NVPTXISD::Suld2DArrayV4I16Zero: 4618 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO; 4619 Ops.push_back(TexHandle); 4620 Ops.push_back(N->getOperand(2)); 4621 Ops.push_back(N->getOperand(3)); 4622 Ops.push_back(N->getOperand(4)); 4623 Ops.push_back(Chain); 4624 break; 4625 case NVPTXISD::Suld2DArrayV4I32Zero: 4626 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO; 4627 Ops.push_back(TexHandle); 4628 Ops.push_back(N->getOperand(2)); 4629 Ops.push_back(N->getOperand(3)); 4630 Ops.push_back(N->getOperand(4)); 4631 Ops.push_back(Chain); 4632 break; 4633 case NVPTXISD::Suld3DI8Zero: 4634 Opc = NVPTX::SULD_3D_I8_ZERO; 4635 Ops.push_back(TexHandle); 4636 Ops.push_back(N->getOperand(2)); 4637 Ops.push_back(N->getOperand(3)); 4638 Ops.push_back(N->getOperand(4)); 4639 Ops.push_back(Chain); 4640 break; 4641 case NVPTXISD::Suld3DI16Zero: 4642 Opc = NVPTX::SULD_3D_I16_ZERO; 4643 Ops.push_back(TexHandle); 4644 Ops.push_back(N->getOperand(2)); 4645 Ops.push_back(N->getOperand(3)); 4646 Ops.push_back(N->getOperand(4)); 4647 Ops.push_back(Chain); 4648 break; 4649 case NVPTXISD::Suld3DI32Zero: 4650 Opc = NVPTX::SULD_3D_I32_ZERO; 4651 Ops.push_back(TexHandle); 4652 Ops.push_back(N->getOperand(2)); 4653 Ops.push_back(N->getOperand(3)); 4654 Ops.push_back(N->getOperand(4)); 4655 Ops.push_back(Chain); 4656 break; 4657 case NVPTXISD::Suld3DI64Zero: 4658 Opc = NVPTX::SULD_3D_I64_ZERO; 4659 Ops.push_back(TexHandle); 4660 Ops.push_back(N->getOperand(2)); 4661 Ops.push_back(N->getOperand(3)); 4662 
Ops.push_back(N->getOperand(4)); 4663 Ops.push_back(Chain); 4664 break; 4665 case NVPTXISD::Suld3DV2I8Zero: 4666 Opc = NVPTX::SULD_3D_V2I8_ZERO; 4667 Ops.push_back(TexHandle); 4668 Ops.push_back(N->getOperand(2)); 4669 Ops.push_back(N->getOperand(3)); 4670 Ops.push_back(N->getOperand(4)); 4671 Ops.push_back(Chain); 4672 break; 4673 case NVPTXISD::Suld3DV2I16Zero: 4674 Opc = NVPTX::SULD_3D_V2I16_ZERO; 4675 Ops.push_back(TexHandle); 4676 Ops.push_back(N->getOperand(2)); 4677 Ops.push_back(N->getOperand(3)); 4678 Ops.push_back(N->getOperand(4)); 4679 Ops.push_back(Chain); 4680 break; 4681 case NVPTXISD::Suld3DV2I32Zero: 4682 Opc = NVPTX::SULD_3D_V2I32_ZERO; 4683 Ops.push_back(TexHandle); 4684 Ops.push_back(N->getOperand(2)); 4685 Ops.push_back(N->getOperand(3)); 4686 Ops.push_back(N->getOperand(4)); 4687 Ops.push_back(Chain); 4688 break; 4689 case NVPTXISD::Suld3DV2I64Zero: 4690 Opc = NVPTX::SULD_3D_V2I64_ZERO; 4691 Ops.push_back(TexHandle); 4692 Ops.push_back(N->getOperand(2)); 4693 Ops.push_back(N->getOperand(3)); 4694 Ops.push_back(N->getOperand(4)); 4695 Ops.push_back(Chain); 4696 break; 4697 case NVPTXISD::Suld3DV4I8Zero: 4698 Opc = NVPTX::SULD_3D_V4I8_ZERO; 4699 Ops.push_back(TexHandle); 4700 Ops.push_back(N->getOperand(2)); 4701 Ops.push_back(N->getOperand(3)); 4702 Ops.push_back(N->getOperand(4)); 4703 Ops.push_back(Chain); 4704 break; 4705 case NVPTXISD::Suld3DV4I16Zero: 4706 Opc = NVPTX::SULD_3D_V4I16_ZERO; 4707 Ops.push_back(TexHandle); 4708 Ops.push_back(N->getOperand(2)); 4709 Ops.push_back(N->getOperand(3)); 4710 Ops.push_back(N->getOperand(4)); 4711 Ops.push_back(Chain); 4712 break; 4713 case NVPTXISD::Suld3DV4I32Zero: 4714 Opc = NVPTX::SULD_3D_V4I32_ZERO; 4715 Ops.push_back(TexHandle); 4716 Ops.push_back(N->getOperand(2)); 4717 Ops.push_back(N->getOperand(3)); 4718 Ops.push_back(N->getOperand(4)); 4719 Ops.push_back(Chain); 4720 break; 4721 } 4722 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 4723 return Ret; 4724 } 4725 4726 4727 
/// SelectBFE - Look for instruction sequences that can be made more efficient 4728 /// by using the 'bfe' (bit-field extract) PTX instruction 4729 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) { 4730 SDValue LHS = N->getOperand(0); 4731 SDValue RHS = N->getOperand(1); 4732 SDValue Len; 4733 SDValue Start; 4734 SDValue Val; 4735 bool IsSigned = false; 4736 4737 if (N->getOpcode() == ISD::AND) { 4738 // Canonicalize the operands 4739 // We want 'and %val, %mask' 4740 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) { 4741 std::swap(LHS, RHS); 4742 } 4743 4744 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS); 4745 if (!Mask) { 4746 // We need a constant mask on the RHS of the AND 4747 return NULL; 4748 } 4749 4750 // Extract the mask bits 4751 uint64_t MaskVal = Mask->getZExtValue(); 4752 if (!isMask_64(MaskVal)) { 4753 // We *could* handle shifted masks here, but doing so would require an 4754 // 'and' operation to fix up the low-order bits so we would trade 4755 // shr+and for bfe+and, which has the same throughput 4756 return NULL; 4757 } 4758 4759 // How many bits are in our mask? 4760 uint64_t NumBits = countTrailingOnes(MaskVal); 4761 Len = CurDAG->getTargetConstant(NumBits, MVT::i32); 4762 4763 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) { 4764 // We have a 'srl/and' pair, extract the effective start bit and length 4765 Val = LHS.getNode()->getOperand(0); 4766 Start = LHS.getNode()->getOperand(1); 4767 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start); 4768 if (StartConst) { 4769 uint64_t StartVal = StartConst->getZExtValue(); 4770 // How many "good" bits do we have left? "good" is defined here as bits 4771 // that exist in the original value, not shifted in. 4772 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal; 4773 if (NumBits > GoodBits) { 4774 // Do not handle the case where bits have been shifted in. 
In theory 4775 // we could handle this, but the cost is likely higher than just 4776 // emitting the srl/and pair. 4777 return NULL; 4778 } 4779 Start = CurDAG->getTargetConstant(StartVal, MVT::i32); 4780 } else { 4781 // Do not handle the case where the shift amount (can be zero if no srl 4782 // was found) is not constant. We could handle this case, but it would 4783 // require run-time logic that would be more expensive than just 4784 // emitting the srl/and pair. 4785 return NULL; 4786 } 4787 } else { 4788 // Do not handle the case where the LHS of the and is not a shift. While 4789 // it would be trivial to handle this case, it would just transform 4790 // 'and' -> 'bfe', but 'and' has higher-throughput. 4791 return NULL; 4792 } 4793 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) { 4794 if (LHS->getOpcode() == ISD::AND) { 4795 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS); 4796 if (!ShiftCnst) { 4797 // Shift amount must be constant 4798 return NULL; 4799 } 4800 4801 uint64_t ShiftAmt = ShiftCnst->getZExtValue(); 4802 4803 SDValue AndLHS = LHS->getOperand(0); 4804 SDValue AndRHS = LHS->getOperand(1); 4805 4806 // Canonicalize the AND to have the mask on the RHS 4807 if (isa<ConstantSDNode>(AndLHS)) { 4808 std::swap(AndLHS, AndRHS); 4809 } 4810 4811 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS); 4812 if (!MaskCnst) { 4813 // Mask must be constant 4814 return NULL; 4815 } 4816 4817 uint64_t MaskVal = MaskCnst->getZExtValue(); 4818 uint64_t NumZeros; 4819 uint64_t NumBits; 4820 if (isMask_64(MaskVal)) { 4821 NumZeros = 0; 4822 // The number of bits in the result bitfield will be the number of 4823 // trailing ones (the AND) minus the number of bits we shift off 4824 NumBits = countTrailingOnes(MaskVal) - ShiftAmt; 4825 } else if (isShiftedMask_64(MaskVal)) { 4826 NumZeros = countTrailingZeros(MaskVal); 4827 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros); 4828 // The number of bits in the result bitfield 
will be the number of 4829 // trailing zeros plus the number of set bits in the mask minus the 4830 // number of bits we shift off 4831 NumBits = NumZeros + NumOnes - ShiftAmt; 4832 } else { 4833 // This is not a mask we can handle 4834 return NULL; 4835 } 4836 4837 if (ShiftAmt < NumZeros) { 4838 // Handling this case would require extra logic that would make this 4839 // transformation non-profitable 4840 return NULL; 4841 } 4842 4843 Val = AndLHS; 4844 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32); 4845 Len = CurDAG->getTargetConstant(NumBits, MVT::i32); 4846 } else if (LHS->getOpcode() == ISD::SHL) { 4847 // Here, we have a pattern like: 4848 // 4849 // (sra (shl val, NN), MM) 4850 // or 4851 // (srl (shl val, NN), MM) 4852 // 4853 // If MM >= NN, we can efficiently optimize this with bfe 4854 Val = LHS->getOperand(0); 4855 4856 SDValue ShlRHS = LHS->getOperand(1); 4857 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS); 4858 if (!ShlCnst) { 4859 // Shift amount must be constant 4860 return NULL; 4861 } 4862 uint64_t InnerShiftAmt = ShlCnst->getZExtValue(); 4863 4864 SDValue ShrRHS = RHS; 4865 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS); 4866 if (!ShrCnst) { 4867 // Shift amount must be constant 4868 return NULL; 4869 } 4870 uint64_t OuterShiftAmt = ShrCnst->getZExtValue(); 4871 4872 // To avoid extra codegen and be profitable, we need Outer >= Inner 4873 if (OuterShiftAmt < InnerShiftAmt) { 4874 return NULL; 4875 } 4876 4877 // If the outer shift is more than the type size, we have no bitfield to 4878 // extract (since we also check that the inner shift is <= the outer shift 4879 // then this also implies that the inner shift is < the type size) 4880 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) { 4881 return NULL; 4882 } 4883 4884 Start = 4885 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32); 4886 Len = 4887 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() - 4888 OuterShiftAmt, MVT::i32); 
4889 4890 if (N->getOpcode() == ISD::SRA) { 4891 // If we have a arithmetic right shift, we need to use the signed bfe 4892 // variant 4893 IsSigned = true; 4894 } 4895 } else { 4896 // No can do... 4897 return NULL; 4898 } 4899 } else { 4900 // No can do... 4901 return NULL; 4902 } 4903 4904 4905 unsigned Opc; 4906 // For the BFE operations we form here from "and" and "srl", always use the 4907 // unsigned variants. 4908 if (Val.getValueType() == MVT::i32) { 4909 if (IsSigned) { 4910 Opc = NVPTX::BFE_S32rii; 4911 } else { 4912 Opc = NVPTX::BFE_U32rii; 4913 } 4914 } else if (Val.getValueType() == MVT::i64) { 4915 if (IsSigned) { 4916 Opc = NVPTX::BFE_S64rii; 4917 } else { 4918 Opc = NVPTX::BFE_U64rii; 4919 } 4920 } else { 4921 // We cannot handle this type 4922 return NULL; 4923 } 4924 4925 SDValue Ops[] = { 4926 Val, Start, Len 4927 }; 4928 4929 SDNode *Ret = 4930 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 4931 4932 return Ret; 4933 } 4934 4935 // SelectDirectAddr - Match a direct address for DAG. 4936 // A direct address could be a globaladdress or externalsymbol. 4937 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { 4938 // Return true if TGA or ES. 
4939 if (N.getOpcode() == ISD::TargetGlobalAddress || 4940 N.getOpcode() == ISD::TargetExternalSymbol) { 4941 Address = N; 4942 return true; 4943 } 4944 if (N.getOpcode() == NVPTXISD::Wrapper) { 4945 Address = N.getOperand(0); 4946 return true; 4947 } 4948 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) { 4949 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue(); 4950 if (IID == Intrinsic::nvvm_ptr_gen_to_param) 4951 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam) 4952 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address)); 4953 } 4954 return false; 4955 } 4956 4957 // symbol+offset 4958 bool NVPTXDAGToDAGISel::SelectADDRsi_imp( 4959 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { 4960 if (Addr.getOpcode() == ISD::ADD) { 4961 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 4962 SDValue base = Addr.getOperand(0); 4963 if (SelectDirectAddr(base, Base)) { 4964 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt); 4965 return true; 4966 } 4967 } 4968 } 4969 return false; 4970 } 4971 4972 // symbol+offset 4973 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr, 4974 SDValue &Base, SDValue &Offset) { 4975 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32); 4976 } 4977 4978 // symbol+offset 4979 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr, 4980 SDValue &Base, SDValue &Offset) { 4981 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64); 4982 } 4983 4984 // register+offset 4985 bool NVPTXDAGToDAGISel::SelectADDRri_imp( 4986 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { 4987 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 4988 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); 4989 Offset = CurDAG->getTargetConstant(0, mvt); 4990 return true; 4991 } 4992 if (Addr.getOpcode() == ISD::TargetExternalSymbol || 4993 Addr.getOpcode() == ISD::TargetGlobalAddress) 4994 return false; // 
direct calls. 4995 4996 if (Addr.getOpcode() == ISD::ADD) { 4997 if (SelectDirectAddr(Addr.getOperand(0), Addr)) { 4998 return false; 4999 } 5000 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 5001 if (FrameIndexSDNode *FIN = 5002 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) 5003 // Constant offset from frame ref. 5004 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); 5005 else 5006 Base = Addr.getOperand(0); 5007 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt); 5008 return true; 5009 } 5010 } 5011 return false; 5012 } 5013 5014 // register+offset 5015 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr, 5016 SDValue &Base, SDValue &Offset) { 5017 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32); 5018 } 5019 5020 // register+offset 5021 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, 5022 SDValue &Base, SDValue &Offset) { 5023 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); 5024 } 5025 5026 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, 5027 unsigned int spN) const { 5028 const Value *Src = nullptr; 5029 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { 5030 if (spN == 0 && mN->getMemOperand()->getPseudoValue()) 5031 return true; 5032 Src = mN->getMemOperand()->getValue(); 5033 } 5034 if (!Src) 5035 return false; 5036 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) 5037 return (PT->getAddressSpace() == spN); 5038 return false; 5039 } 5040 5041 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 5042 /// inline asm expressions. 
5043 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( 5044 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 5045 SDValue Op0, Op1; 5046 switch (ConstraintID) { 5047 default: 5048 return true; 5049 case InlineAsm::Constraint_m: // memory 5050 if (SelectDirectAddr(Op, Op0)) { 5051 OutOps.push_back(Op0); 5052 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); 5053 return false; 5054 } 5055 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) { 5056 OutOps.push_back(Op0); 5057 OutOps.push_back(Op1); 5058 return false; 5059 } 5060 break; 5061 } 5062 return true; 5063 } 5064