1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines an instruction selector for the NVPTX target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "NVPTXISelDAGToDAG.h" 15 #include "NVPTXUtilities.h" 16 #include "llvm/Analysis/ValueTracking.h" 17 #include "llvm/IR/GlobalValue.h" 18 #include "llvm/IR/Instructions.h" 19 #include "llvm/Support/CommandLine.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/raw_ostream.h" 23 #include "llvm/Target/TargetIntrinsicInfo.h" 24 25 using namespace llvm; 26 27 #define DEBUG_TYPE "nvptx-isel" 28 29 static cl::opt<int> UsePrecDivF32( 30 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, 31 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" 32 " IEEE Compliant F32 div.rnd if available."), 33 cl::init(2)); 34 35 static cl::opt<bool> 36 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden, 37 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), 38 cl::init(true)); 39 40 static cl::opt<bool> 41 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, 42 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), 43 cl::init(false)); 44 45 46 /// createNVPTXISelDag - This pass converts a legalized DAG into a 47 /// NVPTX-specific DAG, ready for instruction scheduling. 48 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, 49 llvm::CodeGenOpt::Level OptLevel) { 50 return new NVPTXDAGToDAGISel(TM, OptLevel); 51 } 52 53 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, 54 CodeGenOpt::Level OptLevel) 55 : SelectionDAGISel(tm, OptLevel), TM(tm) { 56 doMulWide = (OptLevel > 0); 57 } 58 59 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 60 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget()); 61 return SelectionDAGISel::runOnMachineFunction(MF); 62 } 63 64 int NVPTXDAGToDAGISel::getDivF32Level() const { 65 if (UsePrecDivF32.getNumOccurrences() > 0) { 66 // If nvptx-prec-div32=N is used on the command-line, always honor it 67 return UsePrecDivF32; 68 } else { 69 // Otherwise, use div.approx if fast math is enabled 70 if (TM.Options.UnsafeFPMath) 71 return 0; 72 else 73 return 2; 74 } 75 } 76 77 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const { 78 if (UsePrecSqrtF32.getNumOccurrences() > 0) { 79 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it 80 return UsePrecSqrtF32; 81 } else { 82 // Otherwise, use sqrt.approx if fast math is enabled 83 return !TM.Options.UnsafeFPMath; 84 } 85 } 86 87 bool NVPTXDAGToDAGISel::useF32FTZ() const { 88 if (FtzEnabled.getNumOccurrences() > 0) { 89 // If nvptx-f32ftz is used on the command-line, always honor it 90 return FtzEnabled; 91 } else { 92 const Function *F = MF->getFunction(); 93 // Otherwise, check for an nvptx-f32ftz attribute on the function 94 if (F->hasFnAttribute("nvptx-f32ftz")) 95 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true"; 96 else 97 return false; 98 } 99 } 100 101 bool NVPTXDAGToDAGISel::allowFMA() const { 102 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); 103 return TL->allowFMA(*MF, OptLevel); 104 } 105 106 /// Select - Select instructions not customized! Used for 107 /// expanded, promoted and normal instructions. 108 void NVPTXDAGToDAGISel::Select(SDNode *N) { 109 110 if (N->isMachineOpcode()) { 111 N->setNodeId(-1); 112 return; // Already selected. 113 } 114 115 switch (N->getOpcode()) { 116 case ISD::LOAD: 117 if (tryLoad(N)) 118 return; 119 break; 120 case ISD::STORE: 121 if (tryStore(N)) 122 return; 123 break; 124 case NVPTXISD::LoadV2: 125 case NVPTXISD::LoadV4: 126 if (tryLoadVector(N)) 127 return; 128 break; 129 case NVPTXISD::LDGV2: 130 case NVPTXISD::LDGV4: 131 case NVPTXISD::LDUV2: 132 case NVPTXISD::LDUV4: 133 if (tryLDGLDU(N)) 134 return; 135 break; 136 case NVPTXISD::StoreV2: 137 case NVPTXISD::StoreV4: 138 if (tryStoreVector(N)) 139 return; 140 break; 141 case NVPTXISD::LoadParam: 142 case NVPTXISD::LoadParamV2: 143 case NVPTXISD::LoadParamV4: 144 if (tryLoadParam(N)) 145 return; 146 break; 147 case NVPTXISD::StoreRetval: 148 case NVPTXISD::StoreRetvalV2: 149 case NVPTXISD::StoreRetvalV4: 150 if (tryStoreRetval(N)) 151 return; 152 break; 153 case NVPTXISD::StoreParam: 154 case NVPTXISD::StoreParamV2: 155 case NVPTXISD::StoreParamV4: 156 case NVPTXISD::StoreParamS32: 157 case NVPTXISD::StoreParamU32: 158 if (tryStoreParam(N)) 159 return; 160 break; 161 case ISD::INTRINSIC_WO_CHAIN: 162 if (tryIntrinsicNoChain(N)) 163 return; 164 break; 165 case ISD::INTRINSIC_W_CHAIN: 166 if (tryIntrinsicChain(N)) 167 return; 168 break; 169 case NVPTXISD::Tex1DFloatS32: 170 case NVPTXISD::Tex1DFloatFloat: 171 case NVPTXISD::Tex1DFloatFloatLevel: 172 case NVPTXISD::Tex1DFloatFloatGrad: 173 case NVPTXISD::Tex1DS32S32: 174 case NVPTXISD::Tex1DS32Float: 175 case NVPTXISD::Tex1DS32FloatLevel: 176 case NVPTXISD::Tex1DS32FloatGrad: 177 case NVPTXISD::Tex1DU32S32: 178 case NVPTXISD::Tex1DU32Float: 179 case NVPTXISD::Tex1DU32FloatLevel: 180 case NVPTXISD::Tex1DU32FloatGrad: 181 case NVPTXISD::Tex1DArrayFloatS32: 182 case NVPTXISD::Tex1DArrayFloatFloat: 183 case NVPTXISD::Tex1DArrayFloatFloatLevel: 184 case NVPTXISD::Tex1DArrayFloatFloatGrad: 185 case NVPTXISD::Tex1DArrayS32S32: 186 case NVPTXISD::Tex1DArrayS32Float: 187 case NVPTXISD::Tex1DArrayS32FloatLevel: 188 case NVPTXISD::Tex1DArrayS32FloatGrad: 189 case NVPTXISD::Tex1DArrayU32S32: 190 case NVPTXISD::Tex1DArrayU32Float: 191 case NVPTXISD::Tex1DArrayU32FloatLevel: 192 case NVPTXISD::Tex1DArrayU32FloatGrad: 193 case NVPTXISD::Tex2DFloatS32: 194 case NVPTXISD::Tex2DFloatFloat: 195 case NVPTXISD::Tex2DFloatFloatLevel: 196 case NVPTXISD::Tex2DFloatFloatGrad: 197 case NVPTXISD::Tex2DS32S32: 198 case NVPTXISD::Tex2DS32Float: 199 case NVPTXISD::Tex2DS32FloatLevel: 200 case NVPTXISD::Tex2DS32FloatGrad: 201 case NVPTXISD::Tex2DU32S32: 202 case NVPTXISD::Tex2DU32Float: 203 case NVPTXISD::Tex2DU32FloatLevel: 204 case NVPTXISD::Tex2DU32FloatGrad: 205 case NVPTXISD::Tex2DArrayFloatS32: 206 case NVPTXISD::Tex2DArrayFloatFloat: 207 case NVPTXISD::Tex2DArrayFloatFloatLevel: 208 case NVPTXISD::Tex2DArrayFloatFloatGrad: 209 case NVPTXISD::Tex2DArrayS32S32: 210 case NVPTXISD::Tex2DArrayS32Float: 211 case NVPTXISD::Tex2DArrayS32FloatLevel: 212 case NVPTXISD::Tex2DArrayS32FloatGrad: 213 case NVPTXISD::Tex2DArrayU32S32: 214 case NVPTXISD::Tex2DArrayU32Float: 215 case NVPTXISD::Tex2DArrayU32FloatLevel: 216 case NVPTXISD::Tex2DArrayU32FloatGrad: 217 case NVPTXISD::Tex3DFloatS32: 218 case NVPTXISD::Tex3DFloatFloat: 219 case NVPTXISD::Tex3DFloatFloatLevel: 220 case NVPTXISD::Tex3DFloatFloatGrad: 221 case NVPTXISD::Tex3DS32S32: 222 case NVPTXISD::Tex3DS32Float: 223 case NVPTXISD::Tex3DS32FloatLevel: 224 case NVPTXISD::Tex3DS32FloatGrad: 225 case NVPTXISD::Tex3DU32S32: 226 case NVPTXISD::Tex3DU32Float: 227 case NVPTXISD::Tex3DU32FloatLevel: 228 case NVPTXISD::Tex3DU32FloatGrad: 229 case NVPTXISD::TexCubeFloatFloat: 230 case NVPTXISD::TexCubeFloatFloatLevel: 231 case NVPTXISD::TexCubeS32Float: 232 case NVPTXISD::TexCubeS32FloatLevel: 233 case NVPTXISD::TexCubeU32Float: 234 case NVPTXISD::TexCubeU32FloatLevel: 235 case NVPTXISD::TexCubeArrayFloatFloat: 236 case NVPTXISD::TexCubeArrayFloatFloatLevel: 237 case NVPTXISD::TexCubeArrayS32Float: 238 case NVPTXISD::TexCubeArrayS32FloatLevel: 239 case NVPTXISD::TexCubeArrayU32Float: 240 case NVPTXISD::TexCubeArrayU32FloatLevel: 241 case NVPTXISD::Tld4R2DFloatFloat: 242 case NVPTXISD::Tld4G2DFloatFloat: 243 case NVPTXISD::Tld4B2DFloatFloat: 244 case NVPTXISD::Tld4A2DFloatFloat: 245 case NVPTXISD::Tld4R2DS64Float: 246 case NVPTXISD::Tld4G2DS64Float: 247 case NVPTXISD::Tld4B2DS64Float: 248 case NVPTXISD::Tld4A2DS64Float: 249 case NVPTXISD::Tld4R2DU64Float: 250 case NVPTXISD::Tld4G2DU64Float: 251 case NVPTXISD::Tld4B2DU64Float: 252 case NVPTXISD::Tld4A2DU64Float: 253 case NVPTXISD::TexUnified1DFloatS32: 254 case NVPTXISD::TexUnified1DFloatFloat: 255 case NVPTXISD::TexUnified1DFloatFloatLevel: 256 case NVPTXISD::TexUnified1DFloatFloatGrad: 257 case NVPTXISD::TexUnified1DS32S32: 258 case NVPTXISD::TexUnified1DS32Float: 259 case NVPTXISD::TexUnified1DS32FloatLevel: 260 case NVPTXISD::TexUnified1DS32FloatGrad: 261 case NVPTXISD::TexUnified1DU32S32: 262 case NVPTXISD::TexUnified1DU32Float: 263 case NVPTXISD::TexUnified1DU32FloatLevel: 264 case NVPTXISD::TexUnified1DU32FloatGrad: 265 case NVPTXISD::TexUnified1DArrayFloatS32: 266 case NVPTXISD::TexUnified1DArrayFloatFloat: 267 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 268 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 269 case NVPTXISD::TexUnified1DArrayS32S32: 270 case NVPTXISD::TexUnified1DArrayS32Float: 271 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 272 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 273 case NVPTXISD::TexUnified1DArrayU32S32: 274 case NVPTXISD::TexUnified1DArrayU32Float: 275 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 276 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 277 case NVPTXISD::TexUnified2DFloatS32: 278 case NVPTXISD::TexUnified2DFloatFloat: 279 case NVPTXISD::TexUnified2DFloatFloatLevel: 280 case NVPTXISD::TexUnified2DFloatFloatGrad: 281 case NVPTXISD::TexUnified2DS32S32: 282 case NVPTXISD::TexUnified2DS32Float: 283 case NVPTXISD::TexUnified2DS32FloatLevel: 284 case NVPTXISD::TexUnified2DS32FloatGrad: 285 case NVPTXISD::TexUnified2DU32S32: 286 case NVPTXISD::TexUnified2DU32Float: 287 case NVPTXISD::TexUnified2DU32FloatLevel: 288 case NVPTXISD::TexUnified2DU32FloatGrad: 289 case NVPTXISD::TexUnified2DArrayFloatS32: 290 case NVPTXISD::TexUnified2DArrayFloatFloat: 291 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 292 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 293 case NVPTXISD::TexUnified2DArrayS32S32: 294 case NVPTXISD::TexUnified2DArrayS32Float: 295 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 296 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 297 case NVPTXISD::TexUnified2DArrayU32S32: 298 case NVPTXISD::TexUnified2DArrayU32Float: 299 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 300 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 301 case NVPTXISD::TexUnified3DFloatS32: 302 case NVPTXISD::TexUnified3DFloatFloat: 303 case NVPTXISD::TexUnified3DFloatFloatLevel: 304 case NVPTXISD::TexUnified3DFloatFloatGrad: 305 case NVPTXISD::TexUnified3DS32S32: 306 case NVPTXISD::TexUnified3DS32Float: 307 case NVPTXISD::TexUnified3DS32FloatLevel: 308 case NVPTXISD::TexUnified3DS32FloatGrad: 309 case NVPTXISD::TexUnified3DU32S32: 310 case NVPTXISD::TexUnified3DU32Float: 311 case NVPTXISD::TexUnified3DU32FloatLevel: 312 case NVPTXISD::TexUnified3DU32FloatGrad: 313 case NVPTXISD::TexUnifiedCubeFloatFloat: 314 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 315 case NVPTXISD::TexUnifiedCubeS32Float: 316 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 317 case NVPTXISD::TexUnifiedCubeU32Float: 318 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 319 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 320 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 321 case NVPTXISD::TexUnifiedCubeArrayS32Float: 322 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 323 case NVPTXISD::TexUnifiedCubeArrayU32Float: 324 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 325 case NVPTXISD::Tld4UnifiedR2DFloatFloat: 326 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 327 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 328 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 329 case NVPTXISD::Tld4UnifiedR2DS64Float: 330 case NVPTXISD::Tld4UnifiedG2DS64Float: 331 case NVPTXISD::Tld4UnifiedB2DS64Float: 332 case NVPTXISD::Tld4UnifiedA2DS64Float: 333 case NVPTXISD::Tld4UnifiedR2DU64Float: 334 case NVPTXISD::Tld4UnifiedG2DU64Float: 335 case NVPTXISD::Tld4UnifiedB2DU64Float: 336 case NVPTXISD::Tld4UnifiedA2DU64Float: 337 if (tryTextureIntrinsic(N)) 338 return; 339 break; 340 case NVPTXISD::Suld1DI8Clamp: 341 case NVPTXISD::Suld1DI16Clamp: 342 case NVPTXISD::Suld1DI32Clamp: 343 case NVPTXISD::Suld1DI64Clamp: 344 case NVPTXISD::Suld1DV2I8Clamp: 345 case NVPTXISD::Suld1DV2I16Clamp: 346 case NVPTXISD::Suld1DV2I32Clamp: 347 case NVPTXISD::Suld1DV2I64Clamp: 348 case NVPTXISD::Suld1DV4I8Clamp: 349 case NVPTXISD::Suld1DV4I16Clamp: 350 case NVPTXISD::Suld1DV4I32Clamp: 351 case NVPTXISD::Suld1DArrayI8Clamp: 352 case NVPTXISD::Suld1DArrayI16Clamp: 353 case NVPTXISD::Suld1DArrayI32Clamp: 354 case NVPTXISD::Suld1DArrayI64Clamp: 355 case NVPTXISD::Suld1DArrayV2I8Clamp: 356 case NVPTXISD::Suld1DArrayV2I16Clamp: 357 case NVPTXISD::Suld1DArrayV2I32Clamp: 358 case NVPTXISD::Suld1DArrayV2I64Clamp: 359 case NVPTXISD::Suld1DArrayV4I8Clamp: 360 case NVPTXISD::Suld1DArrayV4I16Clamp: 361 case NVPTXISD::Suld1DArrayV4I32Clamp: 362 case NVPTXISD::Suld2DI8Clamp: 363 case NVPTXISD::Suld2DI16Clamp: 364 case NVPTXISD::Suld2DI32Clamp: 365 case NVPTXISD::Suld2DI64Clamp: 366 case NVPTXISD::Suld2DV2I8Clamp: 367 case NVPTXISD::Suld2DV2I16Clamp: 368 case NVPTXISD::Suld2DV2I32Clamp: 369 case NVPTXISD::Suld2DV2I64Clamp: 370 case NVPTXISD::Suld2DV4I8Clamp: 371 case NVPTXISD::Suld2DV4I16Clamp: 372 case NVPTXISD::Suld2DV4I32Clamp: 373 case NVPTXISD::Suld2DArrayI8Clamp: 374 case NVPTXISD::Suld2DArrayI16Clamp: 375 case NVPTXISD::Suld2DArrayI32Clamp: 376 case NVPTXISD::Suld2DArrayI64Clamp: 377 case NVPTXISD::Suld2DArrayV2I8Clamp: 378 case NVPTXISD::Suld2DArrayV2I16Clamp: 379 case NVPTXISD::Suld2DArrayV2I32Clamp: 380 case NVPTXISD::Suld2DArrayV2I64Clamp: 381 case NVPTXISD::Suld2DArrayV4I8Clamp: 382 case NVPTXISD::Suld2DArrayV4I16Clamp: 383 case NVPTXISD::Suld2DArrayV4I32Clamp: 384 case NVPTXISD::Suld3DI8Clamp: 385 case NVPTXISD::Suld3DI16Clamp: 386 case NVPTXISD::Suld3DI32Clamp: 387 case NVPTXISD::Suld3DI64Clamp: 388 case NVPTXISD::Suld3DV2I8Clamp: 389 case NVPTXISD::Suld3DV2I16Clamp: 390 case NVPTXISD::Suld3DV2I32Clamp: 391 case NVPTXISD::Suld3DV2I64Clamp: 392 case NVPTXISD::Suld3DV4I8Clamp: 393 case NVPTXISD::Suld3DV4I16Clamp: 394 case NVPTXISD::Suld3DV4I32Clamp: 395 case NVPTXISD::Suld1DI8Trap: 396 case NVPTXISD::Suld1DI16Trap: 397 case NVPTXISD::Suld1DI32Trap: 398 case NVPTXISD::Suld1DI64Trap: 399 case NVPTXISD::Suld1DV2I8Trap: 400 case NVPTXISD::Suld1DV2I16Trap: 401 case NVPTXISD::Suld1DV2I32Trap: 402 case NVPTXISD::Suld1DV2I64Trap: 403 case NVPTXISD::Suld1DV4I8Trap: 404 case NVPTXISD::Suld1DV4I16Trap: 405 case NVPTXISD::Suld1DV4I32Trap: 406 case NVPTXISD::Suld1DArrayI8Trap: 407 case NVPTXISD::Suld1DArrayI16Trap: 408 case NVPTXISD::Suld1DArrayI32Trap: 409 case NVPTXISD::Suld1DArrayI64Trap: 410 case NVPTXISD::Suld1DArrayV2I8Trap: 411 case NVPTXISD::Suld1DArrayV2I16Trap: 412 case NVPTXISD::Suld1DArrayV2I32Trap: 413 case NVPTXISD::Suld1DArrayV2I64Trap: 414 case NVPTXISD::Suld1DArrayV4I8Trap: 415 case NVPTXISD::Suld1DArrayV4I16Trap: 416 case NVPTXISD::Suld1DArrayV4I32Trap: 417 case NVPTXISD::Suld2DI8Trap: 418 case NVPTXISD::Suld2DI16Trap: 419 case NVPTXISD::Suld2DI32Trap: 420 case NVPTXISD::Suld2DI64Trap: 421 case NVPTXISD::Suld2DV2I8Trap: 422 case NVPTXISD::Suld2DV2I16Trap: 423 case NVPTXISD::Suld2DV2I32Trap: 424 case NVPTXISD::Suld2DV2I64Trap: 425 case NVPTXISD::Suld2DV4I8Trap: 426 case NVPTXISD::Suld2DV4I16Trap: 427 case NVPTXISD::Suld2DV4I32Trap: 428 case NVPTXISD::Suld2DArrayI8Trap: 429 case NVPTXISD::Suld2DArrayI16Trap: 430 case NVPTXISD::Suld2DArrayI32Trap: 431 case NVPTXISD::Suld2DArrayI64Trap: 432 case NVPTXISD::Suld2DArrayV2I8Trap: 433 case NVPTXISD::Suld2DArrayV2I16Trap: 434 case NVPTXISD::Suld2DArrayV2I32Trap: 435 case NVPTXISD::Suld2DArrayV2I64Trap: 436 case NVPTXISD::Suld2DArrayV4I8Trap: 437 case NVPTXISD::Suld2DArrayV4I16Trap: 438 case NVPTXISD::Suld2DArrayV4I32Trap: 439 case NVPTXISD::Suld3DI8Trap: 440 case NVPTXISD::Suld3DI16Trap: 441 case NVPTXISD::Suld3DI32Trap: 442 case NVPTXISD::Suld3DI64Trap: 443 case NVPTXISD::Suld3DV2I8Trap: 444 case NVPTXISD::Suld3DV2I16Trap: 445 case NVPTXISD::Suld3DV2I32Trap: 446 case NVPTXISD::Suld3DV2I64Trap: 447 case NVPTXISD::Suld3DV4I8Trap: 448 case NVPTXISD::Suld3DV4I16Trap: 449 case NVPTXISD::Suld3DV4I32Trap: 450 case NVPTXISD::Suld1DI8Zero: 451 case NVPTXISD::Suld1DI16Zero: 452 case NVPTXISD::Suld1DI32Zero: 453 case NVPTXISD::Suld1DI64Zero: 454 case NVPTXISD::Suld1DV2I8Zero: 455 case NVPTXISD::Suld1DV2I16Zero: 456 case NVPTXISD::Suld1DV2I32Zero: 457 case NVPTXISD::Suld1DV2I64Zero: 458 case NVPTXISD::Suld1DV4I8Zero: 459 case NVPTXISD::Suld1DV4I16Zero: 460 case NVPTXISD::Suld1DV4I32Zero: 461 case NVPTXISD::Suld1DArrayI8Zero: 462 case NVPTXISD::Suld1DArrayI16Zero: 463 case NVPTXISD::Suld1DArrayI32Zero: 464 case NVPTXISD::Suld1DArrayI64Zero: 465 case NVPTXISD::Suld1DArrayV2I8Zero: 466 case NVPTXISD::Suld1DArrayV2I16Zero: 467 case NVPTXISD::Suld1DArrayV2I32Zero: 468 case NVPTXISD::Suld1DArrayV2I64Zero: 469 case NVPTXISD::Suld1DArrayV4I8Zero: 470 case NVPTXISD::Suld1DArrayV4I16Zero: 471 case NVPTXISD::Suld1DArrayV4I32Zero: 472 case NVPTXISD::Suld2DI8Zero: 473 case NVPTXISD::Suld2DI16Zero: 474 case NVPTXISD::Suld2DI32Zero: 475 case NVPTXISD::Suld2DI64Zero: 476 case NVPTXISD::Suld2DV2I8Zero: 477 case NVPTXISD::Suld2DV2I16Zero: 478 case NVPTXISD::Suld2DV2I32Zero: 479 case NVPTXISD::Suld2DV2I64Zero: 480 case NVPTXISD::Suld2DV4I8Zero: 481 case NVPTXISD::Suld2DV4I16Zero: 482 case NVPTXISD::Suld2DV4I32Zero: 483 case NVPTXISD::Suld2DArrayI8Zero: 484 case NVPTXISD::Suld2DArrayI16Zero: 485 case NVPTXISD::Suld2DArrayI32Zero: 486 case NVPTXISD::Suld2DArrayI64Zero: 487 case NVPTXISD::Suld2DArrayV2I8Zero: 488 case NVPTXISD::Suld2DArrayV2I16Zero: 489 case NVPTXISD::Suld2DArrayV2I32Zero: 490 case NVPTXISD::Suld2DArrayV2I64Zero: 491 case NVPTXISD::Suld2DArrayV4I8Zero: 492 case NVPTXISD::Suld2DArrayV4I16Zero: 493 case NVPTXISD::Suld2DArrayV4I32Zero: 494 case NVPTXISD::Suld3DI8Zero: 495 case NVPTXISD::Suld3DI16Zero: 496 case NVPTXISD::Suld3DI32Zero: 497 case NVPTXISD::Suld3DI64Zero: 498 case NVPTXISD::Suld3DV2I8Zero: 499 case NVPTXISD::Suld3DV2I16Zero: 500 case NVPTXISD::Suld3DV2I32Zero: 501 case NVPTXISD::Suld3DV2I64Zero: 502 case NVPTXISD::Suld3DV4I8Zero: 503 case NVPTXISD::Suld3DV4I16Zero: 504 case NVPTXISD::Suld3DV4I32Zero: 505 if (trySurfaceIntrinsic(N)) 506 return; 507 break; 508 case ISD::AND: 509 case ISD::SRA: 510 case ISD::SRL: 511 // Try to select BFE 512 if (tryBFE(N)) 513 return; 514 break; 515 case ISD::ADDRSPACECAST: 516 SelectAddrSpaceCast(N); 517 return; 518 default: 519 break; 520 } 521 SelectCode(N); 522 } 523 524 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) { 525 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 526 switch (IID) { 527 default: 528 return false; 529 case Intrinsic::nvvm_ldg_global_f: 530 case Intrinsic::nvvm_ldg_global_i: 531 case Intrinsic::nvvm_ldg_global_p: 532 case Intrinsic::nvvm_ldu_global_f: 533 case Intrinsic::nvvm_ldu_global_i: 534 case Intrinsic::nvvm_ldu_global_p: 535 return tryLDGLDU(N); 536 } 537 } 538 539 static unsigned int getCodeAddrSpace(MemSDNode *N) { 540 const Value *Src = N->getMemOperand()->getValue(); 541 542 if (!Src) 543 return NVPTX::PTXLdStInstCode::GENERIC; 544 545 if (auto *PT = dyn_cast<PointerType>(Src->getType())) { 546 switch (PT->getAddressSpace()) { 547 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; 548 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; 549 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; 550 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; 551 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; 552 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT; 553 default: break; 554 } 555 } 556 return NVPTX::PTXLdStInstCode::GENERIC; 557 } 558 559 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, 560 unsigned CodeAddrSpace, MachineFunction *F) { 561 // To use non-coherent caching, the load has to be from global 562 // memory and we have to prove that the memory area is not written 563 // to anywhere for the duration of the kernel call, not even after 564 // the load. 565 // 566 // To ensure that there are no writes to the memory, we require the 567 // underlying pointer to be a noalias (__restrict) kernel parameter 568 // that is never used for a write. We can only do this for kernel 569 // functions since from within a device function, we cannot know if 570 // there were or will be writes to the memory from the caller - or we 571 // could, but then we would have to do inter-procedural analysis. 572 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL || 573 !isKernelFunction(*F->getFunction())) { 574 return false; 575 } 576 577 // We use GetUnderlyingObjects() here instead of 578 // GetUnderlyingObject() mainly because the former looks through phi 579 // nodes while the latter does not. We need to look through phi 580 // nodes to handle pointer induction variables. 581 SmallVector<Value *, 8> Objs; 582 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()), 583 Objs, F->getDataLayout()); 584 for (Value *Obj : Objs) { 585 auto *A = dyn_cast<const Argument>(Obj); 586 if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false; 587 } 588 589 return true; 590 } 591 592 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) { 593 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 594 switch (IID) { 595 default: 596 return false; 597 case Intrinsic::nvvm_texsurf_handle_internal: 598 SelectTexSurfHandle(N); 599 return true; 600 } 601 } 602 603 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { 604 // Op 0 is the intrinsic ID 605 SDValue Wrapper = N->getOperand(1); 606 SDValue GlobalVal = Wrapper.getOperand(0); 607 ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), 608 MVT::i64, GlobalVal)); 609 } 610 611 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { 612 SDValue Src = N->getOperand(0); 613 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N); 614 unsigned SrcAddrSpace = CastN->getSrcAddressSpace(); 615 unsigned DstAddrSpace = CastN->getDestAddressSpace(); 616 617 assert(SrcAddrSpace != DstAddrSpace && 618 "addrspacecast must be between different address spaces"); 619 620 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) { 621 // Specific to generic 622 unsigned Opc; 623 switch (SrcAddrSpace) { 624 default: report_fatal_error("Bad address space in addrspacecast"); 625 case ADDRESS_SPACE_GLOBAL: 626 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes; 627 break; 628 case ADDRESS_SPACE_SHARED: 629 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes; 630 break; 631 case ADDRESS_SPACE_CONST: 632 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes; 633 break; 634 case ADDRESS_SPACE_LOCAL: 635 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes; 636 break; 637 } 638 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 639 Src)); 640 return; 641 } else { 642 // Generic to specific 643 if (SrcAddrSpace != 0) 644 report_fatal_error("Cannot cast between two non-generic address spaces"); 645 unsigned Opc; 646 switch (DstAddrSpace) { 647 default: report_fatal_error("Bad address space in addrspacecast"); 648 case ADDRESS_SPACE_GLOBAL: 649 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64 650 : NVPTX::cvta_to_global_yes; 651 break; 652 case ADDRESS_SPACE_SHARED: 653 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64 654 : NVPTX::cvta_to_shared_yes; 655 break; 656 case ADDRESS_SPACE_CONST: 657 Opc = 658 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes; 659 break; 660 case ADDRESS_SPACE_LOCAL: 661 Opc = 662 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes; 663 break; 664 case ADDRESS_SPACE_PARAM: 665 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64 666 : NVPTX::nvvm_ptr_gen_to_param; 667 break; 668 } 669 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 670 Src)); 671 return; 672 } 673 } 674 675 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) { 676 SDLoc dl(N); 677 LoadSDNode *LD = cast<LoadSDNode>(N); 678 EVT LoadedVT = LD->getMemoryVT(); 679 SDNode *NVPTXLD = nullptr; 680 681 // do not support pre/post inc/dec 682 if (LD->isIndexed()) 683 return false; 684 685 if (!LoadedVT.isSimple()) 686 return false; 687 688 // Address Space Setting 689 unsigned int codeAddrSpace = getCodeAddrSpace(LD); 690 691 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) { 692 return tryLDGLDU(N); 693 } 694 695 // Volatile Setting 696 // - .volatile is only availalble for .global and .shared 697 bool isVolatile = LD->isVolatile(); 698 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 699 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 700 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 701 isVolatile = false; 702 703 // Vector Setting 704 MVT SimpleVT = LoadedVT.getSimpleVT(); 705 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; 706 if (SimpleVT.isVector()) { 707 unsigned num = SimpleVT.getVectorNumElements(); 708 if (num == 2) 709 vecType = NVPTX::PTXLdStInstCode::V2; 710 else if (num == 4) 711 vecType = NVPTX::PTXLdStInstCode::V4; 712 else 713 return false; 714 } 715 716 // Type Setting: fromType + fromTypeWidth 717 // 718 // Sign : ISD::SEXTLOAD 719 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the 720 // type is integer 721 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float 722 MVT ScalarVT = SimpleVT.getScalarType(); 723 // Read at least 8 bits (predicates are stored as 8-bit values) 724 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits()); 725 unsigned int fromType; 726 if ((LD->getExtensionType() == ISD::SEXTLOAD)) 727 fromType = NVPTX::PTXLdStInstCode::Signed; 728 else if (ScalarVT.isFloatingPoint()) 729 fromType = NVPTX::PTXLdStInstCode::Float; 730 else 731 fromType = NVPTX::PTXLdStInstCode::Unsigned; 732 733 // Create the machine instruction DAG 734 SDValue Chain = N->getOperand(0); 735 SDValue N1 = N->getOperand(1); 736 SDValue Addr; 737 SDValue Offset, Base; 738 unsigned Opcode; 739 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; 740 741 if (SelectDirectAddr(N1, Addr)) { 742 switch (TargetVT) { 743 case MVT::i8: 744 Opcode = NVPTX::LD_i8_avar; 745 break; 746 case MVT::i16: 747 Opcode = NVPTX::LD_i16_avar; 748 break; 749 case MVT::i32: 750 Opcode = NVPTX::LD_i32_avar; 751 break; 752 case MVT::i64: 753 Opcode = NVPTX::LD_i64_avar; 754 break; 755 case MVT::f32: 756 Opcode = NVPTX::LD_f32_avar; 757 break; 758 case MVT::f64: 759 Opcode = NVPTX::LD_f64_avar; 760 break; 761 default: 762 return false; 763 } 764 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), 765 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 766 getI32Imm(fromTypeWidth, dl), Addr, Chain }; 767 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 768 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) 769 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { 770 switch (TargetVT) { 771 case MVT::i8: 772 Opcode = NVPTX::LD_i8_asi; 773 break; 774 case MVT::i16: 775 Opcode = NVPTX::LD_i16_asi; 776 break; 777 case MVT::i32: 778 Opcode = NVPTX::LD_i32_asi; 779 break; 780 case MVT::i64: 781 Opcode = NVPTX::LD_i64_asi; 782 break; 783 case MVT::f32: 784 Opcode = NVPTX::LD_f32_asi; 785 break; 786 case MVT::f64: 787 Opcode = NVPTX::LD_f64_asi; 788 break; 789 default: 790 return false; 791 } 792 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), 793 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 794 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; 795 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 796 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset) 797 : SelectADDRri(N1.getNode(), N1, Base, Offset)) { 798 if (TM.is64Bit()) { 799 switch (TargetVT) { 800 case MVT::i8: 801 Opcode = NVPTX::LD_i8_ari_64; 802 break; 803 case MVT::i16: 804 Opcode = NVPTX::LD_i16_ari_64; 805 break; 806 case MVT::i32: 807 Opcode = NVPTX::LD_i32_ari_64; 808 break; 809 case MVT::i64: 810 Opcode = NVPTX::LD_i64_ari_64; 811 break; 812 case MVT::f32: 813 Opcode = NVPTX::LD_f32_ari_64; 814 break; 815 case MVT::f64: 816 Opcode = NVPTX::LD_f64_ari_64; 817 break; 818 default: 819 return false; 820 } 821 } else { 822 switch (TargetVT) { 823 case MVT::i8: 824 Opcode = NVPTX::LD_i8_ari; 825 break; 826 case MVT::i16: 827 Opcode = NVPTX::LD_i16_ari; 828 break; 829 case MVT::i32: 830 Opcode = NVPTX::LD_i32_ari; 831 break; 832 case MVT::i64: 833 Opcode = NVPTX::LD_i64_ari; 834 break; 835 case MVT::f32: 836 Opcode = NVPTX::LD_f32_ari; 837 break; 838 case MVT::f64: 839 Opcode = NVPTX::LD_f64_ari; 840 break; 841 default: 842 return false; 843 } 844 } 845 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), 846 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 847 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; 848 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 849 } else { 850 if (TM.is64Bit()) { 851 switch (TargetVT) { 852 case MVT::i8: 853 Opcode = NVPTX::LD_i8_areg_64; 854 break; 855 case MVT::i16: 856 Opcode = NVPTX::LD_i16_areg_64; 857 break; 858 case MVT::i32: 859 Opcode = NVPTX::LD_i32_areg_64; 860 break; 861 case MVT::i64: 862 Opcode = NVPTX::LD_i64_areg_64; 863 break; 864 case MVT::f32: 865 Opcode = NVPTX::LD_f32_areg_64; 866 break; 867 case MVT::f64: 868 Opcode = NVPTX::LD_f64_areg_64; 869 break; 870 default: 871 return false; 872 } 873 } else { 874 switch (TargetVT) { 875 case MVT::i8: 876 Opcode = NVPTX::LD_i8_areg; 877 break; 878 case MVT::i16: 879 Opcode = NVPTX::LD_i16_areg; 880 break; 881 case MVT::i32: 882 Opcode = NVPTX::LD_i32_areg; 883 break; 884 case MVT::i64: 885 Opcode = NVPTX::LD_i64_areg; 886 break; 887 case MVT::f32: 888 Opcode = NVPTX::LD_f32_areg; 889 break; 890 case MVT::f64: 891 Opcode = NVPTX::LD_f64_areg; 892 break; 893 default: 894 return false; 895 } 896 } 897 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), 898 getI32Imm(vecType, dl), getI32Imm(fromType, dl), 899 getI32Imm(fromTypeWidth, dl), N1, Chain }; 900 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); 901 } 902 903 if (!NVPTXLD) 904 return false; 905 906 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 907 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 908 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1); 909 910 ReplaceNode(N, NVPTXLD); 911 return true; 912 } 913 914 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { 915 916 SDValue Chain = N->getOperand(0); 917 SDValue Op1 = N->getOperand(1); 918 SDValue Addr, Offset, Base; 919 unsigned Opcode; 920 SDLoc DL(N); 921 SDNode *LD; 922 MemSDNode *MemSD = cast<MemSDNode>(N); 923 EVT LoadedVT = MemSD->getMemoryVT(); 924 925 if (!LoadedVT.isSimple()) 926 return false; 927 928 // Address Space Setting 929 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD); 930 931 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) { 932 return tryLDGLDU(N); 933 } 934 935 // Volatile Setting 936 // - .volatile is only availalble for .global and .shared 937 bool IsVolatile = MemSD->isVolatile(); 938 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 939 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 940 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 941 IsVolatile = false; 942 943 // Vector Setting 944 MVT SimpleVT = LoadedVT.getSimpleVT(); 945 946 // Type Setting: fromType + fromTypeWidth 947 // 948 // Sign : ISD::SEXTLOAD 949 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the 950 // type is integer 951 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float 952 MVT ScalarVT = SimpleVT.getScalarType(); 953 // Read at least 8 bits (predicates are stored as 8-bit values) 954 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits()); 955 unsigned int FromType; 956 // The last operand holds the original LoadSDNode::getExtensionType() value 957 unsigned ExtensionType = cast<ConstantSDNode>( 958 N->getOperand(N->getNumOperands() - 1))->getZExtValue(); 959 if (ExtensionType == ISD::SEXTLOAD) 960 FromType = NVPTX::PTXLdStInstCode::Signed; 961 else if (ScalarVT.isFloatingPoint()) 962 FromType = NVPTX::PTXLdStInstCode::Float; 963 else 964 FromType = NVPTX::PTXLdStInstCode::Unsigned; 965 966 unsigned VecType; 967 968 switch (N->getOpcode()) { 969 case NVPTXISD::LoadV2: 970 VecType = NVPTX::PTXLdStInstCode::V2; 971 break; 972 case NVPTXISD::LoadV4: 973 VecType = NVPTX::PTXLdStInstCode::V4; 974 break; 975 default: 976 return false; 977 } 978 979 EVT EltVT = N->getValueType(0); 980 981 if (SelectDirectAddr(Op1, Addr)) { 982 switch (N->getOpcode()) { 983 default: 984 return false; 985 case NVPTXISD::LoadV2: 986 switch (EltVT.getSimpleVT().SimpleTy) { 987 default: 988 return false; 989 case MVT::i8: 990 Opcode = NVPTX::LDV_i8_v2_avar; 991 break; 992 case MVT::i16: 993 Opcode = NVPTX::LDV_i16_v2_avar; 994 break; 995 case MVT::i32: 996 Opcode = NVPTX::LDV_i32_v2_avar; 997 break; 998 case MVT::i64: 999 Opcode = NVPTX::LDV_i64_v2_avar; 1000 break; 1001 case MVT::f32: 1002 Opcode = NVPTX::LDV_f32_v2_avar; 1003 break; 1004 case MVT::f64: 1005 Opcode = NVPTX::LDV_f64_v2_avar; 1006 break; 1007 } 1008 break; 1009 case NVPTXISD::LoadV4: 1010 switch (EltVT.getSimpleVT().SimpleTy) { 1011 default: 1012 return false; 1013 case MVT::i8: 1014 Opcode = NVPTX::LDV_i8_v4_avar; 1015 break; 1016 case MVT::i16: 1017 Opcode = NVPTX::LDV_i16_v4_avar; 1018 break; 1019 case MVT::i32: 1020 Opcode = NVPTX::LDV_i32_v4_avar; 1021 break; 1022 case MVT::f32: 1023 Opcode = NVPTX::LDV_f32_v4_avar; 1024 break; 1025 } 1026 break; 1027 } 1028 1029 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1030 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1031 getI32Imm(FromTypeWidth, DL), Addr, Chain }; 1032 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1033 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) 1034 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { 1035 switch (N->getOpcode()) { 1036 default: 1037 return false; 1038 case NVPTXISD::LoadV2: 1039 switch (EltVT.getSimpleVT().SimpleTy) { 1040 default: 1041 return false; 1042 case MVT::i8: 1043 Opcode = NVPTX::LDV_i8_v2_asi; 1044 break; 1045 case MVT::i16: 1046 Opcode = NVPTX::LDV_i16_v2_asi; 1047 break; 1048 case MVT::i32: 1049 Opcode = NVPTX::LDV_i32_v2_asi; 1050 break; 1051 case MVT::i64: 1052 Opcode = NVPTX::LDV_i64_v2_asi; 1053 break; 1054 case MVT::f32: 1055 Opcode = NVPTX::LDV_f32_v2_asi; 1056 break; 1057 case MVT::f64: 1058 Opcode = NVPTX::LDV_f64_v2_asi; 1059 break; 1060 } 1061 break; 1062 case NVPTXISD::LoadV4: 1063 switch (EltVT.getSimpleVT().SimpleTy) { 1064 default: 1065 return false; 1066 case MVT::i8: 1067 Opcode = NVPTX::LDV_i8_v4_asi; 1068 break; 1069 case MVT::i16: 1070 Opcode = NVPTX::LDV_i16_v4_asi; 1071 break; 1072 case MVT::i32: 1073 Opcode = NVPTX::LDV_i32_v4_asi; 1074 break; 1075 case MVT::f32: 1076 Opcode = NVPTX::LDV_f32_v4_asi; 1077 break; 1078 } 1079 break; 1080 } 1081 1082 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1083 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1084 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; 1085 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1086 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) 1087 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { 1088 if (TM.is64Bit()) { 1089 switch (N->getOpcode()) { 1090 default: 1091 return false; 1092 case NVPTXISD::LoadV2: 1093 switch (EltVT.getSimpleVT().SimpleTy) { 1094 default: 1095 return false; 1096 case MVT::i8: 1097 Opcode = NVPTX::LDV_i8_v2_ari_64; 1098 break; 1099 case MVT::i16: 1100 Opcode = NVPTX::LDV_i16_v2_ari_64; 1101 break; 1102 case MVT::i32: 1103 Opcode = NVPTX::LDV_i32_v2_ari_64; 1104 break; 1105 case MVT::i64: 1106 Opcode = NVPTX::LDV_i64_v2_ari_64; 1107 break; 1108 case MVT::f32: 1109 Opcode = NVPTX::LDV_f32_v2_ari_64; 1110 break; 1111 case MVT::f64: 1112 Opcode = NVPTX::LDV_f64_v2_ari_64; 1113 break; 1114 } 1115 break; 1116 case NVPTXISD::LoadV4: 1117 switch (EltVT.getSimpleVT().SimpleTy) { 1118 default: 1119 return false; 1120 case MVT::i8: 1121 Opcode = NVPTX::LDV_i8_v4_ari_64; 1122 break; 1123 case MVT::i16: 1124 Opcode = NVPTX::LDV_i16_v4_ari_64; 1125 break; 1126 case MVT::i32: 1127 Opcode = NVPTX::LDV_i32_v4_ari_64; 1128 break; 1129 case MVT::f32: 1130 Opcode = NVPTX::LDV_f32_v4_ari_64; 1131 break; 1132 } 1133 break; 1134 } 1135 } else { 1136 switch (N->getOpcode()) { 1137 default: 1138 return false; 1139 case NVPTXISD::LoadV2: 1140 switch (EltVT.getSimpleVT().SimpleTy) { 1141 default: 1142 return false; 1143 case MVT::i8: 1144 Opcode = NVPTX::LDV_i8_v2_ari; 1145 break; 1146 case MVT::i16: 1147 Opcode = NVPTX::LDV_i16_v2_ari; 1148 break; 1149 case MVT::i32: 1150 Opcode = NVPTX::LDV_i32_v2_ari; 1151 break; 1152 case MVT::i64: 1153 Opcode = NVPTX::LDV_i64_v2_ari; 1154 break; 1155 case MVT::f32: 1156 Opcode = NVPTX::LDV_f32_v2_ari; 1157 break; 1158 case MVT::f64: 1159 Opcode = NVPTX::LDV_f64_v2_ari; 1160 break; 1161 } 1162 break; 1163 case NVPTXISD::LoadV4: 1164 switch (EltVT.getSimpleVT().SimpleTy) { 1165 default: 1166 return false; 1167 case MVT::i8: 1168 Opcode = NVPTX::LDV_i8_v4_ari; 1169 break; 1170 case MVT::i16: 1171 Opcode = NVPTX::LDV_i16_v4_ari; 1172 break; 1173 case MVT::i32: 1174 Opcode = NVPTX::LDV_i32_v4_ari; 1175 break; 1176 case MVT::f32: 1177 Opcode = NVPTX::LDV_f32_v4_ari; 1178 break; 1179 } 1180 break; 1181 } 1182 } 1183 1184 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1185 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1186 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; 1187 1188 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1189 } else { 1190 if (TM.is64Bit()) { 1191 switch (N->getOpcode()) { 1192 default: 1193 return false; 1194 case NVPTXISD::LoadV2: 1195 switch (EltVT.getSimpleVT().SimpleTy) { 1196 default: 1197 return false; 1198 case MVT::i8: 1199 Opcode = NVPTX::LDV_i8_v2_areg_64; 1200 break; 1201 case MVT::i16: 1202 Opcode = NVPTX::LDV_i16_v2_areg_64; 1203 break; 1204 case MVT::i32: 1205 Opcode = NVPTX::LDV_i32_v2_areg_64; 1206 break; 1207 case MVT::i64: 1208 Opcode = NVPTX::LDV_i64_v2_areg_64; 1209 break; 1210 case MVT::f32: 1211 Opcode = NVPTX::LDV_f32_v2_areg_64; 1212 break; 1213 case MVT::f64: 1214 Opcode = NVPTX::LDV_f64_v2_areg_64; 1215 break; 1216 } 1217 break; 1218 case NVPTXISD::LoadV4: 1219 switch (EltVT.getSimpleVT().SimpleTy) { 1220 default: 1221 return false; 1222 case MVT::i8: 1223 Opcode = NVPTX::LDV_i8_v4_areg_64; 1224 break; 1225 case MVT::i16: 1226 Opcode = NVPTX::LDV_i16_v4_areg_64; 1227 break; 1228 case MVT::i32: 1229 Opcode = NVPTX::LDV_i32_v4_areg_64; 1230 break; 1231 case MVT::f32: 1232 Opcode = NVPTX::LDV_f32_v4_areg_64; 1233 break; 1234 } 1235 break; 1236 } 1237 } else { 1238 switch (N->getOpcode()) { 1239 default: 1240 return false; 1241 case NVPTXISD::LoadV2: 1242 switch (EltVT.getSimpleVT().SimpleTy) { 1243 default: 1244 return false; 1245 case MVT::i8: 1246 Opcode = NVPTX::LDV_i8_v2_areg; 1247 break; 1248 case MVT::i16: 1249 Opcode = NVPTX::LDV_i16_v2_areg; 1250 break; 1251 case MVT::i32: 1252 Opcode = NVPTX::LDV_i32_v2_areg; 1253 break; 1254 case MVT::i64: 1255 Opcode = NVPTX::LDV_i64_v2_areg; 1256 break; 1257 case MVT::f32: 1258 Opcode = NVPTX::LDV_f32_v2_areg; 1259 break; 1260 case MVT::f64: 1261 Opcode = NVPTX::LDV_f64_v2_areg; 1262 break; 1263 } 1264 break; 1265 case NVPTXISD::LoadV4: 1266 switch (EltVT.getSimpleVT().SimpleTy) { 1267 default: 1268 return false; 1269 case MVT::i8: 1270 Opcode = NVPTX::LDV_i8_v4_areg; 1271 break; 1272 case MVT::i16: 1273 Opcode = NVPTX::LDV_i16_v4_areg; 1274 break; 1275 case MVT::i32: 1276 Opcode = NVPTX::LDV_i32_v4_areg; 1277 break; 1278 case MVT::f32: 1279 Opcode = NVPTX::LDV_f32_v4_areg; 1280 break; 1281 } 1282 break; 1283 } 1284 } 1285 1286 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), 1287 getI32Imm(VecType, DL), getI32Imm(FromType, DL), 1288 getI32Imm(FromTypeWidth, DL), Op1, Chain }; 1289 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); 1290 } 1291 1292 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 1293 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 1294 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); 1295 1296 ReplaceNode(N, LD); 1297 return true; 1298 } 1299 1300 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { 1301 1302 SDValue Chain = N->getOperand(0); 1303 SDValue Op1; 1304 MemSDNode *Mem; 1305 bool IsLDG = true; 1306 1307 // If this is an LDG intrinsic, the address is the third operand. If its an 1308 // LDG/LDU SD node (from custom vector handling), then its the second operand 1309 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { 1310 Op1 = N->getOperand(2); 1311 Mem = cast<MemIntrinsicSDNode>(N); 1312 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 1313 switch (IID) { 1314 default: 1315 return false; 1316 case Intrinsic::nvvm_ldg_global_f: 1317 case Intrinsic::nvvm_ldg_global_i: 1318 case Intrinsic::nvvm_ldg_global_p: 1319 IsLDG = true; 1320 break; 1321 case Intrinsic::nvvm_ldu_global_f: 1322 case Intrinsic::nvvm_ldu_global_i: 1323 case Intrinsic::nvvm_ldu_global_p: 1324 IsLDG = false; 1325 break; 1326 } 1327 } else { 1328 Op1 = N->getOperand(1); 1329 Mem = cast<MemSDNode>(N); 1330 } 1331 1332 unsigned Opcode; 1333 SDLoc DL(N); 1334 SDNode *LD; 1335 SDValue Base, Offset, Addr; 1336 1337 EVT EltVT = Mem->getMemoryVT(); 1338 unsigned NumElts = 1; 1339 if (EltVT.isVector()) { 1340 NumElts = EltVT.getVectorNumElements(); 1341 EltVT = EltVT.getVectorElementType(); 1342 } 1343 1344 // Build the "promoted" result VTList for the load. If we are really loading 1345 // i8s, then the return type will be promoted to i16 since we do not expose 1346 // 8-bit registers in NVPTX. 1347 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT; 1348 SmallVector<EVT, 5> InstVTs; 1349 for (unsigned i = 0; i != NumElts; ++i) { 1350 InstVTs.push_back(NodeVT); 1351 } 1352 InstVTs.push_back(MVT::Other); 1353 SDVTList InstVTList = CurDAG->getVTList(InstVTs); 1354 1355 if (SelectDirectAddr(Op1, Addr)) { 1356 switch (N->getOpcode()) { 1357 default: 1358 return false; 1359 case ISD::INTRINSIC_W_CHAIN: 1360 if (IsLDG) { 1361 switch (EltVT.getSimpleVT().SimpleTy) { 1362 default: 1363 return false; 1364 case MVT::i8: 1365 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar; 1366 break; 1367 case MVT::i16: 1368 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar; 1369 break; 1370 case MVT::i32: 1371 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar; 1372 break; 1373 case MVT::i64: 1374 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar; 1375 break; 1376 case MVT::f32: 1377 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar; 1378 break; 1379 case MVT::f64: 1380 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar; 1381 break; 1382 } 1383 } else { 1384 switch (EltVT.getSimpleVT().SimpleTy) { 1385 default: 1386 return false; 1387 case MVT::i8: 1388 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar; 1389 break; 1390 case MVT::i16: 1391 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar; 1392 break; 1393 case MVT::i32: 1394 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar; 1395 break; 1396 case MVT::i64: 1397 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar; 1398 break; 1399 case MVT::f32: 1400 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar; 1401 break; 1402 case MVT::f64: 1403 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar; 1404 break; 1405 } 1406 } 1407 break; 1408 case NVPTXISD::LDGV2: 1409 switch (EltVT.getSimpleVT().SimpleTy) { 1410 default: 1411 return false; 1412 case MVT::i8: 1413 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar; 1414 break; 1415 case MVT::i16: 1416 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar; 1417 break; 1418 case MVT::i32: 1419 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar; 1420 break; 1421 case MVT::i64: 1422 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar; 1423 break; 1424 case MVT::f32: 1425 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar; 1426 break; 1427 case MVT::f64: 1428 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar; 1429 break; 1430 } 1431 break; 1432 case NVPTXISD::LDUV2: 1433 switch (EltVT.getSimpleVT().SimpleTy) { 1434 default: 1435 return false; 1436 case MVT::i8: 1437 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar; 1438 break; 1439 case MVT::i16: 1440 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar; 1441 break; 1442 case MVT::i32: 1443 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar; 1444 break; 1445 case MVT::i64: 1446 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar; 1447 break; 1448 case MVT::f32: 1449 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar; 1450 break; 1451 case MVT::f64: 1452 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar; 1453 break; 1454 } 1455 break; 1456 case NVPTXISD::LDGV4: 1457 switch (EltVT.getSimpleVT().SimpleTy) { 1458 default: 1459 return false; 1460 case MVT::i8: 1461 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar; 1462 break; 1463 case MVT::i16: 1464 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar; 1465 break; 1466 case MVT::i32: 1467 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar; 1468 break; 1469 case MVT::f32: 1470 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar; 1471 break; 1472 } 1473 break; 1474 case NVPTXISD::LDUV4: 1475 switch (EltVT.getSimpleVT().SimpleTy) { 1476 default: 1477 return false; 1478 case MVT::i8: 1479 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar; 1480 break; 1481 case MVT::i16: 1482 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar; 1483 break; 1484 case MVT::i32: 1485 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar; 1486 break; 1487 case MVT::f32: 1488 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar; 1489 break; 1490 } 1491 break; 1492 } 1493 1494 SDValue Ops[] = { Addr, Chain }; 1495 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops); 1496 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) 1497 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { 1498 if (TM.is64Bit()) { 1499 switch (N->getOpcode()) { 1500 default: 1501 return false; 1502 case ISD::LOAD: 1503 case ISD::INTRINSIC_W_CHAIN: 1504 if (IsLDG) { 1505 switch (EltVT.getSimpleVT().SimpleTy) { 1506 default: 1507 return false; 1508 case MVT::i8: 1509 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64; 1510 break; 1511 case MVT::i16: 1512 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64; 1513 break; 1514 case MVT::i32: 1515 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64; 1516 break; 1517 case MVT::i64: 1518 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64; 1519 break; 1520 case MVT::f32: 1521 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64; 1522 break; 1523 case MVT::f64: 1524 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64; 1525 break; 1526 } 1527 } else { 1528 switch (EltVT.getSimpleVT().SimpleTy) { 1529 default: 1530 return false; 1531 case MVT::i8: 1532 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64; 1533 break; 1534 case MVT::i16: 1535 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64; 1536 break; 1537 case MVT::i32: 1538 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64; 1539 break; 1540 case MVT::i64: 1541 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64; 1542 break; 1543 case MVT::f32: 1544 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64; 1545 break; 1546 case MVT::f64: 1547 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64; 1548 break; 1549 } 1550 } 1551 break; 1552 case NVPTXISD::LoadV2: 1553 case NVPTXISD::LDGV2: 1554 switch (EltVT.getSimpleVT().SimpleTy) { 1555 default: 1556 return false; 1557 case MVT::i8: 1558 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64; 1559 break; 1560 case MVT::i16: 1561 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64; 1562 break; 1563 case MVT::i32: 1564 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64; 1565 break; 1566 case MVT::i64: 1567 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64; 1568 break; 1569 case MVT::f32: 1570 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64; 1571 break; 1572 case MVT::f64: 1573 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64; 1574 break; 1575 } 1576 break; 1577 case NVPTXISD::LDUV2: 1578 switch (EltVT.getSimpleVT().SimpleTy) { 1579 default: 1580 return false; 1581 case MVT::i8: 1582 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64; 1583 break; 1584 case MVT::i16: 1585 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64; 1586 break; 1587 case MVT::i32: 1588 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64; 1589 break; 1590 case MVT::i64: 1591 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64; 1592 break; 1593 case MVT::f32: 1594 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64; 1595 break; 1596 case MVT::f64: 1597 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64; 1598 break; 1599 } 1600 break; 1601 case NVPTXISD::LoadV4: 1602 case NVPTXISD::LDGV4: 1603 switch (EltVT.getSimpleVT().SimpleTy) { 1604 default: 1605 return false; 1606 case MVT::i8: 1607 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64; 1608 break; 1609 case MVT::i16: 1610 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64; 1611 break; 1612 case MVT::i32: 1613 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64; 1614 break; 1615 case MVT::f32: 1616 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64; 1617 break; 1618 } 1619 break; 1620 case NVPTXISD::LDUV4: 1621 switch (EltVT.getSimpleVT().SimpleTy) { 1622 default: 1623 return false; 1624 case MVT::i8: 1625 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64; 1626 break; 1627 case MVT::i16: 1628 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64; 1629 break; 1630 case MVT::i32: 1631 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64; 1632 break; 1633 case MVT::f32: 1634 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64; 1635 break; 1636 } 1637 break; 1638 } 1639 } else { 1640 switch (N->getOpcode()) { 1641 default: 1642 return false; 1643 case ISD::LOAD: 1644 case ISD::INTRINSIC_W_CHAIN: 1645 if (IsLDG) { 1646 switch (EltVT.getSimpleVT().SimpleTy) { 1647 default: 1648 return false; 1649 case MVT::i8: 1650 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari; 1651 break; 1652 case MVT::i16: 1653 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari; 1654 break; 1655 case MVT::i32: 1656 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari; 1657 break; 1658 case MVT::i64: 1659 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari; 1660 break; 1661 case MVT::f32: 1662 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari; 1663 break; 1664 case MVT::f64: 1665 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari; 1666 break; 1667 } 1668 } else { 1669 switch (EltVT.getSimpleVT().SimpleTy) { 1670 default: 1671 return false; 1672 case MVT::i8: 1673 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari; 1674 break; 1675 case MVT::i16: 1676 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari; 1677 break; 1678 case MVT::i32: 1679 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari; 1680 break; 1681 case MVT::i64: 1682 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari; 1683 break; 1684 case MVT::f32: 1685 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari; 1686 break; 1687 case MVT::f64: 1688 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari; 1689 break; 1690 } 1691 } 1692 break; 1693 case NVPTXISD::LoadV2: 1694 case NVPTXISD::LDGV2: 1695 switch (EltVT.getSimpleVT().SimpleTy) { 1696 default: 1697 return false; 1698 case MVT::i8: 1699 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32; 1700 break; 1701 case MVT::i16: 1702 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32; 1703 break; 1704 case MVT::i32: 1705 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32; 1706 break; 1707 case MVT::i64: 1708 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32; 1709 break; 1710 case MVT::f32: 1711 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32; 1712 break; 1713 case MVT::f64: 1714 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32; 1715 break; 1716 } 1717 break; 1718 case NVPTXISD::LDUV2: 1719 switch (EltVT.getSimpleVT().SimpleTy) { 1720 default: 1721 return false; 1722 case MVT::i8: 1723 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32; 1724 break; 1725 case MVT::i16: 1726 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32; 1727 break; 1728 case MVT::i32: 1729 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32; 1730 break; 1731 case MVT::i64: 1732 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32; 1733 break; 1734 case MVT::f32: 1735 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32; 1736 break; 1737 case MVT::f64: 1738 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32; 1739 break; 1740 } 1741 break; 1742 case NVPTXISD::LoadV4: 1743 case NVPTXISD::LDGV4: 1744 switch (EltVT.getSimpleVT().SimpleTy) { 1745 default: 1746 return false; 1747 case MVT::i8: 1748 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32; 1749 break; 1750 case MVT::i16: 1751 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32; 1752 break; 1753 case MVT::i32: 1754 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32; 1755 break; 1756 case MVT::f32: 1757 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32; 1758 break; 1759 } 1760 break; 1761 case NVPTXISD::LDUV4: 1762 switch (EltVT.getSimpleVT().SimpleTy) { 1763 default: 1764 return false; 1765 case MVT::i8: 1766 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32; 1767 break; 1768 case MVT::i16: 1769 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32; 1770 break; 1771 case MVT::i32: 1772 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32; 1773 break; 1774 case MVT::f32: 1775 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32; 1776 break; 1777 } 1778 break; 1779 } 1780 } 1781 1782 SDValue Ops[] = { Base, Offset, Chain }; 1783 1784 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops); 1785 } else { 1786 if (TM.is64Bit()) { 1787 switch (N->getOpcode()) { 1788 default: 1789 return false; 1790 case ISD::LOAD: 1791 case ISD::INTRINSIC_W_CHAIN: 1792 if (IsLDG) { 1793 switch (EltVT.getSimpleVT().SimpleTy) { 1794 default: 1795 return false; 1796 case MVT::i8: 1797 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64; 1798 break; 1799 case MVT::i16: 1800 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64; 1801 break; 1802 case MVT::i32: 1803 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64; 1804 break; 1805 case MVT::i64: 1806 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64; 1807 break; 1808 case MVT::f32: 1809 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64; 1810 break; 1811 case MVT::f64: 1812 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64; 1813 break; 1814 } 1815 } else { 1816 switch (EltVT.getSimpleVT().SimpleTy) { 1817 default: 1818 return false; 1819 case MVT::i8: 1820 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64; 1821 break; 1822 case MVT::i16: 1823 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64; 1824 break; 1825 case MVT::i32: 1826 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64; 1827 break; 1828 case MVT::i64: 1829 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64; 1830 break; 1831 case MVT::f32: 1832 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64; 1833 break; 1834 case MVT::f64: 1835 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64; 1836 break; 1837 } 1838 } 1839 break; 1840 case NVPTXISD::LoadV2: 1841 case NVPTXISD::LDGV2: 1842 switch (EltVT.getSimpleVT().SimpleTy) { 1843 default: 1844 return false; 1845 case MVT::i8: 1846 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64; 1847 break; 1848 case MVT::i16: 1849 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64; 1850 break; 1851 case MVT::i32: 1852 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64; 1853 break; 1854 case MVT::i64: 1855 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64; 1856 break; 1857 case MVT::f32: 1858 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64; 1859 break; 1860 case MVT::f64: 1861 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64; 1862 break; 1863 } 1864 break; 1865 case NVPTXISD::LDUV2: 1866 switch (EltVT.getSimpleVT().SimpleTy) { 1867 default: 1868 return false; 1869 case MVT::i8: 1870 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64; 1871 break; 1872 case MVT::i16: 1873 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64; 1874 break; 1875 case MVT::i32: 1876 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64; 1877 break; 1878 case MVT::i64: 1879 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64; 1880 break; 1881 case MVT::f32: 1882 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64; 1883 break; 1884 case MVT::f64: 1885 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64; 1886 break; 1887 } 1888 break; 1889 case NVPTXISD::LoadV4: 1890 case NVPTXISD::LDGV4: 1891 switch (EltVT.getSimpleVT().SimpleTy) { 1892 default: 1893 return false; 1894 case MVT::i8: 1895 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64; 1896 break; 1897 case MVT::i16: 1898 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64; 1899 break; 1900 case MVT::i32: 1901 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64; 1902 break; 1903 case MVT::f32: 1904 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64; 1905 break; 1906 } 1907 break; 1908 case NVPTXISD::LDUV4: 1909 switch (EltVT.getSimpleVT().SimpleTy) { 1910 default: 1911 return false; 1912 case MVT::i8: 1913 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64; 1914 break; 1915 case MVT::i16: 1916 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64; 1917 break; 1918 case MVT::i32: 1919 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64; 1920 break; 1921 case MVT::f32: 1922 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64; 1923 break; 1924 } 1925 break; 1926 } 1927 } else { 1928 switch (N->getOpcode()) { 1929 default: 1930 return false; 1931 case ISD::LOAD: 1932 case ISD::INTRINSIC_W_CHAIN: 1933 if (IsLDG) { 1934 switch (EltVT.getSimpleVT().SimpleTy) { 1935 default: 1936 return false; 1937 case MVT::i8: 1938 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg; 1939 break; 1940 case MVT::i16: 1941 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg; 1942 break; 1943 case MVT::i32: 1944 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg; 1945 break; 1946 case MVT::i64: 1947 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg; 1948 break; 1949 case MVT::f32: 1950 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg; 1951 break; 1952 case MVT::f64: 1953 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg; 1954 break; 1955 } 1956 } else { 1957 switch (EltVT.getSimpleVT().SimpleTy) { 1958 default: 1959 return false; 1960 case MVT::i8: 1961 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg; 1962 break; 1963 case MVT::i16: 1964 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg; 1965 break; 1966 case MVT::i32: 1967 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg; 1968 break; 1969 case MVT::i64: 1970 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg; 1971 break; 1972 case MVT::f32: 1973 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg; 1974 break; 1975 case MVT::f64: 1976 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg; 1977 break; 1978 } 1979 } 1980 break; 1981 case NVPTXISD::LoadV2: 1982 case NVPTXISD::LDGV2: 1983 switch (EltVT.getSimpleVT().SimpleTy) { 1984 default: 1985 return false; 1986 case MVT::i8: 1987 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32; 1988 break; 1989 case MVT::i16: 1990 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32; 1991 break; 1992 case MVT::i32: 1993 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32; 1994 break; 1995 case MVT::i64: 1996 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32; 1997 break; 1998 case MVT::f32: 1999 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32; 2000 break; 2001 case MVT::f64: 2002 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32; 2003 break; 2004 } 2005 break; 2006 case NVPTXISD::LDUV2: 2007 switch (EltVT.getSimpleVT().SimpleTy) { 2008 default: 2009 return false; 2010 case MVT::i8: 2011 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32; 2012 break; 2013 case MVT::i16: 2014 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32; 2015 break; 2016 case MVT::i32: 2017 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32; 2018 break; 2019 case MVT::i64: 2020 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32; 2021 break; 2022 case MVT::f32: 2023 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32; 2024 break; 2025 case MVT::f64: 2026 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32; 2027 break; 2028 } 2029 break; 2030 case NVPTXISD::LoadV4: 2031 case NVPTXISD::LDGV4: 2032 switch (EltVT.getSimpleVT().SimpleTy) { 2033 default: 2034 return false; 2035 case MVT::i8: 2036 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32; 2037 break; 2038 case MVT::i16: 2039 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32; 2040 break; 2041 case MVT::i32: 2042 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32; 2043 break; 2044 case MVT::f32: 2045 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32; 2046 break; 2047 } 2048 break; 2049 case NVPTXISD::LDUV4: 2050 switch (EltVT.getSimpleVT().SimpleTy) { 2051 default: 2052 return false; 2053 case MVT::i8: 2054 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32; 2055 break; 2056 case MVT::i16: 2057 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32; 2058 break; 2059 case MVT::i32: 2060 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32; 2061 break; 2062 case MVT::f32: 2063 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32; 2064 break; 2065 } 2066 break; 2067 } 2068 } 2069 2070 SDValue Ops[] = { Op1, Chain }; 2071 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops); 2072 } 2073 2074 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 2075 MemRefs0[0] = Mem->getMemOperand(); 2076 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); 2077 2078 // For automatic generation of LDG (through SelectLoad[Vector], not the 2079 // intrinsics), we may have an extending load like: 2080 // 2081 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64 2082 // 2083 // In this case, the matching logic above will select a load for the original 2084 // memory type (in this case, i8) and our types will not match (the node needs 2085 // to return an i32 in this case). Our LDG/LDU nodes do not support the 2086 // concept of sign-/zero-extension, so emulate it here by adding an explicit 2087 // CVT instruction. Ptxas should clean up any redundancies here. 2088 2089 EVT OrigType = N->getValueType(0); 2090 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N); 2091 2092 if (OrigType != EltVT && LdNode) { 2093 // We have an extending-load. The instruction we selected operates on the 2094 // smaller type, but the SDNode we are replacing has the larger type. We 2095 // need to emit a CVT to make the types match. 2096 bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD; 2097 unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(), 2098 EltVT.getSimpleVT(), IsSigned); 2099 2100 // For each output value, apply the manual sign/zero-extension and make sure 2101 // all users of the load go through that CVT. 2102 for (unsigned i = 0; i != NumElts; ++i) { 2103 SDValue Res(LD, i); 2104 SDValue OrigVal(N, i); 2105 2106 SDNode *CvtNode = 2107 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res, 2108 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, 2109 DL, MVT::i32)); 2110 ReplaceUses(OrigVal, SDValue(CvtNode, 0)); 2111 } 2112 } 2113 2114 ReplaceNode(N, LD); 2115 return true; 2116 } 2117 2118 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) { 2119 SDLoc dl(N); 2120 StoreSDNode *ST = cast<StoreSDNode>(N); 2121 EVT StoreVT = ST->getMemoryVT(); 2122 SDNode *NVPTXST = nullptr; 2123 2124 // do not support pre/post inc/dec 2125 if (ST->isIndexed()) 2126 return false; 2127 2128 if (!StoreVT.isSimple()) 2129 return false; 2130 2131 // Address Space Setting 2132 unsigned int codeAddrSpace = getCodeAddrSpace(ST); 2133 2134 // Volatile Setting 2135 // - .volatile is only availalble for .global and .shared 2136 bool isVolatile = ST->isVolatile(); 2137 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 2138 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 2139 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 2140 isVolatile = false; 2141 2142 // Vector Setting 2143 MVT SimpleVT = StoreVT.getSimpleVT(); 2144 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; 2145 if (SimpleVT.isVector()) { 2146 unsigned num = SimpleVT.getVectorNumElements(); 2147 if (num == 2) 2148 vecType = NVPTX::PTXLdStInstCode::V2; 2149 else if (num == 4) 2150 vecType = NVPTX::PTXLdStInstCode::V4; 2151 else 2152 return false; 2153 } 2154 2155 // Type Setting: toType + toTypeWidth 2156 // - for integer type, always use 'u' 2157 // 2158 MVT ScalarVT = SimpleVT.getScalarType(); 2159 unsigned toTypeWidth = ScalarVT.getSizeInBits(); 2160 unsigned int toType; 2161 if (ScalarVT.isFloatingPoint()) 2162 toType = NVPTX::PTXLdStInstCode::Float; 2163 else 2164 toType = NVPTX::PTXLdStInstCode::Unsigned; 2165 2166 // Create the machine instruction DAG 2167 SDValue Chain = N->getOperand(0); 2168 SDValue N1 = N->getOperand(1); 2169 SDValue N2 = N->getOperand(2); 2170 SDValue Addr; 2171 SDValue Offset, Base; 2172 unsigned Opcode; 2173 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy; 2174 2175 if (SelectDirectAddr(N2, Addr)) { 2176 switch (SourceVT) { 2177 case MVT::i8: 2178 Opcode = NVPTX::ST_i8_avar; 2179 break; 2180 case MVT::i16: 2181 Opcode = NVPTX::ST_i16_avar; 2182 break; 2183 case MVT::i32: 2184 Opcode = NVPTX::ST_i32_avar; 2185 break; 2186 case MVT::i64: 2187 Opcode = NVPTX::ST_i64_avar; 2188 break; 2189 case MVT::f32: 2190 Opcode = NVPTX::ST_f32_avar; 2191 break; 2192 case MVT::f64: 2193 Opcode = NVPTX::ST_f64_avar; 2194 break; 2195 default: 2196 return false; 2197 } 2198 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), 2199 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), 2200 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr, 2201 Chain }; 2202 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); 2203 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) 2204 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { 2205 switch (SourceVT) { 2206 case MVT::i8: 2207 Opcode = NVPTX::ST_i8_asi; 2208 break; 2209 case MVT::i16: 2210 Opcode = NVPTX::ST_i16_asi; 2211 break; 2212 case MVT::i32: 2213 Opcode = NVPTX::ST_i32_asi; 2214 break; 2215 case MVT::i64: 2216 Opcode = NVPTX::ST_i64_asi; 2217 break; 2218 case MVT::f32: 2219 Opcode = NVPTX::ST_f32_asi; 2220 break; 2221 case MVT::f64: 2222 Opcode = NVPTX::ST_f64_asi; 2223 break; 2224 default: 2225 return false; 2226 } 2227 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), 2228 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), 2229 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base, 2230 Offset, Chain }; 2231 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); 2232 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset) 2233 : SelectADDRri(N2.getNode(), N2, Base, Offset)) { 2234 if (TM.is64Bit()) { 2235 switch (SourceVT) { 2236 case MVT::i8: 2237 Opcode = NVPTX::ST_i8_ari_64; 2238 break; 2239 case MVT::i16: 2240 Opcode = NVPTX::ST_i16_ari_64; 2241 break; 2242 case MVT::i32: 2243 Opcode = NVPTX::ST_i32_ari_64; 2244 break; 2245 case MVT::i64: 2246 Opcode = NVPTX::ST_i64_ari_64; 2247 break; 2248 case MVT::f32: 2249 Opcode = NVPTX::ST_f32_ari_64; 2250 break; 2251 case MVT::f64: 2252 Opcode = NVPTX::ST_f64_ari_64; 2253 break; 2254 default: 2255 return false; 2256 } 2257 } else { 2258 switch (SourceVT) { 2259 case MVT::i8: 2260 Opcode = NVPTX::ST_i8_ari; 2261 break; 2262 case MVT::i16: 2263 Opcode = NVPTX::ST_i16_ari; 2264 break; 2265 case MVT::i32: 2266 Opcode = NVPTX::ST_i32_ari; 2267 break; 2268 case MVT::i64: 2269 Opcode = NVPTX::ST_i64_ari; 2270 break; 2271 case MVT::f32: 2272 Opcode = NVPTX::ST_f32_ari; 2273 break; 2274 case MVT::f64: 2275 Opcode = NVPTX::ST_f64_ari; 2276 break; 2277 default: 2278 return false; 2279 } 2280 } 2281 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), 2282 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), 2283 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base, 2284 Offset, Chain }; 2285 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); 2286 } else { 2287 if (TM.is64Bit()) { 2288 switch (SourceVT) { 2289 case MVT::i8: 2290 Opcode = NVPTX::ST_i8_areg_64; 2291 break; 2292 case MVT::i16: 2293 Opcode = NVPTX::ST_i16_areg_64; 2294 break; 2295 case MVT::i32: 2296 Opcode = NVPTX::ST_i32_areg_64; 2297 break; 2298 case MVT::i64: 2299 Opcode = NVPTX::ST_i64_areg_64; 2300 break; 2301 case MVT::f32: 2302 Opcode = NVPTX::ST_f32_areg_64; 2303 break; 2304 case MVT::f64: 2305 Opcode = NVPTX::ST_f64_areg_64; 2306 break; 2307 default: 2308 return false; 2309 } 2310 } else { 2311 switch (SourceVT) { 2312 case MVT::i8: 2313 Opcode = NVPTX::ST_i8_areg; 2314 break; 2315 case MVT::i16: 2316 Opcode = NVPTX::ST_i16_areg; 2317 break; 2318 case MVT::i32: 2319 Opcode = NVPTX::ST_i32_areg; 2320 break; 2321 case MVT::i64: 2322 Opcode = NVPTX::ST_i64_areg; 2323 break; 2324 case MVT::f32: 2325 Opcode = NVPTX::ST_f32_areg; 2326 break; 2327 case MVT::f64: 2328 Opcode = NVPTX::ST_f64_areg; 2329 break; 2330 default: 2331 return false; 2332 } 2333 } 2334 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), 2335 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), 2336 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2, 2337 Chain }; 2338 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); 2339 } 2340 2341 if (!NVPTXST) 2342 return false; 2343 2344 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 2345 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 2346 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1); 2347 ReplaceNode(N, NVPTXST); 2348 return true; 2349 } 2350 2351 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) { 2352 SDValue Chain = N->getOperand(0); 2353 SDValue Op1 = N->getOperand(1); 2354 SDValue Addr, Offset, Base; 2355 unsigned Opcode; 2356 SDLoc DL(N); 2357 SDNode *ST; 2358 EVT EltVT = Op1.getValueType(); 2359 MemSDNode *MemSD = cast<MemSDNode>(N); 2360 EVT StoreVT = MemSD->getMemoryVT(); 2361 2362 // Address Space Setting 2363 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD); 2364 2365 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) { 2366 report_fatal_error("Cannot store to pointer that points to constant " 2367 "memory space"); 2368 } 2369 2370 // Volatile Setting 2371 // - .volatile is only availalble for .global and .shared 2372 bool IsVolatile = MemSD->isVolatile(); 2373 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && 2374 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && 2375 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) 2376 IsVolatile = false; 2377 2378 // Type Setting: toType + toTypeWidth 2379 // - for integer type, always use 'u' 2380 assert(StoreVT.isSimple() && "Store value is not simple"); 2381 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); 2382 unsigned ToTypeWidth = ScalarVT.getSizeInBits(); 2383 unsigned ToType; 2384 if (ScalarVT.isFloatingPoint()) 2385 ToType = NVPTX::PTXLdStInstCode::Float; 2386 else 2387 ToType = NVPTX::PTXLdStInstCode::Unsigned; 2388 2389 SmallVector<SDValue, 12> StOps; 2390 SDValue N2; 2391 unsigned VecType; 2392 2393 switch (N->getOpcode()) { 2394 case NVPTXISD::StoreV2: 2395 VecType = NVPTX::PTXLdStInstCode::V2; 2396 StOps.push_back(N->getOperand(1)); 2397 StOps.push_back(N->getOperand(2)); 2398 N2 = N->getOperand(3); 2399 break; 2400 case NVPTXISD::StoreV4: 2401 VecType = NVPTX::PTXLdStInstCode::V4; 2402 StOps.push_back(N->getOperand(1)); 2403 StOps.push_back(N->getOperand(2)); 2404 StOps.push_back(N->getOperand(3)); 2405 StOps.push_back(N->getOperand(4)); 2406 N2 = N->getOperand(5); 2407 break; 2408 default: 2409 return false; 2410 } 2411 2412 StOps.push_back(getI32Imm(IsVolatile, DL)); 2413 StOps.push_back(getI32Imm(CodeAddrSpace, DL)); 2414 StOps.push_back(getI32Imm(VecType, DL)); 2415 StOps.push_back(getI32Imm(ToType, DL)); 2416 StOps.push_back(getI32Imm(ToTypeWidth, DL)); 2417 2418 if (SelectDirectAddr(N2, Addr)) { 2419 switch (N->getOpcode()) { 2420 default: 2421 return false; 2422 case NVPTXISD::StoreV2: 2423 switch (EltVT.getSimpleVT().SimpleTy) { 2424 default: 2425 return false; 2426 case MVT::i8: 2427 Opcode = NVPTX::STV_i8_v2_avar; 2428 break; 2429 case MVT::i16: 2430 Opcode = NVPTX::STV_i16_v2_avar; 2431 break; 2432 case MVT::i32: 2433 Opcode = NVPTX::STV_i32_v2_avar; 2434 break; 2435 case MVT::i64: 2436 Opcode = NVPTX::STV_i64_v2_avar; 2437 break; 2438 case MVT::f32: 2439 Opcode = NVPTX::STV_f32_v2_avar; 2440 break; 2441 case MVT::f64: 2442 Opcode = NVPTX::STV_f64_v2_avar; 2443 break; 2444 } 2445 break; 2446 case NVPTXISD::StoreV4: 2447 switch (EltVT.getSimpleVT().SimpleTy) { 2448 default: 2449 return false; 2450 case MVT::i8: 2451 Opcode = NVPTX::STV_i8_v4_avar; 2452 break; 2453 case MVT::i16: 2454 Opcode = NVPTX::STV_i16_v4_avar; 2455 break; 2456 case MVT::i32: 2457 Opcode = NVPTX::STV_i32_v4_avar; 2458 break; 2459 case MVT::f32: 2460 Opcode = NVPTX::STV_f32_v4_avar; 2461 break; 2462 } 2463 break; 2464 } 2465 StOps.push_back(Addr); 2466 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) 2467 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { 2468 switch (N->getOpcode()) { 2469 default: 2470 return false; 2471 case NVPTXISD::StoreV2: 2472 switch (EltVT.getSimpleVT().SimpleTy) { 2473 default: 2474 return false; 2475 case MVT::i8: 2476 Opcode = NVPTX::STV_i8_v2_asi; 2477 break; 2478 case MVT::i16: 2479 Opcode = NVPTX::STV_i16_v2_asi; 2480 break; 2481 case MVT::i32: 2482 Opcode = NVPTX::STV_i32_v2_asi; 2483 break; 2484 case MVT::i64: 2485 Opcode = NVPTX::STV_i64_v2_asi; 2486 break; 2487 case MVT::f32: 2488 Opcode = NVPTX::STV_f32_v2_asi; 2489 break; 2490 case MVT::f64: 2491 Opcode = NVPTX::STV_f64_v2_asi; 2492 break; 2493 } 2494 break; 2495 case NVPTXISD::StoreV4: 2496 switch (EltVT.getSimpleVT().SimpleTy) { 2497 default: 2498 return false; 2499 case MVT::i8: 2500 Opcode = NVPTX::STV_i8_v4_asi; 2501 break; 2502 case MVT::i16: 2503 Opcode = NVPTX::STV_i16_v4_asi; 2504 break; 2505 case MVT::i32: 2506 Opcode = NVPTX::STV_i32_v4_asi; 2507 break; 2508 case MVT::f32: 2509 Opcode = NVPTX::STV_f32_v4_asi; 2510 break; 2511 } 2512 break; 2513 } 2514 StOps.push_back(Base); 2515 StOps.push_back(Offset); 2516 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset) 2517 : SelectADDRri(N2.getNode(), N2, Base, Offset)) { 2518 if (TM.is64Bit()) { 2519 switch (N->getOpcode()) { 2520 default: 2521 return false; 2522 case NVPTXISD::StoreV2: 2523 switch (EltVT.getSimpleVT().SimpleTy) { 2524 default: 2525 return false; 2526 case MVT::i8: 2527 Opcode = NVPTX::STV_i8_v2_ari_64; 2528 break; 2529 case MVT::i16: 2530 Opcode = NVPTX::STV_i16_v2_ari_64; 2531 break; 2532 case MVT::i32: 2533 Opcode = NVPTX::STV_i32_v2_ari_64; 2534 break; 2535 case MVT::i64: 2536 Opcode = NVPTX::STV_i64_v2_ari_64; 2537 break; 2538 case MVT::f32: 2539 Opcode = NVPTX::STV_f32_v2_ari_64; 2540 break; 2541 case MVT::f64: 2542 Opcode = NVPTX::STV_f64_v2_ari_64; 2543 break; 2544 } 2545 break; 2546 case NVPTXISD::StoreV4: 2547 switch (EltVT.getSimpleVT().SimpleTy) { 2548 default: 2549 return false; 2550 case MVT::i8: 2551 Opcode = NVPTX::STV_i8_v4_ari_64; 2552 break; 2553 case MVT::i16: 2554 Opcode = NVPTX::STV_i16_v4_ari_64; 2555 break; 2556 case MVT::i32: 2557 Opcode = NVPTX::STV_i32_v4_ari_64; 2558 break; 2559 case MVT::f32: 2560 Opcode = NVPTX::STV_f32_v4_ari_64; 2561 break; 2562 } 2563 break; 2564 } 2565 } else { 2566 switch (N->getOpcode()) { 2567 default: 2568 return false; 2569 case NVPTXISD::StoreV2: 2570 switch (EltVT.getSimpleVT().SimpleTy) { 2571 default: 2572 return false; 2573 case MVT::i8: 2574 Opcode = NVPTX::STV_i8_v2_ari; 2575 break; 2576 case MVT::i16: 2577 Opcode = NVPTX::STV_i16_v2_ari; 2578 break; 2579 case MVT::i32: 2580 Opcode = NVPTX::STV_i32_v2_ari; 2581 break; 2582 case MVT::i64: 2583 Opcode = NVPTX::STV_i64_v2_ari; 2584 break; 2585 case MVT::f32: 2586 Opcode = NVPTX::STV_f32_v2_ari; 2587 break; 2588 case MVT::f64: 2589 Opcode = NVPTX::STV_f64_v2_ari; 2590 break; 2591 } 2592 break; 2593 case NVPTXISD::StoreV4: 2594 switch (EltVT.getSimpleVT().SimpleTy) { 2595 default: 2596 return false; 2597 case MVT::i8: 2598 Opcode = NVPTX::STV_i8_v4_ari; 2599 break; 2600 case MVT::i16: 2601 Opcode = NVPTX::STV_i16_v4_ari; 2602 break; 2603 case MVT::i32: 2604 Opcode = NVPTX::STV_i32_v4_ari; 2605 break; 2606 case MVT::f32: 2607 Opcode = NVPTX::STV_f32_v4_ari; 2608 break; 2609 } 2610 break; 2611 } 2612 } 2613 StOps.push_back(Base); 2614 StOps.push_back(Offset); 2615 } else { 2616 if (TM.is64Bit()) { 2617 switch (N->getOpcode()) { 2618 default: 2619 return false; 2620 case NVPTXISD::StoreV2: 2621 switch (EltVT.getSimpleVT().SimpleTy) { 2622 default: 2623 return false; 2624 case MVT::i8: 2625 Opcode = NVPTX::STV_i8_v2_areg_64; 2626 break; 2627 case MVT::i16: 2628 Opcode = NVPTX::STV_i16_v2_areg_64; 2629 break; 2630 case MVT::i32: 2631 Opcode = NVPTX::STV_i32_v2_areg_64; 2632 break; 2633 case MVT::i64: 2634 Opcode = NVPTX::STV_i64_v2_areg_64; 2635 break; 2636 case MVT::f32: 2637 Opcode = NVPTX::STV_f32_v2_areg_64; 2638 break; 2639 case MVT::f64: 2640 Opcode = NVPTX::STV_f64_v2_areg_64; 2641 break; 2642 } 2643 break; 2644 case NVPTXISD::StoreV4: 2645 switch (EltVT.getSimpleVT().SimpleTy) { 2646 default: 2647 return false; 2648 case MVT::i8: 2649 Opcode = NVPTX::STV_i8_v4_areg_64; 2650 break; 2651 case MVT::i16: 2652 Opcode = NVPTX::STV_i16_v4_areg_64; 2653 break; 2654 case MVT::i32: 2655 Opcode = NVPTX::STV_i32_v4_areg_64; 2656 break; 2657 case MVT::f32: 2658 Opcode = NVPTX::STV_f32_v4_areg_64; 2659 break; 2660 } 2661 break; 2662 } 2663 } else { 2664 switch (N->getOpcode()) { 2665 default: 2666 return false; 2667 case NVPTXISD::StoreV2: 2668 switch (EltVT.getSimpleVT().SimpleTy) { 2669 default: 2670 return false; 2671 case MVT::i8: 2672 Opcode = NVPTX::STV_i8_v2_areg; 2673 break; 2674 case MVT::i16: 2675 Opcode = NVPTX::STV_i16_v2_areg; 2676 break; 2677 case MVT::i32: 2678 Opcode = NVPTX::STV_i32_v2_areg; 2679 break; 2680 case MVT::i64: 2681 Opcode = NVPTX::STV_i64_v2_areg; 2682 break; 2683 case MVT::f32: 2684 Opcode = NVPTX::STV_f32_v2_areg; 2685 break; 2686 case MVT::f64: 2687 Opcode = NVPTX::STV_f64_v2_areg; 2688 break; 2689 } 2690 break; 2691 case NVPTXISD::StoreV4: 2692 switch (EltVT.getSimpleVT().SimpleTy) { 2693 default: 2694 return false; 2695 case MVT::i8: 2696 Opcode = NVPTX::STV_i8_v4_areg; 2697 break; 2698 case MVT::i16: 2699 Opcode = NVPTX::STV_i16_v4_areg; 2700 break; 2701 case MVT::i32: 2702 Opcode = NVPTX::STV_i32_v4_areg; 2703 break; 2704 case MVT::f32: 2705 Opcode = NVPTX::STV_f32_v4_areg; 2706 break; 2707 } 2708 break; 2709 } 2710 } 2711 StOps.push_back(N2); 2712 } 2713 2714 StOps.push_back(Chain); 2715 2716 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps); 2717 2718 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 2719 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 2720 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1); 2721 2722 ReplaceNode(N, ST); 2723 return true; 2724 } 2725 2726 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) { 2727 SDValue Chain = Node->getOperand(0); 2728 SDValue Offset = Node->getOperand(2); 2729 SDValue Flag = Node->getOperand(3); 2730 SDLoc DL(Node); 2731 MemSDNode *Mem = cast<MemSDNode>(Node); 2732 2733 unsigned VecSize; 2734 switch (Node->getOpcode()) { 2735 default: 2736 return false; 2737 case NVPTXISD::LoadParam: 2738 VecSize = 1; 2739 break; 2740 case NVPTXISD::LoadParamV2: 2741 VecSize = 2; 2742 break; 2743 case NVPTXISD::LoadParamV4: 2744 VecSize = 4; 2745 break; 2746 } 2747 2748 EVT EltVT = Node->getValueType(0); 2749 EVT MemVT = Mem->getMemoryVT(); 2750 2751 unsigned Opc = 0; 2752 2753 switch (VecSize) { 2754 default: 2755 return false; 2756 case 1: 2757 switch (MemVT.getSimpleVT().SimpleTy) { 2758 default: 2759 return false; 2760 case MVT::i1: 2761 Opc = NVPTX::LoadParamMemI8; 2762 break; 2763 case MVT::i8: 2764 Opc = NVPTX::LoadParamMemI8; 2765 break; 2766 case MVT::i16: 2767 Opc = NVPTX::LoadParamMemI16; 2768 break; 2769 case MVT::i32: 2770 Opc = NVPTX::LoadParamMemI32; 2771 break; 2772 case MVT::i64: 2773 Opc = NVPTX::LoadParamMemI64; 2774 break; 2775 case MVT::f32: 2776 Opc = NVPTX::LoadParamMemF32; 2777 break; 2778 case MVT::f64: 2779 Opc = NVPTX::LoadParamMemF64; 2780 break; 2781 } 2782 break; 2783 case 2: 2784 switch (MemVT.getSimpleVT().SimpleTy) { 2785 default: 2786 return false; 2787 case MVT::i1: 2788 Opc = NVPTX::LoadParamMemV2I8; 2789 break; 2790 case MVT::i8: 2791 Opc = NVPTX::LoadParamMemV2I8; 2792 break; 2793 case MVT::i16: 2794 Opc = NVPTX::LoadParamMemV2I16; 2795 break; 2796 case MVT::i32: 2797 Opc = NVPTX::LoadParamMemV2I32; 2798 break; 2799 case MVT::i64: 2800 Opc = NVPTX::LoadParamMemV2I64; 2801 break; 2802 case MVT::f32: 2803 Opc = NVPTX::LoadParamMemV2F32; 2804 break; 2805 case MVT::f64: 2806 Opc = NVPTX::LoadParamMemV2F64; 2807 break; 2808 } 2809 break; 2810 case 4: 2811 switch (MemVT.getSimpleVT().SimpleTy) { 2812 default: 2813 return false; 2814 case MVT::i1: 2815 Opc = NVPTX::LoadParamMemV4I8; 2816 break; 2817 case MVT::i8: 2818 Opc = NVPTX::LoadParamMemV4I8; 2819 break; 2820 case MVT::i16: 2821 Opc = NVPTX::LoadParamMemV4I16; 2822 break; 2823 case MVT::i32: 2824 Opc = NVPTX::LoadParamMemV4I32; 2825 break; 2826 case MVT::f32: 2827 Opc = NVPTX::LoadParamMemV4F32; 2828 break; 2829 } 2830 break; 2831 } 2832 2833 SDVTList VTs; 2834 if (VecSize == 1) { 2835 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue); 2836 } else if (VecSize == 2) { 2837 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue); 2838 } else { 2839 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue }; 2840 VTs = CurDAG->getVTList(EVTs); 2841 } 2842 2843 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2844 2845 SmallVector<SDValue, 2> Ops; 2846 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); 2847 Ops.push_back(Chain); 2848 Ops.push_back(Flag); 2849 2850 ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, VTs, Ops)); 2851 return true; 2852 } 2853 2854 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) { 2855 SDLoc DL(N); 2856 SDValue Chain = N->getOperand(0); 2857 SDValue Offset = N->getOperand(1); 2858 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2859 MemSDNode *Mem = cast<MemSDNode>(N); 2860 2861 // How many elements do we have? 2862 unsigned NumElts = 1; 2863 switch (N->getOpcode()) { 2864 default: 2865 return false; 2866 case NVPTXISD::StoreRetval: 2867 NumElts = 1; 2868 break; 2869 case NVPTXISD::StoreRetvalV2: 2870 NumElts = 2; 2871 break; 2872 case NVPTXISD::StoreRetvalV4: 2873 NumElts = 4; 2874 break; 2875 } 2876 2877 // Build vector of operands 2878 SmallVector<SDValue, 6> Ops; 2879 for (unsigned i = 0; i < NumElts; ++i) 2880 Ops.push_back(N->getOperand(i + 2)); 2881 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); 2882 Ops.push_back(Chain); 2883 2884 // Determine target opcode 2885 // If we have an i1, use an 8-bit store. The lowering code in 2886 // NVPTXISelLowering will have already emitted an upcast. 2887 unsigned Opcode = 0; 2888 switch (NumElts) { 2889 default: 2890 return false; 2891 case 1: 2892 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2893 default: 2894 return false; 2895 case MVT::i1: 2896 Opcode = NVPTX::StoreRetvalI8; 2897 break; 2898 case MVT::i8: 2899 Opcode = NVPTX::StoreRetvalI8; 2900 break; 2901 case MVT::i16: 2902 Opcode = NVPTX::StoreRetvalI16; 2903 break; 2904 case MVT::i32: 2905 Opcode = NVPTX::StoreRetvalI32; 2906 break; 2907 case MVT::i64: 2908 Opcode = NVPTX::StoreRetvalI64; 2909 break; 2910 case MVT::f32: 2911 Opcode = NVPTX::StoreRetvalF32; 2912 break; 2913 case MVT::f64: 2914 Opcode = NVPTX::StoreRetvalF64; 2915 break; 2916 } 2917 break; 2918 case 2: 2919 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2920 default: 2921 return false; 2922 case MVT::i1: 2923 Opcode = NVPTX::StoreRetvalV2I8; 2924 break; 2925 case MVT::i8: 2926 Opcode = NVPTX::StoreRetvalV2I8; 2927 break; 2928 case MVT::i16: 2929 Opcode = NVPTX::StoreRetvalV2I16; 2930 break; 2931 case MVT::i32: 2932 Opcode = NVPTX::StoreRetvalV2I32; 2933 break; 2934 case MVT::i64: 2935 Opcode = NVPTX::StoreRetvalV2I64; 2936 break; 2937 case MVT::f32: 2938 Opcode = NVPTX::StoreRetvalV2F32; 2939 break; 2940 case MVT::f64: 2941 Opcode = NVPTX::StoreRetvalV2F64; 2942 break; 2943 } 2944 break; 2945 case 4: 2946 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 2947 default: 2948 return false; 2949 case MVT::i1: 2950 Opcode = NVPTX::StoreRetvalV4I8; 2951 break; 2952 case MVT::i8: 2953 Opcode = NVPTX::StoreRetvalV4I8; 2954 break; 2955 case MVT::i16: 2956 Opcode = NVPTX::StoreRetvalV4I16; 2957 break; 2958 case MVT::i32: 2959 Opcode = NVPTX::StoreRetvalV4I32; 2960 break; 2961 case MVT::f32: 2962 Opcode = NVPTX::StoreRetvalV4F32; 2963 break; 2964 } 2965 break; 2966 } 2967 2968 SDNode *Ret = 2969 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); 2970 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 2971 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 2972 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); 2973 2974 ReplaceNode(N, Ret); 2975 return true; 2976 } 2977 2978 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) { 2979 SDLoc DL(N); 2980 SDValue Chain = N->getOperand(0); 2981 SDValue Param = N->getOperand(1); 2982 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue(); 2983 SDValue Offset = N->getOperand(2); 2984 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); 2985 MemSDNode *Mem = cast<MemSDNode>(N); 2986 SDValue Flag = N->getOperand(N->getNumOperands() - 1); 2987 2988 // How many elements do we have? 2989 unsigned NumElts = 1; 2990 switch (N->getOpcode()) { 2991 default: 2992 return false; 2993 case NVPTXISD::StoreParamU32: 2994 case NVPTXISD::StoreParamS32: 2995 case NVPTXISD::StoreParam: 2996 NumElts = 1; 2997 break; 2998 case NVPTXISD::StoreParamV2: 2999 NumElts = 2; 3000 break; 3001 case NVPTXISD::StoreParamV4: 3002 NumElts = 4; 3003 break; 3004 } 3005 3006 // Build vector of operands 3007 SmallVector<SDValue, 8> Ops; 3008 for (unsigned i = 0; i < NumElts; ++i) 3009 Ops.push_back(N->getOperand(i + 3)); 3010 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32)); 3011 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); 3012 Ops.push_back(Chain); 3013 Ops.push_back(Flag); 3014 3015 // Determine target opcode 3016 // If we have an i1, use an 8-bit store. The lowering code in 3017 // NVPTXISelLowering will have already emitted an upcast. 3018 unsigned Opcode = 0; 3019 switch (N->getOpcode()) { 3020 default: 3021 switch (NumElts) { 3022 default: 3023 return false; 3024 case 1: 3025 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 3026 default: 3027 return false; 3028 case MVT::i1: 3029 Opcode = NVPTX::StoreParamI8; 3030 break; 3031 case MVT::i8: 3032 Opcode = NVPTX::StoreParamI8; 3033 break; 3034 case MVT::i16: 3035 Opcode = NVPTX::StoreParamI16; 3036 break; 3037 case MVT::i32: 3038 Opcode = NVPTX::StoreParamI32; 3039 break; 3040 case MVT::i64: 3041 Opcode = NVPTX::StoreParamI64; 3042 break; 3043 case MVT::f32: 3044 Opcode = NVPTX::StoreParamF32; 3045 break; 3046 case MVT::f64: 3047 Opcode = NVPTX::StoreParamF64; 3048 break; 3049 } 3050 break; 3051 case 2: 3052 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 3053 default: 3054 return false; 3055 case MVT::i1: 3056 Opcode = NVPTX::StoreParamV2I8; 3057 break; 3058 case MVT::i8: 3059 Opcode = NVPTX::StoreParamV2I8; 3060 break; 3061 case MVT::i16: 3062 Opcode = NVPTX::StoreParamV2I16; 3063 break; 3064 case MVT::i32: 3065 Opcode = NVPTX::StoreParamV2I32; 3066 break; 3067 case MVT::i64: 3068 Opcode = NVPTX::StoreParamV2I64; 3069 break; 3070 case MVT::f32: 3071 Opcode = NVPTX::StoreParamV2F32; 3072 break; 3073 case MVT::f64: 3074 Opcode = NVPTX::StoreParamV2F64; 3075 break; 3076 } 3077 break; 3078 case 4: 3079 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { 3080 default: 3081 return false; 3082 case MVT::i1: 3083 Opcode = NVPTX::StoreParamV4I8; 3084 break; 3085 case MVT::i8: 3086 Opcode = NVPTX::StoreParamV4I8; 3087 break; 3088 case MVT::i16: 3089 Opcode = NVPTX::StoreParamV4I16; 3090 break; 3091 case MVT::i32: 3092 Opcode = NVPTX::StoreParamV4I32; 3093 break; 3094 case MVT::f32: 3095 Opcode = NVPTX::StoreParamV4F32; 3096 break; 3097 } 3098 break; 3099 } 3100 break; 3101 // Special case: if we have a sign-extend/zero-extend node, insert the 3102 // conversion instruction first, and use that as the value operand to 3103 // the selected StoreParam node. 3104 case NVPTXISD::StoreParamU32: { 3105 Opcode = NVPTX::StoreParamI32; 3106 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, 3107 MVT::i32); 3108 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL, 3109 MVT::i32, Ops[0], CvtNone); 3110 Ops[0] = SDValue(Cvt, 0); 3111 break; 3112 } 3113 case NVPTXISD::StoreParamS32: { 3114 Opcode = NVPTX::StoreParamI32; 3115 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, 3116 MVT::i32); 3117 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL, 3118 MVT::i32, Ops[0], CvtNone); 3119 Ops[0] = SDValue(Cvt, 0); 3120 break; 3121 } 3122 } 3123 3124 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue); 3125 SDNode *Ret = 3126 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops); 3127 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); 3128 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); 3129 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1); 3130 3131 ReplaceNode(N, Ret); 3132 return true; 3133 } 3134 3135 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) { 3136 SDValue Chain = N->getOperand(0); 3137 unsigned Opc = 0; 3138 SmallVector<SDValue, 8> Ops; 3139 3140 switch (N->getOpcode()) { 3141 default: return false; 3142 case NVPTXISD::Tex1DFloatS32: 3143 Opc = NVPTX::TEX_1D_F32_S32; 3144 break; 3145 case NVPTXISD::Tex1DFloatFloat: 3146 Opc = NVPTX::TEX_1D_F32_F32; 3147 break; 3148 case NVPTXISD::Tex1DFloatFloatLevel: 3149 Opc = NVPTX::TEX_1D_F32_F32_LEVEL; 3150 break; 3151 case NVPTXISD::Tex1DFloatFloatGrad: 3152 Opc = NVPTX::TEX_1D_F32_F32_GRAD; 3153 break; 3154 case NVPTXISD::Tex1DS32S32: 3155 Opc = NVPTX::TEX_1D_S32_S32; 3156 break; 3157 case NVPTXISD::Tex1DS32Float: 3158 Opc = NVPTX::TEX_1D_S32_F32; 3159 break; 3160 case NVPTXISD::Tex1DS32FloatLevel: 3161 Opc = NVPTX::TEX_1D_S32_F32_LEVEL; 3162 break; 3163 case NVPTXISD::Tex1DS32FloatGrad: 3164 Opc = NVPTX::TEX_1D_S32_F32_GRAD; 3165 break; 3166 case NVPTXISD::Tex1DU32S32: 3167 Opc = NVPTX::TEX_1D_U32_S32; 3168 break; 3169 case NVPTXISD::Tex1DU32Float: 3170 Opc = NVPTX::TEX_1D_U32_F32; 3171 break; 3172 case NVPTXISD::Tex1DU32FloatLevel: 3173 Opc = NVPTX::TEX_1D_U32_F32_LEVEL; 3174 break; 3175 case NVPTXISD::Tex1DU32FloatGrad: 3176 Opc = NVPTX::TEX_1D_U32_F32_GRAD; 3177 break; 3178 case NVPTXISD::Tex1DArrayFloatS32: 3179 Opc = NVPTX::TEX_1D_ARRAY_F32_S32; 3180 break; 3181 case NVPTXISD::Tex1DArrayFloatFloat: 3182 Opc = NVPTX::TEX_1D_ARRAY_F32_F32; 3183 break; 3184 case NVPTXISD::Tex1DArrayFloatFloatLevel: 3185 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; 3186 break; 3187 case NVPTXISD::Tex1DArrayFloatFloatGrad: 3188 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; 3189 break; 3190 case NVPTXISD::Tex1DArrayS32S32: 3191 Opc = NVPTX::TEX_1D_ARRAY_S32_S32; 3192 break; 3193 case NVPTXISD::Tex1DArrayS32Float: 3194 Opc = NVPTX::TEX_1D_ARRAY_S32_F32; 3195 break; 3196 case NVPTXISD::Tex1DArrayS32FloatLevel: 3197 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL; 3198 break; 3199 case NVPTXISD::Tex1DArrayS32FloatGrad: 3200 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD; 3201 break; 3202 case NVPTXISD::Tex1DArrayU32S32: 3203 Opc = NVPTX::TEX_1D_ARRAY_U32_S32; 3204 break; 3205 case NVPTXISD::Tex1DArrayU32Float: 3206 Opc = NVPTX::TEX_1D_ARRAY_U32_F32; 3207 break; 3208 case NVPTXISD::Tex1DArrayU32FloatLevel: 3209 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL; 3210 break; 3211 case NVPTXISD::Tex1DArrayU32FloatGrad: 3212 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD; 3213 break; 3214 case NVPTXISD::Tex2DFloatS32: 3215 Opc = NVPTX::TEX_2D_F32_S32; 3216 break; 3217 case NVPTXISD::Tex2DFloatFloat: 3218 Opc = NVPTX::TEX_2D_F32_F32; 3219 break; 3220 case NVPTXISD::Tex2DFloatFloatLevel: 3221 Opc = NVPTX::TEX_2D_F32_F32_LEVEL; 3222 break; 3223 case NVPTXISD::Tex2DFloatFloatGrad: 3224 Opc = NVPTX::TEX_2D_F32_F32_GRAD; 3225 break; 3226 case NVPTXISD::Tex2DS32S32: 3227 Opc = NVPTX::TEX_2D_S32_S32; 3228 break; 3229 case NVPTXISD::Tex2DS32Float: 3230 Opc = NVPTX::TEX_2D_S32_F32; 3231 break; 3232 case NVPTXISD::Tex2DS32FloatLevel: 3233 Opc = NVPTX::TEX_2D_S32_F32_LEVEL; 3234 break; 3235 case NVPTXISD::Tex2DS32FloatGrad: 3236 Opc = NVPTX::TEX_2D_S32_F32_GRAD; 3237 break; 3238 case NVPTXISD::Tex2DU32S32: 3239 Opc = NVPTX::TEX_2D_U32_S32; 3240 break; 3241 case NVPTXISD::Tex2DU32Float: 3242 Opc = NVPTX::TEX_2D_U32_F32; 3243 break; 3244 case NVPTXISD::Tex2DU32FloatLevel: 3245 Opc = NVPTX::TEX_2D_U32_F32_LEVEL; 3246 break; 3247 case NVPTXISD::Tex2DU32FloatGrad: 3248 Opc = NVPTX::TEX_2D_U32_F32_GRAD; 3249 break; 3250 case NVPTXISD::Tex2DArrayFloatS32: 3251 Opc = NVPTX::TEX_2D_ARRAY_F32_S32; 3252 break; 3253 case NVPTXISD::Tex2DArrayFloatFloat: 3254 Opc = NVPTX::TEX_2D_ARRAY_F32_F32; 3255 break; 3256 case NVPTXISD::Tex2DArrayFloatFloatLevel: 3257 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; 3258 break; 3259 case NVPTXISD::Tex2DArrayFloatFloatGrad: 3260 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; 3261 break; 3262 case NVPTXISD::Tex2DArrayS32S32: 3263 Opc = NVPTX::TEX_2D_ARRAY_S32_S32; 3264 break; 3265 case NVPTXISD::Tex2DArrayS32Float: 3266 Opc = NVPTX::TEX_2D_ARRAY_S32_F32; 3267 break; 3268 case NVPTXISD::Tex2DArrayS32FloatLevel: 3269 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL; 3270 break; 3271 case NVPTXISD::Tex2DArrayS32FloatGrad: 3272 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD; 3273 break; 3274 case NVPTXISD::Tex2DArrayU32S32: 3275 Opc = NVPTX::TEX_2D_ARRAY_U32_S32; 3276 break; 3277 case NVPTXISD::Tex2DArrayU32Float: 3278 Opc = NVPTX::TEX_2D_ARRAY_U32_F32; 3279 break; 3280 case NVPTXISD::Tex2DArrayU32FloatLevel: 3281 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL; 3282 break; 3283 case NVPTXISD::Tex2DArrayU32FloatGrad: 3284 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD; 3285 break; 3286 case NVPTXISD::Tex3DFloatS32: 3287 Opc = NVPTX::TEX_3D_F32_S32; 3288 break; 3289 case NVPTXISD::Tex3DFloatFloat: 3290 Opc = NVPTX::TEX_3D_F32_F32; 3291 break; 3292 case NVPTXISD::Tex3DFloatFloatLevel: 3293 Opc = NVPTX::TEX_3D_F32_F32_LEVEL; 3294 break; 3295 case NVPTXISD::Tex3DFloatFloatGrad: 3296 Opc = NVPTX::TEX_3D_F32_F32_GRAD; 3297 break; 3298 case NVPTXISD::Tex3DS32S32: 3299 Opc = NVPTX::TEX_3D_S32_S32; 3300 break; 3301 case NVPTXISD::Tex3DS32Float: 3302 Opc = NVPTX::TEX_3D_S32_F32; 3303 break; 3304 case NVPTXISD::Tex3DS32FloatLevel: 3305 Opc = NVPTX::TEX_3D_S32_F32_LEVEL; 3306 break; 3307 case NVPTXISD::Tex3DS32FloatGrad: 3308 Opc = NVPTX::TEX_3D_S32_F32_GRAD; 3309 break; 3310 case NVPTXISD::Tex3DU32S32: 3311 Opc = NVPTX::TEX_3D_U32_S32; 3312 break; 3313 case NVPTXISD::Tex3DU32Float: 3314 Opc = NVPTX::TEX_3D_U32_F32; 3315 break; 3316 case NVPTXISD::Tex3DU32FloatLevel: 3317 Opc = NVPTX::TEX_3D_U32_F32_LEVEL; 3318 break; 3319 case NVPTXISD::Tex3DU32FloatGrad: 3320 Opc = NVPTX::TEX_3D_U32_F32_GRAD; 3321 break; 3322 case NVPTXISD::TexCubeFloatFloat: 3323 Opc = NVPTX::TEX_CUBE_F32_F32; 3324 break; 3325 case NVPTXISD::TexCubeFloatFloatLevel: 3326 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL; 3327 break; 3328 case NVPTXISD::TexCubeS32Float: 3329 Opc = NVPTX::TEX_CUBE_S32_F32; 3330 break; 3331 case NVPTXISD::TexCubeS32FloatLevel: 3332 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL; 3333 break; 3334 case NVPTXISD::TexCubeU32Float: 3335 Opc = NVPTX::TEX_CUBE_U32_F32; 3336 break; 3337 case NVPTXISD::TexCubeU32FloatLevel: 3338 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL; 3339 break; 3340 case NVPTXISD::TexCubeArrayFloatFloat: 3341 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32; 3342 break; 3343 case NVPTXISD::TexCubeArrayFloatFloatLevel: 3344 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL; 3345 break; 3346 case NVPTXISD::TexCubeArrayS32Float: 3347 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32; 3348 break; 3349 case NVPTXISD::TexCubeArrayS32FloatLevel: 3350 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL; 3351 break; 3352 case NVPTXISD::TexCubeArrayU32Float: 3353 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32; 3354 break; 3355 case NVPTXISD::TexCubeArrayU32FloatLevel: 3356 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL; 3357 break; 3358 case NVPTXISD::Tld4R2DFloatFloat: 3359 Opc = NVPTX::TLD4_R_2D_F32_F32; 3360 break; 3361 case NVPTXISD::Tld4G2DFloatFloat: 3362 Opc = NVPTX::TLD4_G_2D_F32_F32; 3363 break; 3364 case NVPTXISD::Tld4B2DFloatFloat: 3365 Opc = NVPTX::TLD4_B_2D_F32_F32; 3366 break; 3367 case NVPTXISD::Tld4A2DFloatFloat: 3368 Opc = NVPTX::TLD4_A_2D_F32_F32; 3369 break; 3370 case NVPTXISD::Tld4R2DS64Float: 3371 Opc = NVPTX::TLD4_R_2D_S32_F32; 3372 break; 3373 case NVPTXISD::Tld4G2DS64Float: 3374 Opc = NVPTX::TLD4_G_2D_S32_F32; 3375 break; 3376 case NVPTXISD::Tld4B2DS64Float: 3377 Opc = NVPTX::TLD4_B_2D_S32_F32; 3378 break; 3379 case NVPTXISD::Tld4A2DS64Float: 3380 Opc = NVPTX::TLD4_A_2D_S32_F32; 3381 break; 3382 case NVPTXISD::Tld4R2DU64Float: 3383 Opc = NVPTX::TLD4_R_2D_U32_F32; 3384 break; 3385 case NVPTXISD::Tld4G2DU64Float: 3386 Opc = NVPTX::TLD4_G_2D_U32_F32; 3387 break; 3388 case NVPTXISD::Tld4B2DU64Float: 3389 Opc = NVPTX::TLD4_B_2D_U32_F32; 3390 break; 3391 case NVPTXISD::Tld4A2DU64Float: 3392 Opc = NVPTX::TLD4_A_2D_U32_F32; 3393 break; 3394 case NVPTXISD::TexUnified1DFloatS32: 3395 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32; 3396 break; 3397 case NVPTXISD::TexUnified1DFloatFloat: 3398 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32; 3399 break; 3400 case NVPTXISD::TexUnified1DFloatFloatLevel: 3401 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL; 3402 break; 3403 case NVPTXISD::TexUnified1DFloatFloatGrad: 3404 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD; 3405 break; 3406 case NVPTXISD::TexUnified1DS32S32: 3407 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32; 3408 break; 3409 case NVPTXISD::TexUnified1DS32Float: 3410 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32; 3411 break; 3412 case NVPTXISD::TexUnified1DS32FloatLevel: 3413 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL; 3414 break; 3415 case NVPTXISD::TexUnified1DS32FloatGrad: 3416 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD; 3417 break; 3418 case NVPTXISD::TexUnified1DU32S32: 3419 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32; 3420 break; 3421 case NVPTXISD::TexUnified1DU32Float: 3422 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32; 3423 break; 3424 case NVPTXISD::TexUnified1DU32FloatLevel: 3425 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL; 3426 break; 3427 case NVPTXISD::TexUnified1DU32FloatGrad: 3428 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD; 3429 break; 3430 case NVPTXISD::TexUnified1DArrayFloatS32: 3431 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32; 3432 break; 3433 case NVPTXISD::TexUnified1DArrayFloatFloat: 3434 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32; 3435 break; 3436 case NVPTXISD::TexUnified1DArrayFloatFloatLevel: 3437 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL; 3438 break; 3439 case NVPTXISD::TexUnified1DArrayFloatFloatGrad: 3440 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD; 3441 break; 3442 case NVPTXISD::TexUnified1DArrayS32S32: 3443 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32; 3444 break; 3445 case NVPTXISD::TexUnified1DArrayS32Float: 3446 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32; 3447 break; 3448 case NVPTXISD::TexUnified1DArrayS32FloatLevel: 3449 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL; 3450 break; 3451 case NVPTXISD::TexUnified1DArrayS32FloatGrad: 3452 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD; 3453 break; 3454 case NVPTXISD::TexUnified1DArrayU32S32: 3455 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32; 3456 break; 3457 case NVPTXISD::TexUnified1DArrayU32Float: 3458 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32; 3459 break; 3460 case NVPTXISD::TexUnified1DArrayU32FloatLevel: 3461 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL; 3462 break; 3463 case NVPTXISD::TexUnified1DArrayU32FloatGrad: 3464 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD; 3465 break; 3466 case NVPTXISD::TexUnified2DFloatS32: 3467 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32; 3468 break; 3469 case NVPTXISD::TexUnified2DFloatFloat: 3470 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32; 3471 break; 3472 case NVPTXISD::TexUnified2DFloatFloatLevel: 3473 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL; 3474 break; 3475 case NVPTXISD::TexUnified2DFloatFloatGrad: 3476 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD; 3477 break; 3478 case NVPTXISD::TexUnified2DS32S32: 3479 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32; 3480 break; 3481 case NVPTXISD::TexUnified2DS32Float: 3482 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32; 3483 break; 3484 case NVPTXISD::TexUnified2DS32FloatLevel: 3485 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL; 3486 break; 3487 case NVPTXISD::TexUnified2DS32FloatGrad: 3488 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD; 3489 break; 3490 case NVPTXISD::TexUnified2DU32S32: 3491 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32; 3492 break; 3493 case NVPTXISD::TexUnified2DU32Float: 3494 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32; 3495 break; 3496 case NVPTXISD::TexUnified2DU32FloatLevel: 3497 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL; 3498 break; 3499 case NVPTXISD::TexUnified2DU32FloatGrad: 3500 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD; 3501 break; 3502 case NVPTXISD::TexUnified2DArrayFloatS32: 3503 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32; 3504 break; 3505 case NVPTXISD::TexUnified2DArrayFloatFloat: 3506 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32; 3507 break; 3508 case NVPTXISD::TexUnified2DArrayFloatFloatLevel: 3509 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL; 3510 break; 3511 case NVPTXISD::TexUnified2DArrayFloatFloatGrad: 3512 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD; 3513 break; 3514 case NVPTXISD::TexUnified2DArrayS32S32: 3515 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32; 3516 break; 3517 case NVPTXISD::TexUnified2DArrayS32Float: 3518 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32; 3519 break; 3520 case NVPTXISD::TexUnified2DArrayS32FloatLevel: 3521 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL; 3522 break; 3523 case NVPTXISD::TexUnified2DArrayS32FloatGrad: 3524 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD; 3525 break; 3526 case NVPTXISD::TexUnified2DArrayU32S32: 3527 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32; 3528 break; 3529 case NVPTXISD::TexUnified2DArrayU32Float: 3530 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32; 3531 break; 3532 case NVPTXISD::TexUnified2DArrayU32FloatLevel: 3533 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL; 3534 break; 3535 case NVPTXISD::TexUnified2DArrayU32FloatGrad: 3536 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD; 3537 break; 3538 case NVPTXISD::TexUnified3DFloatS32: 3539 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32; 3540 break; 3541 case NVPTXISD::TexUnified3DFloatFloat: 3542 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32; 3543 break; 3544 case NVPTXISD::TexUnified3DFloatFloatLevel: 3545 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL; 3546 break; 3547 case NVPTXISD::TexUnified3DFloatFloatGrad: 3548 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD; 3549 break; 3550 case NVPTXISD::TexUnified3DS32S32: 3551 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32; 3552 break; 3553 case NVPTXISD::TexUnified3DS32Float: 3554 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32; 3555 break; 3556 case NVPTXISD::TexUnified3DS32FloatLevel: 3557 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL; 3558 break; 3559 case NVPTXISD::TexUnified3DS32FloatGrad: 3560 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD; 3561 break; 3562 case NVPTXISD::TexUnified3DU32S32: 3563 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32; 3564 break; 3565 case NVPTXISD::TexUnified3DU32Float: 3566 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32; 3567 break; 3568 case NVPTXISD::TexUnified3DU32FloatLevel: 3569 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL; 3570 break; 3571 case NVPTXISD::TexUnified3DU32FloatGrad: 3572 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD; 3573 break; 3574 case NVPTXISD::TexUnifiedCubeFloatFloat: 3575 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32; 3576 break; 3577 case NVPTXISD::TexUnifiedCubeFloatFloatLevel: 3578 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL; 3579 break; 3580 case NVPTXISD::TexUnifiedCubeS32Float: 3581 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32; 3582 break; 3583 case NVPTXISD::TexUnifiedCubeS32FloatLevel: 3584 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL; 3585 break; 3586 case NVPTXISD::TexUnifiedCubeU32Float: 3587 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32; 3588 break; 3589 case NVPTXISD::TexUnifiedCubeU32FloatLevel: 3590 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL; 3591 break; 3592 case NVPTXISD::TexUnifiedCubeArrayFloatFloat: 3593 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32; 3594 break; 3595 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: 3596 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL; 3597 break; 3598 case NVPTXISD::TexUnifiedCubeArrayS32Float: 3599 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32; 3600 break; 3601 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: 3602 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL; 3603 break; 3604 case NVPTXISD::TexUnifiedCubeArrayU32Float: 3605 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32; 3606 break; 3607 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: 3608 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL; 3609 break; 3610 case NVPTXISD::Tld4UnifiedR2DFloatFloat: 3611 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32; 3612 break; 3613 case NVPTXISD::Tld4UnifiedG2DFloatFloat: 3614 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32; 3615 break; 3616 case NVPTXISD::Tld4UnifiedB2DFloatFloat: 3617 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32; 3618 break; 3619 case NVPTXISD::Tld4UnifiedA2DFloatFloat: 3620 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32; 3621 break; 3622 case NVPTXISD::Tld4UnifiedR2DS64Float: 3623 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32; 3624 break; 3625 case NVPTXISD::Tld4UnifiedG2DS64Float: 3626 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32; 3627 break; 3628 case NVPTXISD::Tld4UnifiedB2DS64Float: 3629 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32; 3630 break; 3631 case NVPTXISD::Tld4UnifiedA2DS64Float: 3632 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32; 3633 break; 3634 case NVPTXISD::Tld4UnifiedR2DU64Float: 3635 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32; 3636 break; 3637 case NVPTXISD::Tld4UnifiedG2DU64Float: 3638 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32; 3639 break; 3640 case NVPTXISD::Tld4UnifiedB2DU64Float: 3641 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32; 3642 break; 3643 case NVPTXISD::Tld4UnifiedA2DU64Float: 3644 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32; 3645 break; 3646 } 3647 3648 // Copy over operands 3649 for (unsigned i = 1; i < N->getNumOperands(); ++i) { 3650 Ops.push_back(N->getOperand(i)); 3651 } 3652 3653 Ops.push_back(Chain); 3654 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops)); 3655 return true; 3656 } 3657 3658 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) { 3659 SDValue Chain = N->getOperand(0); 3660 SDValue TexHandle = N->getOperand(1); 3661 unsigned Opc = 0; 3662 SmallVector<SDValue, 8> Ops; 3663 switch (N->getOpcode()) { 3664 default: return false; 3665 case NVPTXISD::Suld1DI8Clamp: 3666 Opc = NVPTX::SULD_1D_I8_CLAMP; 3667 Ops.push_back(TexHandle); 3668 Ops.push_back(N->getOperand(2)); 3669 Ops.push_back(Chain); 3670 break; 3671 case NVPTXISD::Suld1DI16Clamp: 3672 Opc = NVPTX::SULD_1D_I16_CLAMP; 3673 Ops.push_back(TexHandle); 3674 Ops.push_back(N->getOperand(2)); 3675 Ops.push_back(Chain); 3676 break; 3677 case NVPTXISD::Suld1DI32Clamp: 3678 Opc = NVPTX::SULD_1D_I32_CLAMP; 3679 Ops.push_back(TexHandle); 3680 Ops.push_back(N->getOperand(2)); 3681 Ops.push_back(Chain); 3682 break; 3683 case NVPTXISD::Suld1DI64Clamp: 3684 Opc = NVPTX::SULD_1D_I64_CLAMP; 3685 Ops.push_back(TexHandle); 3686 Ops.push_back(N->getOperand(2)); 3687 Ops.push_back(Chain); 3688 break; 3689 case NVPTXISD::Suld1DV2I8Clamp: 3690 Opc = NVPTX::SULD_1D_V2I8_CLAMP; 3691 Ops.push_back(TexHandle); 3692 Ops.push_back(N->getOperand(2)); 3693 Ops.push_back(Chain); 3694 break; 3695 case NVPTXISD::Suld1DV2I16Clamp: 3696 Opc = NVPTX::SULD_1D_V2I16_CLAMP; 3697 Ops.push_back(TexHandle); 3698 Ops.push_back(N->getOperand(2)); 3699 Ops.push_back(Chain); 3700 break; 3701 case NVPTXISD::Suld1DV2I32Clamp: 3702 Opc = NVPTX::SULD_1D_V2I32_CLAMP; 3703 Ops.push_back(TexHandle); 3704 Ops.push_back(N->getOperand(2)); 3705 Ops.push_back(Chain); 3706 break; 3707 case NVPTXISD::Suld1DV2I64Clamp: 3708 Opc = NVPTX::SULD_1D_V2I64_CLAMP; 3709 Ops.push_back(TexHandle); 3710 Ops.push_back(N->getOperand(2)); 3711 Ops.push_back(Chain); 3712 break; 3713 case NVPTXISD::Suld1DV4I8Clamp: 3714 Opc = NVPTX::SULD_1D_V4I8_CLAMP; 3715 Ops.push_back(TexHandle); 3716 Ops.push_back(N->getOperand(2)); 3717 Ops.push_back(Chain); 3718 break; 3719 case NVPTXISD::Suld1DV4I16Clamp: 3720 Opc = NVPTX::SULD_1D_V4I16_CLAMP; 3721 Ops.push_back(TexHandle); 3722 Ops.push_back(N->getOperand(2)); 3723 Ops.push_back(Chain); 3724 break; 3725 case NVPTXISD::Suld1DV4I32Clamp: 3726 Opc = NVPTX::SULD_1D_V4I32_CLAMP; 3727 Ops.push_back(TexHandle); 3728 Ops.push_back(N->getOperand(2)); 3729 Ops.push_back(Chain); 3730 break; 3731 case NVPTXISD::Suld1DArrayI8Clamp: 3732 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP; 3733 Ops.push_back(TexHandle); 3734 Ops.push_back(N->getOperand(2)); 3735 Ops.push_back(N->getOperand(3)); 3736 Ops.push_back(Chain); 3737 break; 3738 case NVPTXISD::Suld1DArrayI16Clamp: 3739 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP; 3740 Ops.push_back(TexHandle); 3741 Ops.push_back(N->getOperand(2)); 3742 Ops.push_back(N->getOperand(3)); 3743 Ops.push_back(Chain); 3744 break; 3745 case NVPTXISD::Suld1DArrayI32Clamp: 3746 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP; 3747 Ops.push_back(TexHandle); 3748 Ops.push_back(N->getOperand(2)); 3749 Ops.push_back(N->getOperand(3)); 3750 Ops.push_back(Chain); 3751 break; 3752 case NVPTXISD::Suld1DArrayI64Clamp: 3753 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP; 3754 Ops.push_back(TexHandle); 3755 Ops.push_back(N->getOperand(2)); 3756 Ops.push_back(N->getOperand(3)); 3757 Ops.push_back(Chain); 3758 break; 3759 case NVPTXISD::Suld1DArrayV2I8Clamp: 3760 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP; 3761 Ops.push_back(TexHandle); 3762 Ops.push_back(N->getOperand(2)); 3763 Ops.push_back(N->getOperand(3)); 3764 Ops.push_back(Chain); 3765 break; 3766 case NVPTXISD::Suld1DArrayV2I16Clamp: 3767 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP; 3768 Ops.push_back(TexHandle); 3769 Ops.push_back(N->getOperand(2)); 3770 Ops.push_back(N->getOperand(3)); 3771 Ops.push_back(Chain); 3772 break; 3773 case NVPTXISD::Suld1DArrayV2I32Clamp: 3774 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP; 3775 Ops.push_back(TexHandle); 3776 Ops.push_back(N->getOperand(2)); 3777 Ops.push_back(N->getOperand(3)); 3778 Ops.push_back(Chain); 3779 break; 3780 case NVPTXISD::Suld1DArrayV2I64Clamp: 3781 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP; 3782 Ops.push_back(TexHandle); 3783 Ops.push_back(N->getOperand(2)); 3784 Ops.push_back(N->getOperand(3)); 3785 Ops.push_back(Chain); 3786 break; 3787 case NVPTXISD::Suld1DArrayV4I8Clamp: 3788 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP; 3789 Ops.push_back(TexHandle); 3790 Ops.push_back(N->getOperand(2)); 3791 Ops.push_back(N->getOperand(3)); 3792 Ops.push_back(Chain); 3793 break; 3794 case NVPTXISD::Suld1DArrayV4I16Clamp: 3795 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP; 3796 Ops.push_back(TexHandle); 3797 Ops.push_back(N->getOperand(2)); 3798 Ops.push_back(N->getOperand(3)); 3799 Ops.push_back(Chain); 3800 break; 3801 case NVPTXISD::Suld1DArrayV4I32Clamp: 3802 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP; 3803 Ops.push_back(TexHandle); 3804 Ops.push_back(N->getOperand(2)); 3805 Ops.push_back(N->getOperand(3)); 3806 Ops.push_back(Chain); 3807 break; 3808 case NVPTXISD::Suld2DI8Clamp: 3809 Opc = NVPTX::SULD_2D_I8_CLAMP; 3810 Ops.push_back(TexHandle); 3811 Ops.push_back(N->getOperand(2)); 3812 Ops.push_back(N->getOperand(3)); 3813 Ops.push_back(Chain); 3814 break; 3815 case NVPTXISD::Suld2DI16Clamp: 3816 Opc = NVPTX::SULD_2D_I16_CLAMP; 3817 Ops.push_back(TexHandle); 3818 Ops.push_back(N->getOperand(2)); 3819 Ops.push_back(N->getOperand(3)); 3820 Ops.push_back(Chain); 3821 break; 3822 case NVPTXISD::Suld2DI32Clamp: 3823 Opc = NVPTX::SULD_2D_I32_CLAMP; 3824 Ops.push_back(TexHandle); 3825 Ops.push_back(N->getOperand(2)); 3826 Ops.push_back(N->getOperand(3)); 3827 Ops.push_back(Chain); 3828 break; 3829 case NVPTXISD::Suld2DI64Clamp: 3830 Opc = NVPTX::SULD_2D_I64_CLAMP; 3831 Ops.push_back(TexHandle); 3832 Ops.push_back(N->getOperand(2)); 3833 Ops.push_back(N->getOperand(3)); 3834 Ops.push_back(Chain); 3835 break; 3836 case NVPTXISD::Suld2DV2I8Clamp: 3837 Opc = NVPTX::SULD_2D_V2I8_CLAMP; 3838 Ops.push_back(TexHandle); 3839 Ops.push_back(N->getOperand(2)); 3840 Ops.push_back(N->getOperand(3)); 3841 Ops.push_back(Chain); 3842 break; 3843 case NVPTXISD::Suld2DV2I16Clamp: 3844 Opc = NVPTX::SULD_2D_V2I16_CLAMP; 3845 Ops.push_back(TexHandle); 3846 Ops.push_back(N->getOperand(2)); 3847 Ops.push_back(N->getOperand(3)); 3848 Ops.push_back(Chain); 3849 break; 3850 case NVPTXISD::Suld2DV2I32Clamp: 3851 Opc = NVPTX::SULD_2D_V2I32_CLAMP; 3852 Ops.push_back(TexHandle); 3853 Ops.push_back(N->getOperand(2)); 3854 Ops.push_back(N->getOperand(3)); 3855 Ops.push_back(Chain); 3856 break; 3857 case NVPTXISD::Suld2DV2I64Clamp: 3858 Opc = NVPTX::SULD_2D_V2I64_CLAMP; 3859 Ops.push_back(TexHandle); 3860 Ops.push_back(N->getOperand(2)); 3861 Ops.push_back(N->getOperand(3)); 3862 Ops.push_back(Chain); 3863 break; 3864 case NVPTXISD::Suld2DV4I8Clamp: 3865 Opc = NVPTX::SULD_2D_V4I8_CLAMP; 3866 Ops.push_back(TexHandle); 3867 Ops.push_back(N->getOperand(2)); 3868 Ops.push_back(N->getOperand(3)); 3869 Ops.push_back(Chain); 3870 break; 3871 case NVPTXISD::Suld2DV4I16Clamp: 3872 Opc = NVPTX::SULD_2D_V4I16_CLAMP; 3873 Ops.push_back(TexHandle); 3874 Ops.push_back(N->getOperand(2)); 3875 Ops.push_back(N->getOperand(3)); 3876 Ops.push_back(Chain); 3877 break; 3878 case NVPTXISD::Suld2DV4I32Clamp: 3879 Opc = NVPTX::SULD_2D_V4I32_CLAMP; 3880 Ops.push_back(TexHandle); 3881 Ops.push_back(N->getOperand(2)); 3882 Ops.push_back(N->getOperand(3)); 3883 Ops.push_back(Chain); 3884 break; 3885 case NVPTXISD::Suld2DArrayI8Clamp: 3886 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP; 3887 Ops.push_back(TexHandle); 3888 Ops.push_back(N->getOperand(2)); 3889 Ops.push_back(N->getOperand(3)); 3890 Ops.push_back(N->getOperand(4)); 3891 Ops.push_back(Chain); 3892 break; 3893 case NVPTXISD::Suld2DArrayI16Clamp: 3894 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP; 3895 Ops.push_back(TexHandle); 3896 Ops.push_back(N->getOperand(2)); 3897 Ops.push_back(N->getOperand(3)); 3898 Ops.push_back(N->getOperand(4)); 3899 Ops.push_back(Chain); 3900 break; 3901 case NVPTXISD::Suld2DArrayI32Clamp: 3902 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP; 3903 Ops.push_back(TexHandle); 3904 Ops.push_back(N->getOperand(2)); 3905 Ops.push_back(N->getOperand(3)); 3906 Ops.push_back(N->getOperand(4)); 3907 Ops.push_back(Chain); 3908 break; 3909 case NVPTXISD::Suld2DArrayI64Clamp: 3910 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP; 3911 Ops.push_back(TexHandle); 3912 Ops.push_back(N->getOperand(2)); 3913 Ops.push_back(N->getOperand(3)); 3914 Ops.push_back(N->getOperand(4)); 3915 Ops.push_back(Chain); 3916 break; 3917 case NVPTXISD::Suld2DArrayV2I8Clamp: 3918 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP; 3919 Ops.push_back(TexHandle); 3920 Ops.push_back(N->getOperand(2)); 3921 Ops.push_back(N->getOperand(3)); 3922 Ops.push_back(N->getOperand(4)); 3923 Ops.push_back(Chain); 3924 break; 3925 case NVPTXISD::Suld2DArrayV2I16Clamp: 3926 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP; 3927 Ops.push_back(TexHandle); 3928 Ops.push_back(N->getOperand(2)); 3929 Ops.push_back(N->getOperand(3)); 3930 Ops.push_back(N->getOperand(4)); 3931 Ops.push_back(Chain); 3932 break; 3933 case NVPTXISD::Suld2DArrayV2I32Clamp: 3934 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP; 3935 Ops.push_back(TexHandle); 3936 Ops.push_back(N->getOperand(2)); 3937 Ops.push_back(N->getOperand(3)); 3938 Ops.push_back(N->getOperand(4)); 3939 Ops.push_back(Chain); 3940 break; 3941 case NVPTXISD::Suld2DArrayV2I64Clamp: 3942 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP; 3943 Ops.push_back(TexHandle); 3944 Ops.push_back(N->getOperand(2)); 3945 Ops.push_back(N->getOperand(3)); 3946 Ops.push_back(N->getOperand(4)); 3947 Ops.push_back(Chain); 3948 break; 3949 case NVPTXISD::Suld2DArrayV4I8Clamp: 3950 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP; 3951 Ops.push_back(TexHandle); 3952 Ops.push_back(N->getOperand(2)); 3953 Ops.push_back(N->getOperand(3)); 3954 Ops.push_back(N->getOperand(4)); 3955 Ops.push_back(Chain); 3956 break; 3957 case NVPTXISD::Suld2DArrayV4I16Clamp: 3958 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP; 3959 Ops.push_back(TexHandle); 3960 Ops.push_back(N->getOperand(2)); 3961 Ops.push_back(N->getOperand(3)); 3962 Ops.push_back(N->getOperand(4)); 3963 Ops.push_back(Chain); 3964 break; 3965 case NVPTXISD::Suld2DArrayV4I32Clamp: 3966 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP; 3967 Ops.push_back(TexHandle); 3968 Ops.push_back(N->getOperand(2)); 3969 Ops.push_back(N->getOperand(3)); 3970 Ops.push_back(N->getOperand(4)); 3971 Ops.push_back(Chain); 3972 break; 3973 case NVPTXISD::Suld3DI8Clamp: 3974 Opc = NVPTX::SULD_3D_I8_CLAMP; 3975 Ops.push_back(TexHandle); 3976 Ops.push_back(N->getOperand(2)); 3977 Ops.push_back(N->getOperand(3)); 3978 Ops.push_back(N->getOperand(4)); 3979 Ops.push_back(Chain); 3980 break; 3981 case NVPTXISD::Suld3DI16Clamp: 3982 Opc = NVPTX::SULD_3D_I16_CLAMP; 3983 Ops.push_back(TexHandle); 3984 Ops.push_back(N->getOperand(2)); 3985 Ops.push_back(N->getOperand(3)); 3986 Ops.push_back(N->getOperand(4)); 3987 Ops.push_back(Chain); 3988 break; 3989 case NVPTXISD::Suld3DI32Clamp: 3990 Opc = NVPTX::SULD_3D_I32_CLAMP; 3991 Ops.push_back(TexHandle); 3992 Ops.push_back(N->getOperand(2)); 3993 Ops.push_back(N->getOperand(3)); 3994 Ops.push_back(N->getOperand(4)); 3995 Ops.push_back(Chain); 3996 break; 3997 case NVPTXISD::Suld3DI64Clamp: 3998 Opc = NVPTX::SULD_3D_I64_CLAMP; 3999 Ops.push_back(TexHandle); 4000 Ops.push_back(N->getOperand(2)); 4001 Ops.push_back(N->getOperand(3)); 4002 Ops.push_back(N->getOperand(4)); 4003 Ops.push_back(Chain); 4004 break; 4005 case NVPTXISD::Suld3DV2I8Clamp: 4006 Opc = NVPTX::SULD_3D_V2I8_CLAMP; 4007 Ops.push_back(TexHandle); 4008 Ops.push_back(N->getOperand(2)); 4009 Ops.push_back(N->getOperand(3)); 4010 Ops.push_back(N->getOperand(4)); 4011 Ops.push_back(Chain); 4012 break; 4013 case NVPTXISD::Suld3DV2I16Clamp: 4014 Opc = NVPTX::SULD_3D_V2I16_CLAMP; 4015 Ops.push_back(TexHandle); 4016 Ops.push_back(N->getOperand(2)); 4017 Ops.push_back(N->getOperand(3)); 4018 Ops.push_back(N->getOperand(4)); 4019 Ops.push_back(Chain); 4020 break; 4021 case NVPTXISD::Suld3DV2I32Clamp: 4022 Opc = NVPTX::SULD_3D_V2I32_CLAMP; 4023 Ops.push_back(TexHandle); 4024 Ops.push_back(N->getOperand(2)); 4025 Ops.push_back(N->getOperand(3)); 4026 Ops.push_back(N->getOperand(4)); 4027 Ops.push_back(Chain); 4028 break; 4029 case NVPTXISD::Suld3DV2I64Clamp: 4030 Opc = NVPTX::SULD_3D_V2I64_CLAMP; 4031 Ops.push_back(TexHandle); 4032 Ops.push_back(N->getOperand(2)); 4033 Ops.push_back(N->getOperand(3)); 4034 Ops.push_back(N->getOperand(4)); 4035 Ops.push_back(Chain); 4036 break; 4037 case NVPTXISD::Suld3DV4I8Clamp: 4038 Opc = NVPTX::SULD_3D_V4I8_CLAMP; 4039 Ops.push_back(TexHandle); 4040 Ops.push_back(N->getOperand(2)); 4041 Ops.push_back(N->getOperand(3)); 4042 Ops.push_back(N->getOperand(4)); 4043 Ops.push_back(Chain); 4044 break; 4045 case NVPTXISD::Suld3DV4I16Clamp: 4046 Opc = NVPTX::SULD_3D_V4I16_CLAMP; 4047 Ops.push_back(TexHandle); 4048 Ops.push_back(N->getOperand(2)); 4049 Ops.push_back(N->getOperand(3)); 4050 Ops.push_back(N->getOperand(4)); 4051 Ops.push_back(Chain); 4052 break; 4053 case NVPTXISD::Suld3DV4I32Clamp: 4054 Opc = NVPTX::SULD_3D_V4I32_CLAMP; 4055 Ops.push_back(TexHandle); 4056 Ops.push_back(N->getOperand(2)); 4057 Ops.push_back(N->getOperand(3)); 4058 Ops.push_back(N->getOperand(4)); 4059 Ops.push_back(Chain); 4060 break; 4061 case NVPTXISD::Suld1DI8Trap: 4062 Opc = NVPTX::SULD_1D_I8_TRAP; 4063 Ops.push_back(TexHandle); 4064 Ops.push_back(N->getOperand(2)); 4065 Ops.push_back(Chain); 4066 break; 4067 case NVPTXISD::Suld1DI16Trap: 4068 Opc = NVPTX::SULD_1D_I16_TRAP; 4069 Ops.push_back(TexHandle); 4070 Ops.push_back(N->getOperand(2)); 4071 Ops.push_back(Chain); 4072 break; 4073 case NVPTXISD::Suld1DI32Trap: 4074 Opc = NVPTX::SULD_1D_I32_TRAP; 4075 Ops.push_back(TexHandle); 4076 Ops.push_back(N->getOperand(2)); 4077 Ops.push_back(Chain); 4078 break; 4079 case NVPTXISD::Suld1DI64Trap: 4080 Opc = NVPTX::SULD_1D_I64_TRAP; 4081 Ops.push_back(TexHandle); 4082 Ops.push_back(N->getOperand(2)); 4083 Ops.push_back(Chain); 4084 break; 4085 case NVPTXISD::Suld1DV2I8Trap: 4086 Opc = NVPTX::SULD_1D_V2I8_TRAP; 4087 Ops.push_back(TexHandle); 4088 Ops.push_back(N->getOperand(2)); 4089 Ops.push_back(Chain); 4090 break; 4091 case NVPTXISD::Suld1DV2I16Trap: 4092 Opc = NVPTX::SULD_1D_V2I16_TRAP; 4093 Ops.push_back(TexHandle); 4094 Ops.push_back(N->getOperand(2)); 4095 Ops.push_back(Chain); 4096 break; 4097 case NVPTXISD::Suld1DV2I32Trap: 4098 Opc = NVPTX::SULD_1D_V2I32_TRAP; 4099 Ops.push_back(TexHandle); 4100 Ops.push_back(N->getOperand(2)); 4101 Ops.push_back(Chain); 4102 break; 4103 case NVPTXISD::Suld1DV2I64Trap: 4104 Opc = NVPTX::SULD_1D_V2I64_TRAP; 4105 Ops.push_back(TexHandle); 4106 Ops.push_back(N->getOperand(2)); 4107 Ops.push_back(Chain); 4108 break; 4109 case NVPTXISD::Suld1DV4I8Trap: 4110 Opc = NVPTX::SULD_1D_V4I8_TRAP; 4111 Ops.push_back(TexHandle); 4112 Ops.push_back(N->getOperand(2)); 4113 Ops.push_back(Chain); 4114 break; 4115 case NVPTXISD::Suld1DV4I16Trap: 4116 Opc = NVPTX::SULD_1D_V4I16_TRAP; 4117 Ops.push_back(TexHandle); 4118 Ops.push_back(N->getOperand(2)); 4119 Ops.push_back(Chain); 4120 break; 4121 case NVPTXISD::Suld1DV4I32Trap: 4122 Opc = NVPTX::SULD_1D_V4I32_TRAP; 4123 Ops.push_back(TexHandle); 4124 Ops.push_back(N->getOperand(2)); 4125 Ops.push_back(Chain); 4126 break; 4127 case NVPTXISD::Suld1DArrayI8Trap: 4128 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; 4129 Ops.push_back(TexHandle); 4130 Ops.push_back(N->getOperand(2)); 4131 Ops.push_back(N->getOperand(3)); 4132 Ops.push_back(Chain); 4133 break; 4134 case NVPTXISD::Suld1DArrayI16Trap: 4135 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; 4136 Ops.push_back(TexHandle); 4137 Ops.push_back(N->getOperand(2)); 4138 Ops.push_back(N->getOperand(3)); 4139 Ops.push_back(Chain); 4140 break; 4141 case NVPTXISD::Suld1DArrayI32Trap: 4142 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; 4143 Ops.push_back(TexHandle); 4144 Ops.push_back(N->getOperand(2)); 4145 Ops.push_back(N->getOperand(3)); 4146 Ops.push_back(Chain); 4147 break; 4148 case NVPTXISD::Suld1DArrayI64Trap: 4149 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP; 4150 Ops.push_back(TexHandle); 4151 Ops.push_back(N->getOperand(2)); 4152 Ops.push_back(N->getOperand(3)); 4153 Ops.push_back(Chain); 4154 break; 4155 case NVPTXISD::Suld1DArrayV2I8Trap: 4156 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; 4157 Ops.push_back(TexHandle); 4158 Ops.push_back(N->getOperand(2)); 4159 Ops.push_back(N->getOperand(3)); 4160 Ops.push_back(Chain); 4161 break; 4162 case NVPTXISD::Suld1DArrayV2I16Trap: 4163 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; 4164 Ops.push_back(TexHandle); 4165 Ops.push_back(N->getOperand(2)); 4166 Ops.push_back(N->getOperand(3)); 4167 Ops.push_back(Chain); 4168 break; 4169 case NVPTXISD::Suld1DArrayV2I32Trap: 4170 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; 4171 Ops.push_back(TexHandle); 4172 Ops.push_back(N->getOperand(2)); 4173 Ops.push_back(N->getOperand(3)); 4174 Ops.push_back(Chain); 4175 break; 4176 case NVPTXISD::Suld1DArrayV2I64Trap: 4177 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP; 4178 Ops.push_back(TexHandle); 4179 Ops.push_back(N->getOperand(2)); 4180 Ops.push_back(N->getOperand(3)); 4181 Ops.push_back(Chain); 4182 break; 4183 case NVPTXISD::Suld1DArrayV4I8Trap: 4184 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; 4185 Ops.push_back(TexHandle); 4186 Ops.push_back(N->getOperand(2)); 4187 Ops.push_back(N->getOperand(3)); 4188 Ops.push_back(Chain); 4189 break; 4190 case NVPTXISD::Suld1DArrayV4I16Trap: 4191 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; 4192 Ops.push_back(TexHandle); 4193 Ops.push_back(N->getOperand(2)); 4194 Ops.push_back(N->getOperand(3)); 4195 Ops.push_back(Chain); 4196 break; 4197 case NVPTXISD::Suld1DArrayV4I32Trap: 4198 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; 4199 Ops.push_back(TexHandle); 4200 Ops.push_back(N->getOperand(2)); 4201 Ops.push_back(N->getOperand(3)); 4202 Ops.push_back(Chain); 4203 break; 4204 case NVPTXISD::Suld2DI8Trap: 4205 Opc = NVPTX::SULD_2D_I8_TRAP; 4206 Ops.push_back(TexHandle); 4207 Ops.push_back(N->getOperand(2)); 4208 Ops.push_back(N->getOperand(3)); 4209 Ops.push_back(Chain); 4210 break; 4211 case NVPTXISD::Suld2DI16Trap: 4212 Opc = NVPTX::SULD_2D_I16_TRAP; 4213 Ops.push_back(TexHandle); 4214 Ops.push_back(N->getOperand(2)); 4215 Ops.push_back(N->getOperand(3)); 4216 Ops.push_back(Chain); 4217 break; 4218 case NVPTXISD::Suld2DI32Trap: 4219 Opc = NVPTX::SULD_2D_I32_TRAP; 4220 Ops.push_back(TexHandle); 4221 Ops.push_back(N->getOperand(2)); 4222 Ops.push_back(N->getOperand(3)); 4223 Ops.push_back(Chain); 4224 break; 4225 case NVPTXISD::Suld2DI64Trap: 4226 Opc = NVPTX::SULD_2D_I64_TRAP; 4227 Ops.push_back(TexHandle); 4228 Ops.push_back(N->getOperand(2)); 4229 Ops.push_back(N->getOperand(3)); 4230 Ops.push_back(Chain); 4231 break; 4232 case NVPTXISD::Suld2DV2I8Trap: 4233 Opc = NVPTX::SULD_2D_V2I8_TRAP; 4234 Ops.push_back(TexHandle); 4235 Ops.push_back(N->getOperand(2)); 4236 Ops.push_back(N->getOperand(3)); 4237 Ops.push_back(Chain); 4238 break; 4239 case NVPTXISD::Suld2DV2I16Trap: 4240 Opc = NVPTX::SULD_2D_V2I16_TRAP; 4241 Ops.push_back(TexHandle); 4242 Ops.push_back(N->getOperand(2)); 4243 Ops.push_back(N->getOperand(3)); 4244 Ops.push_back(Chain); 4245 break; 4246 case NVPTXISD::Suld2DV2I32Trap: 4247 Opc = NVPTX::SULD_2D_V2I32_TRAP; 4248 Ops.push_back(TexHandle); 4249 Ops.push_back(N->getOperand(2)); 4250 Ops.push_back(N->getOperand(3)); 4251 Ops.push_back(Chain); 4252 break; 4253 case NVPTXISD::Suld2DV2I64Trap: 4254 Opc = NVPTX::SULD_2D_V2I64_TRAP; 4255 Ops.push_back(TexHandle); 4256 Ops.push_back(N->getOperand(2)); 4257 Ops.push_back(N->getOperand(3)); 4258 Ops.push_back(Chain); 4259 break; 4260 case NVPTXISD::Suld2DV4I8Trap: 4261 Opc = NVPTX::SULD_2D_V4I8_TRAP; 4262 Ops.push_back(TexHandle); 4263 Ops.push_back(N->getOperand(2)); 4264 Ops.push_back(N->getOperand(3)); 4265 Ops.push_back(Chain); 4266 break; 4267 case NVPTXISD::Suld2DV4I16Trap: 4268 Opc = NVPTX::SULD_2D_V4I16_TRAP; 4269 Ops.push_back(TexHandle); 4270 Ops.push_back(N->getOperand(2)); 4271 Ops.push_back(N->getOperand(3)); 4272 Ops.push_back(Chain); 4273 break; 4274 case NVPTXISD::Suld2DV4I32Trap: 4275 Opc = NVPTX::SULD_2D_V4I32_TRAP; 4276 Ops.push_back(TexHandle); 4277 Ops.push_back(N->getOperand(2)); 4278 Ops.push_back(N->getOperand(3)); 4279 Ops.push_back(Chain); 4280 break; 4281 case NVPTXISD::Suld2DArrayI8Trap: 4282 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; 4283 Ops.push_back(TexHandle); 4284 Ops.push_back(N->getOperand(2)); 4285 Ops.push_back(N->getOperand(3)); 4286 Ops.push_back(N->getOperand(4)); 4287 Ops.push_back(Chain); 4288 break; 4289 case NVPTXISD::Suld2DArrayI16Trap: 4290 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; 4291 Ops.push_back(TexHandle); 4292 Ops.push_back(N->getOperand(2)); 4293 Ops.push_back(N->getOperand(3)); 4294 Ops.push_back(N->getOperand(4)); 4295 Ops.push_back(Chain); 4296 break; 4297 case NVPTXISD::Suld2DArrayI32Trap: 4298 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; 4299 Ops.push_back(TexHandle); 4300 Ops.push_back(N->getOperand(2)); 4301 Ops.push_back(N->getOperand(3)); 4302 Ops.push_back(N->getOperand(4)); 4303 Ops.push_back(Chain); 4304 break; 4305 case NVPTXISD::Suld2DArrayI64Trap: 4306 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP; 4307 Ops.push_back(TexHandle); 4308 Ops.push_back(N->getOperand(2)); 4309 Ops.push_back(N->getOperand(3)); 4310 Ops.push_back(N->getOperand(4)); 4311 Ops.push_back(Chain); 4312 break; 4313 case NVPTXISD::Suld2DArrayV2I8Trap: 4314 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; 4315 Ops.push_back(TexHandle); 4316 Ops.push_back(N->getOperand(2)); 4317 Ops.push_back(N->getOperand(3)); 4318 Ops.push_back(N->getOperand(4)); 4319 Ops.push_back(Chain); 4320 break; 4321 case NVPTXISD::Suld2DArrayV2I16Trap: 4322 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; 4323 Ops.push_back(TexHandle); 4324 Ops.push_back(N->getOperand(2)); 4325 Ops.push_back(N->getOperand(3)); 4326 Ops.push_back(N->getOperand(4)); 4327 Ops.push_back(Chain); 4328 break; 4329 case NVPTXISD::Suld2DArrayV2I32Trap: 4330 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; 4331 Ops.push_back(TexHandle); 4332 Ops.push_back(N->getOperand(2)); 4333 Ops.push_back(N->getOperand(3)); 4334 Ops.push_back(N->getOperand(4)); 4335 Ops.push_back(Chain); 4336 break; 4337 case NVPTXISD::Suld2DArrayV2I64Trap: 4338 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP; 4339 Ops.push_back(TexHandle); 4340 Ops.push_back(N->getOperand(2)); 4341 Ops.push_back(N->getOperand(3)); 4342 Ops.push_back(N->getOperand(4)); 4343 Ops.push_back(Chain); 4344 break; 4345 case NVPTXISD::Suld2DArrayV4I8Trap: 4346 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; 4347 Ops.push_back(TexHandle); 4348 Ops.push_back(N->getOperand(2)); 4349 Ops.push_back(N->getOperand(3)); 4350 Ops.push_back(N->getOperand(4)); 4351 Ops.push_back(Chain); 4352 break; 4353 case NVPTXISD::Suld2DArrayV4I16Trap: 4354 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; 4355 Ops.push_back(TexHandle); 4356 Ops.push_back(N->getOperand(2)); 4357 Ops.push_back(N->getOperand(3)); 4358 Ops.push_back(N->getOperand(4)); 4359 Ops.push_back(Chain); 4360 break; 4361 case NVPTXISD::Suld2DArrayV4I32Trap: 4362 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; 4363 Ops.push_back(TexHandle); 4364 Ops.push_back(N->getOperand(2)); 4365 Ops.push_back(N->getOperand(3)); 4366 Ops.push_back(N->getOperand(4)); 4367 Ops.push_back(Chain); 4368 break; 4369 case NVPTXISD::Suld3DI8Trap: 4370 Opc = NVPTX::SULD_3D_I8_TRAP; 4371 Ops.push_back(TexHandle); 4372 Ops.push_back(N->getOperand(2)); 4373 Ops.push_back(N->getOperand(3)); 4374 Ops.push_back(N->getOperand(4)); 4375 Ops.push_back(Chain); 4376 break; 4377 case NVPTXISD::Suld3DI16Trap: 4378 Opc = NVPTX::SULD_3D_I16_TRAP; 4379 Ops.push_back(TexHandle); 4380 Ops.push_back(N->getOperand(2)); 4381 Ops.push_back(N->getOperand(3)); 4382 Ops.push_back(N->getOperand(4)); 4383 Ops.push_back(Chain); 4384 break; 4385 case NVPTXISD::Suld3DI32Trap: 4386 Opc = NVPTX::SULD_3D_I32_TRAP; 4387 Ops.push_back(TexHandle); 4388 Ops.push_back(N->getOperand(2)); 4389 Ops.push_back(N->getOperand(3)); 4390 Ops.push_back(N->getOperand(4)); 4391 Ops.push_back(Chain); 4392 break; 4393 case NVPTXISD::Suld3DI64Trap: 4394 Opc = NVPTX::SULD_3D_I64_TRAP; 4395 Ops.push_back(TexHandle); 4396 Ops.push_back(N->getOperand(2)); 4397 Ops.push_back(N->getOperand(3)); 4398 Ops.push_back(N->getOperand(4)); 4399 Ops.push_back(Chain); 4400 break; 4401 case NVPTXISD::Suld3DV2I8Trap: 4402 Opc = NVPTX::SULD_3D_V2I8_TRAP; 4403 Ops.push_back(TexHandle); 4404 Ops.push_back(N->getOperand(2)); 4405 Ops.push_back(N->getOperand(3)); 4406 Ops.push_back(N->getOperand(4)); 4407 Ops.push_back(Chain); 4408 break; 4409 case NVPTXISD::Suld3DV2I16Trap: 4410 Opc = NVPTX::SULD_3D_V2I16_TRAP; 4411 Ops.push_back(TexHandle); 4412 Ops.push_back(N->getOperand(2)); 4413 Ops.push_back(N->getOperand(3)); 4414 Ops.push_back(N->getOperand(4)); 4415 Ops.push_back(Chain); 4416 break; 4417 case NVPTXISD::Suld3DV2I32Trap: 4418 Opc = NVPTX::SULD_3D_V2I32_TRAP; 4419 Ops.push_back(TexHandle); 4420 Ops.push_back(N->getOperand(2)); 4421 Ops.push_back(N->getOperand(3)); 4422 Ops.push_back(N->getOperand(4)); 4423 Ops.push_back(Chain); 4424 break; 4425 case NVPTXISD::Suld3DV2I64Trap: 4426 Opc = NVPTX::SULD_3D_V2I64_TRAP; 4427 Ops.push_back(TexHandle); 4428 Ops.push_back(N->getOperand(2)); 4429 Ops.push_back(N->getOperand(3)); 4430 Ops.push_back(N->getOperand(4)); 4431 Ops.push_back(Chain); 4432 break; 4433 case NVPTXISD::Suld3DV4I8Trap: 4434 Opc = NVPTX::SULD_3D_V4I8_TRAP; 4435 Ops.push_back(TexHandle); 4436 Ops.push_back(N->getOperand(2)); 4437 Ops.push_back(N->getOperand(3)); 4438 Ops.push_back(N->getOperand(4)); 4439 Ops.push_back(Chain); 4440 break; 4441 case NVPTXISD::Suld3DV4I16Trap: 4442 Opc = NVPTX::SULD_3D_V4I16_TRAP; 4443 Ops.push_back(TexHandle); 4444 Ops.push_back(N->getOperand(2)); 4445 Ops.push_back(N->getOperand(3)); 4446 Ops.push_back(N->getOperand(4)); 4447 Ops.push_back(Chain); 4448 break; 4449 case NVPTXISD::Suld3DV4I32Trap: 4450 Opc = NVPTX::SULD_3D_V4I32_TRAP; 4451 Ops.push_back(TexHandle); 4452 Ops.push_back(N->getOperand(2)); 4453 Ops.push_back(N->getOperand(3)); 4454 Ops.push_back(N->getOperand(4)); 4455 Ops.push_back(Chain); 4456 break; 4457 case NVPTXISD::Suld1DI8Zero: 4458 Opc = NVPTX::SULD_1D_I8_ZERO; 4459 Ops.push_back(TexHandle); 4460 Ops.push_back(N->getOperand(2)); 4461 Ops.push_back(Chain); 4462 break; 4463 case NVPTXISD::Suld1DI16Zero: 4464 Opc = NVPTX::SULD_1D_I16_ZERO; 4465 Ops.push_back(TexHandle); 4466 Ops.push_back(N->getOperand(2)); 4467 Ops.push_back(Chain); 4468 break; 4469 case NVPTXISD::Suld1DI32Zero: 4470 Opc = NVPTX::SULD_1D_I32_ZERO; 4471 Ops.push_back(TexHandle); 4472 Ops.push_back(N->getOperand(2)); 4473 Ops.push_back(Chain); 4474 break; 4475 case NVPTXISD::Suld1DI64Zero: 4476 Opc = NVPTX::SULD_1D_I64_ZERO; 4477 Ops.push_back(TexHandle); 4478 Ops.push_back(N->getOperand(2)); 4479 Ops.push_back(Chain); 4480 break; 4481 case NVPTXISD::Suld1DV2I8Zero: 4482 Opc = NVPTX::SULD_1D_V2I8_ZERO; 4483 Ops.push_back(TexHandle); 4484 Ops.push_back(N->getOperand(2)); 4485 Ops.push_back(Chain); 4486 break; 4487 case NVPTXISD::Suld1DV2I16Zero: 4488 Opc = NVPTX::SULD_1D_V2I16_ZERO; 4489 Ops.push_back(TexHandle); 4490 Ops.push_back(N->getOperand(2)); 4491 Ops.push_back(Chain); 4492 break; 4493 case NVPTXISD::Suld1DV2I32Zero: 4494 Opc = NVPTX::SULD_1D_V2I32_ZERO; 4495 Ops.push_back(TexHandle); 4496 Ops.push_back(N->getOperand(2)); 4497 Ops.push_back(Chain); 4498 break; 4499 case NVPTXISD::Suld1DV2I64Zero: 4500 Opc = NVPTX::SULD_1D_V2I64_ZERO; 4501 Ops.push_back(TexHandle); 4502 Ops.push_back(N->getOperand(2)); 4503 Ops.push_back(Chain); 4504 break; 4505 case NVPTXISD::Suld1DV4I8Zero: 4506 Opc = NVPTX::SULD_1D_V4I8_ZERO; 4507 Ops.push_back(TexHandle); 4508 Ops.push_back(N->getOperand(2)); 4509 Ops.push_back(Chain); 4510 break; 4511 case NVPTXISD::Suld1DV4I16Zero: 4512 Opc = NVPTX::SULD_1D_V4I16_ZERO; 4513 Ops.push_back(TexHandle); 4514 Ops.push_back(N->getOperand(2)); 4515 Ops.push_back(Chain); 4516 break; 4517 case NVPTXISD::Suld1DV4I32Zero: 4518 Opc = NVPTX::SULD_1D_V4I32_ZERO; 4519 Ops.push_back(TexHandle); 4520 Ops.push_back(N->getOperand(2)); 4521 Ops.push_back(Chain); 4522 break; 4523 case NVPTXISD::Suld1DArrayI8Zero: 4524 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO; 4525 Ops.push_back(TexHandle); 4526 Ops.push_back(N->getOperand(2)); 4527 Ops.push_back(N->getOperand(3)); 4528 Ops.push_back(Chain); 4529 break; 4530 case NVPTXISD::Suld1DArrayI16Zero: 4531 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO; 4532 Ops.push_back(TexHandle); 4533 Ops.push_back(N->getOperand(2)); 4534 Ops.push_back(N->getOperand(3)); 4535 Ops.push_back(Chain); 4536 break; 4537 case NVPTXISD::Suld1DArrayI32Zero: 4538 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO; 4539 Ops.push_back(TexHandle); 4540 Ops.push_back(N->getOperand(2)); 4541 Ops.push_back(N->getOperand(3)); 4542 Ops.push_back(Chain); 4543 break; 4544 case NVPTXISD::Suld1DArrayI64Zero: 4545 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO; 4546 Ops.push_back(TexHandle); 4547 Ops.push_back(N->getOperand(2)); 4548 Ops.push_back(N->getOperand(3)); 4549 Ops.push_back(Chain); 4550 break; 4551 case NVPTXISD::Suld1DArrayV2I8Zero: 4552 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO; 4553 Ops.push_back(TexHandle); 4554 Ops.push_back(N->getOperand(2)); 4555 Ops.push_back(N->getOperand(3)); 4556 Ops.push_back(Chain); 4557 break; 4558 case NVPTXISD::Suld1DArrayV2I16Zero: 4559 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO; 4560 Ops.push_back(TexHandle); 4561 Ops.push_back(N->getOperand(2)); 4562 Ops.push_back(N->getOperand(3)); 4563 Ops.push_back(Chain); 4564 break; 4565 case NVPTXISD::Suld1DArrayV2I32Zero: 4566 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO; 4567 Ops.push_back(TexHandle); 4568 Ops.push_back(N->getOperand(2)); 4569 Ops.push_back(N->getOperand(3)); 4570 Ops.push_back(Chain); 4571 break; 4572 case NVPTXISD::Suld1DArrayV2I64Zero: 4573 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO; 4574 Ops.push_back(TexHandle); 4575 Ops.push_back(N->getOperand(2)); 4576 Ops.push_back(N->getOperand(3)); 4577 Ops.push_back(Chain); 4578 break; 4579 case NVPTXISD::Suld1DArrayV4I8Zero: 4580 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO; 4581 Ops.push_back(TexHandle); 4582 Ops.push_back(N->getOperand(2)); 4583 Ops.push_back(N->getOperand(3)); 4584 Ops.push_back(Chain); 4585 break; 4586 case NVPTXISD::Suld1DArrayV4I16Zero: 4587 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO; 4588 Ops.push_back(TexHandle); 4589 Ops.push_back(N->getOperand(2)); 4590 Ops.push_back(N->getOperand(3)); 4591 Ops.push_back(Chain); 4592 break; 4593 case NVPTXISD::Suld1DArrayV4I32Zero: 4594 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO; 4595 Ops.push_back(TexHandle); 4596 Ops.push_back(N->getOperand(2)); 4597 Ops.push_back(N->getOperand(3)); 4598 Ops.push_back(Chain); 4599 break; 4600 case NVPTXISD::Suld2DI8Zero: 4601 Opc = NVPTX::SULD_2D_I8_ZERO; 4602 Ops.push_back(TexHandle); 4603 Ops.push_back(N->getOperand(2)); 4604 Ops.push_back(N->getOperand(3)); 4605 Ops.push_back(Chain); 4606 break; 4607 case NVPTXISD::Suld2DI16Zero: 4608 Opc = NVPTX::SULD_2D_I16_ZERO; 4609 Ops.push_back(TexHandle); 4610 Ops.push_back(N->getOperand(2)); 4611 Ops.push_back(N->getOperand(3)); 4612 Ops.push_back(Chain); 4613 break; 4614 case NVPTXISD::Suld2DI32Zero: 4615 Opc = NVPTX::SULD_2D_I32_ZERO; 4616 Ops.push_back(TexHandle); 4617 Ops.push_back(N->getOperand(2)); 4618 Ops.push_back(N->getOperand(3)); 4619 Ops.push_back(Chain); 4620 break; 4621 case NVPTXISD::Suld2DI64Zero: 4622 Opc = NVPTX::SULD_2D_I64_ZERO; 4623 Ops.push_back(TexHandle); 4624 Ops.push_back(N->getOperand(2)); 4625 Ops.push_back(N->getOperand(3)); 4626 Ops.push_back(Chain); 4627 break; 4628 case NVPTXISD::Suld2DV2I8Zero: 4629 Opc = NVPTX::SULD_2D_V2I8_ZERO; 4630 Ops.push_back(TexHandle); 4631 Ops.push_back(N->getOperand(2)); 4632 Ops.push_back(N->getOperand(3)); 4633 Ops.push_back(Chain); 4634 break; 4635 case NVPTXISD::Suld2DV2I16Zero: 4636 Opc = NVPTX::SULD_2D_V2I16_ZERO; 4637 Ops.push_back(TexHandle); 4638 Ops.push_back(N->getOperand(2)); 4639 Ops.push_back(N->getOperand(3)); 4640 Ops.push_back(Chain); 4641 break; 4642 case NVPTXISD::Suld2DV2I32Zero: 4643 Opc = NVPTX::SULD_2D_V2I32_ZERO; 4644 Ops.push_back(TexHandle); 4645 Ops.push_back(N->getOperand(2)); 4646 Ops.push_back(N->getOperand(3)); 4647 Ops.push_back(Chain); 4648 break; 4649 case NVPTXISD::Suld2DV2I64Zero: 4650 Opc = NVPTX::SULD_2D_V2I64_ZERO; 4651 Ops.push_back(TexHandle); 4652 Ops.push_back(N->getOperand(2)); 4653 Ops.push_back(N->getOperand(3)); 4654 Ops.push_back(Chain); 4655 break; 4656 case NVPTXISD::Suld2DV4I8Zero: 4657 Opc = NVPTX::SULD_2D_V4I8_ZERO; 4658 Ops.push_back(TexHandle); 4659 Ops.push_back(N->getOperand(2)); 4660 Ops.push_back(N->getOperand(3)); 4661 Ops.push_back(Chain); 4662 break; 4663 case NVPTXISD::Suld2DV4I16Zero: 4664 Opc = NVPTX::SULD_2D_V4I16_ZERO; 4665 Ops.push_back(TexHandle); 4666 Ops.push_back(N->getOperand(2)); 4667 Ops.push_back(N->getOperand(3)); 4668 Ops.push_back(Chain); 4669 break; 4670 case NVPTXISD::Suld2DV4I32Zero: 4671 Opc = NVPTX::SULD_2D_V4I32_ZERO; 4672 Ops.push_back(TexHandle); 4673 Ops.push_back(N->getOperand(2)); 4674 Ops.push_back(N->getOperand(3)); 4675 Ops.push_back(Chain); 4676 break; 4677 case NVPTXISD::Suld2DArrayI8Zero: 4678 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO; 4679 Ops.push_back(TexHandle); 4680 Ops.push_back(N->getOperand(2)); 4681 Ops.push_back(N->getOperand(3)); 4682 Ops.push_back(N->getOperand(4)); 4683 Ops.push_back(Chain); 4684 break; 4685 case NVPTXISD::Suld2DArrayI16Zero: 4686 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO; 4687 Ops.push_back(TexHandle); 4688 Ops.push_back(N->getOperand(2)); 4689 Ops.push_back(N->getOperand(3)); 4690 Ops.push_back(N->getOperand(4)); 4691 Ops.push_back(Chain); 4692 break; 4693 case NVPTXISD::Suld2DArrayI32Zero: 4694 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO; 4695 Ops.push_back(TexHandle); 4696 Ops.push_back(N->getOperand(2)); 4697 Ops.push_back(N->getOperand(3)); 4698 Ops.push_back(N->getOperand(4)); 4699 Ops.push_back(Chain); 4700 break; 4701 case NVPTXISD::Suld2DArrayI64Zero: 4702 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO; 4703 Ops.push_back(TexHandle); 4704 Ops.push_back(N->getOperand(2)); 4705 Ops.push_back(N->getOperand(3)); 4706 Ops.push_back(N->getOperand(4)); 4707 Ops.push_back(Chain); 4708 break; 4709 case NVPTXISD::Suld2DArrayV2I8Zero: 4710 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO; 4711 Ops.push_back(TexHandle); 4712 Ops.push_back(N->getOperand(2)); 4713 Ops.push_back(N->getOperand(3)); 4714 Ops.push_back(N->getOperand(4)); 4715 Ops.push_back(Chain); 4716 break; 4717 case NVPTXISD::Suld2DArrayV2I16Zero: 4718 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO; 4719 Ops.push_back(TexHandle); 4720 Ops.push_back(N->getOperand(2)); 4721 Ops.push_back(N->getOperand(3)); 4722 Ops.push_back(N->getOperand(4)); 4723 Ops.push_back(Chain); 4724 break; 4725 case NVPTXISD::Suld2DArrayV2I32Zero: 4726 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO; 4727 Ops.push_back(TexHandle); 4728 Ops.push_back(N->getOperand(2)); 4729 Ops.push_back(N->getOperand(3)); 4730 Ops.push_back(N->getOperand(4)); 4731 Ops.push_back(Chain); 4732 break; 4733 case NVPTXISD::Suld2DArrayV2I64Zero: 4734 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO; 4735 Ops.push_back(TexHandle); 4736 Ops.push_back(N->getOperand(2)); 4737 Ops.push_back(N->getOperand(3)); 4738 Ops.push_back(N->getOperand(4)); 4739 Ops.push_back(Chain); 4740 break; 4741 case NVPTXISD::Suld2DArrayV4I8Zero: 4742 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO; 4743 Ops.push_back(TexHandle); 4744 Ops.push_back(N->getOperand(2)); 4745 Ops.push_back(N->getOperand(3)); 4746 Ops.push_back(N->getOperand(4)); 4747 Ops.push_back(Chain); 4748 break; 4749 case NVPTXISD::Suld2DArrayV4I16Zero: 4750 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO; 4751 Ops.push_back(TexHandle); 4752 Ops.push_back(N->getOperand(2)); 4753 Ops.push_back(N->getOperand(3)); 4754 Ops.push_back(N->getOperand(4)); 4755 Ops.push_back(Chain); 4756 break; 4757 case NVPTXISD::Suld2DArrayV4I32Zero: 4758 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO; 4759 Ops.push_back(TexHandle); 4760 Ops.push_back(N->getOperand(2)); 4761 Ops.push_back(N->getOperand(3)); 4762 Ops.push_back(N->getOperand(4)); 4763 Ops.push_back(Chain); 4764 break; 4765 case NVPTXISD::Suld3DI8Zero: 4766 Opc = NVPTX::SULD_3D_I8_ZERO; 4767 Ops.push_back(TexHandle); 4768 Ops.push_back(N->getOperand(2)); 4769 Ops.push_back(N->getOperand(3)); 4770 Ops.push_back(N->getOperand(4)); 4771 Ops.push_back(Chain); 4772 break; 4773 case NVPTXISD::Suld3DI16Zero: 4774 Opc = NVPTX::SULD_3D_I16_ZERO; 4775 Ops.push_back(TexHandle); 4776 Ops.push_back(N->getOperand(2)); 4777 Ops.push_back(N->getOperand(3)); 4778 Ops.push_back(N->getOperand(4)); 4779 Ops.push_back(Chain); 4780 break; 4781 case NVPTXISD::Suld3DI32Zero: 4782 Opc = NVPTX::SULD_3D_I32_ZERO; 4783 Ops.push_back(TexHandle); 4784 Ops.push_back(N->getOperand(2)); 4785 Ops.push_back(N->getOperand(3)); 4786 Ops.push_back(N->getOperand(4)); 4787 Ops.push_back(Chain); 4788 break; 4789 case NVPTXISD::Suld3DI64Zero: 4790 Opc = NVPTX::SULD_3D_I64_ZERO; 4791 Ops.push_back(TexHandle); 4792 Ops.push_back(N->getOperand(2)); 4793 Ops.push_back(N->getOperand(3)); 4794 Ops.push_back(N->getOperand(4)); 4795 Ops.push_back(Chain); 4796 break; 4797 case NVPTXISD::Suld3DV2I8Zero: 4798 Opc = NVPTX::SULD_3D_V2I8_ZERO; 4799 Ops.push_back(TexHandle); 4800 Ops.push_back(N->getOperand(2)); 4801 Ops.push_back(N->getOperand(3)); 4802 Ops.push_back(N->getOperand(4)); 4803 Ops.push_back(Chain); 4804 break; 4805 case NVPTXISD::Suld3DV2I16Zero: 4806 Opc = NVPTX::SULD_3D_V2I16_ZERO; 4807 Ops.push_back(TexHandle); 4808 Ops.push_back(N->getOperand(2)); 4809 Ops.push_back(N->getOperand(3)); 4810 Ops.push_back(N->getOperand(4)); 4811 Ops.push_back(Chain); 4812 break; 4813 case NVPTXISD::Suld3DV2I32Zero: 4814 Opc = NVPTX::SULD_3D_V2I32_ZERO; 4815 Ops.push_back(TexHandle); 4816 Ops.push_back(N->getOperand(2)); 4817 Ops.push_back(N->getOperand(3)); 4818 Ops.push_back(N->getOperand(4)); 4819 Ops.push_back(Chain); 4820 break; 4821 case NVPTXISD::Suld3DV2I64Zero: 4822 Opc = NVPTX::SULD_3D_V2I64_ZERO; 4823 Ops.push_back(TexHandle); 4824 Ops.push_back(N->getOperand(2)); 4825 Ops.push_back(N->getOperand(3)); 4826 Ops.push_back(N->getOperand(4)); 4827 Ops.push_back(Chain); 4828 break; 4829 case NVPTXISD::Suld3DV4I8Zero: 4830 Opc = NVPTX::SULD_3D_V4I8_ZERO; 4831 Ops.push_back(TexHandle); 4832 Ops.push_back(N->getOperand(2)); 4833 Ops.push_back(N->getOperand(3)); 4834 Ops.push_back(N->getOperand(4)); 4835 Ops.push_back(Chain); 4836 break; 4837 case NVPTXISD::Suld3DV4I16Zero: 4838 Opc = NVPTX::SULD_3D_V4I16_ZERO; 4839 Ops.push_back(TexHandle); 4840 Ops.push_back(N->getOperand(2)); 4841 Ops.push_back(N->getOperand(3)); 4842 Ops.push_back(N->getOperand(4)); 4843 Ops.push_back(Chain); 4844 break; 4845 case NVPTXISD::Suld3DV4I32Zero: 4846 Opc = NVPTX::SULD_3D_V4I32_ZERO; 4847 Ops.push_back(TexHandle); 4848 Ops.push_back(N->getOperand(2)); 4849 Ops.push_back(N->getOperand(3)); 4850 Ops.push_back(N->getOperand(4)); 4851 Ops.push_back(Chain); 4852 break; 4853 } 4854 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops)); 4855 return true; 4856 } 4857 4858 4859 /// SelectBFE - Look for instruction sequences that can be made more efficient 4860 /// by using the 'bfe' (bit-field extract) PTX instruction 4861 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) { 4862 SDLoc DL(N); 4863 SDValue LHS = N->getOperand(0); 4864 SDValue RHS = N->getOperand(1); 4865 SDValue Len; 4866 SDValue Start; 4867 SDValue Val; 4868 bool IsSigned = false; 4869 4870 if (N->getOpcode() == ISD::AND) { 4871 // Canonicalize the operands 4872 // We want 'and %val, %mask' 4873 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) { 4874 std::swap(LHS, RHS); 4875 } 4876 4877 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS); 4878 if (!Mask) { 4879 // We need a constant mask on the RHS of the AND 4880 return false; 4881 } 4882 4883 // Extract the mask bits 4884 uint64_t MaskVal = Mask->getZExtValue(); 4885 if (!isMask_64(MaskVal)) { 4886 // We *could* handle shifted masks here, but doing so would require an 4887 // 'and' operation to fix up the low-order bits so we would trade 4888 // shr+and for bfe+and, which has the same throughput 4889 return false; 4890 } 4891 4892 // How many bits are in our mask? 4893 uint64_t NumBits = countTrailingOnes(MaskVal); 4894 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); 4895 4896 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) { 4897 // We have a 'srl/and' pair, extract the effective start bit and length 4898 Val = LHS.getNode()->getOperand(0); 4899 Start = LHS.getNode()->getOperand(1); 4900 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start); 4901 if (StartConst) { 4902 uint64_t StartVal = StartConst->getZExtValue(); 4903 // How many "good" bits do we have left? "good" is defined here as bits 4904 // that exist in the original value, not shifted in. 4905 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal; 4906 if (NumBits > GoodBits) { 4907 // Do not handle the case where bits have been shifted in. In theory 4908 // we could handle this, but the cost is likely higher than just 4909 // emitting the srl/and pair. 4910 return false; 4911 } 4912 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32); 4913 } else { 4914 // Do not handle the case where the shift amount (can be zero if no srl 4915 // was found) is not constant. We could handle this case, but it would 4916 // require run-time logic that would be more expensive than just 4917 // emitting the srl/and pair. 4918 return false; 4919 } 4920 } else { 4921 // Do not handle the case where the LHS of the and is not a shift. While 4922 // it would be trivial to handle this case, it would just transform 4923 // 'and' -> 'bfe', but 'and' has higher-throughput. 4924 return false; 4925 } 4926 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) { 4927 if (LHS->getOpcode() == ISD::AND) { 4928 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS); 4929 if (!ShiftCnst) { 4930 // Shift amount must be constant 4931 return false; 4932 } 4933 4934 uint64_t ShiftAmt = ShiftCnst->getZExtValue(); 4935 4936 SDValue AndLHS = LHS->getOperand(0); 4937 SDValue AndRHS = LHS->getOperand(1); 4938 4939 // Canonicalize the AND to have the mask on the RHS 4940 if (isa<ConstantSDNode>(AndLHS)) { 4941 std::swap(AndLHS, AndRHS); 4942 } 4943 4944 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS); 4945 if (!MaskCnst) { 4946 // Mask must be constant 4947 return false; 4948 } 4949 4950 uint64_t MaskVal = MaskCnst->getZExtValue(); 4951 uint64_t NumZeros; 4952 uint64_t NumBits; 4953 if (isMask_64(MaskVal)) { 4954 NumZeros = 0; 4955 // The number of bits in the result bitfield will be the number of 4956 // trailing ones (the AND) minus the number of bits we shift off 4957 NumBits = countTrailingOnes(MaskVal) - ShiftAmt; 4958 } else if (isShiftedMask_64(MaskVal)) { 4959 NumZeros = countTrailingZeros(MaskVal); 4960 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros); 4961 // The number of bits in the result bitfield will be the number of 4962 // trailing zeros plus the number of set bits in the mask minus the 4963 // number of bits we shift off 4964 NumBits = NumZeros + NumOnes - ShiftAmt; 4965 } else { 4966 // This is not a mask we can handle 4967 return false; 4968 } 4969 4970 if (ShiftAmt < NumZeros) { 4971 // Handling this case would require extra logic that would make this 4972 // transformation non-profitable 4973 return false; 4974 } 4975 4976 Val = AndLHS; 4977 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32); 4978 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); 4979 } else if (LHS->getOpcode() == ISD::SHL) { 4980 // Here, we have a pattern like: 4981 // 4982 // (sra (shl val, NN), MM) 4983 // or 4984 // (srl (shl val, NN), MM) 4985 // 4986 // If MM >= NN, we can efficiently optimize this with bfe 4987 Val = LHS->getOperand(0); 4988 4989 SDValue ShlRHS = LHS->getOperand(1); 4990 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS); 4991 if (!ShlCnst) { 4992 // Shift amount must be constant 4993 return false; 4994 } 4995 uint64_t InnerShiftAmt = ShlCnst->getZExtValue(); 4996 4997 SDValue ShrRHS = RHS; 4998 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS); 4999 if (!ShrCnst) { 5000 // Shift amount must be constant 5001 return false; 5002 } 5003 uint64_t OuterShiftAmt = ShrCnst->getZExtValue(); 5004 5005 // To avoid extra codegen and be profitable, we need Outer >= Inner 5006 if (OuterShiftAmt < InnerShiftAmt) { 5007 return false; 5008 } 5009 5010 // If the outer shift is more than the type size, we have no bitfield to 5011 // extract (since we also check that the inner shift is <= the outer shift 5012 // then this also implies that the inner shift is < the type size) 5013 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) { 5014 return false; 5015 } 5016 5017 Start = 5018 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32); 5019 Len = 5020 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() - 5021 OuterShiftAmt, DL, MVT::i32); 5022 5023 if (N->getOpcode() == ISD::SRA) { 5024 // If we have a arithmetic right shift, we need to use the signed bfe 5025 // variant 5026 IsSigned = true; 5027 } 5028 } else { 5029 // No can do... 5030 return false; 5031 } 5032 } else { 5033 // No can do... 5034 return false; 5035 } 5036 5037 5038 unsigned Opc; 5039 // For the BFE operations we form here from "and" and "srl", always use the 5040 // unsigned variants. 5041 if (Val.getValueType() == MVT::i32) { 5042 if (IsSigned) { 5043 Opc = NVPTX::BFE_S32rii; 5044 } else { 5045 Opc = NVPTX::BFE_U32rii; 5046 } 5047 } else if (Val.getValueType() == MVT::i64) { 5048 if (IsSigned) { 5049 Opc = NVPTX::BFE_S64rii; 5050 } else { 5051 Opc = NVPTX::BFE_U64rii; 5052 } 5053 } else { 5054 // We cannot handle this type 5055 return false; 5056 } 5057 5058 SDValue Ops[] = { 5059 Val, Start, Len 5060 }; 5061 5062 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops)); 5063 return true; 5064 } 5065 5066 // SelectDirectAddr - Match a direct address for DAG. 5067 // A direct address could be a globaladdress or externalsymbol. 5068 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { 5069 // Return true if TGA or ES. 5070 if (N.getOpcode() == ISD::TargetGlobalAddress || 5071 N.getOpcode() == ISD::TargetExternalSymbol) { 5072 Address = N; 5073 return true; 5074 } 5075 if (N.getOpcode() == NVPTXISD::Wrapper) { 5076 Address = N.getOperand(0); 5077 return true; 5078 } 5079 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) { 5080 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue(); 5081 if (IID == Intrinsic::nvvm_ptr_gen_to_param) 5082 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam) 5083 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address)); 5084 } 5085 return false; 5086 } 5087 5088 // symbol+offset 5089 bool NVPTXDAGToDAGISel::SelectADDRsi_imp( 5090 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { 5091 if (Addr.getOpcode() == ISD::ADD) { 5092 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 5093 SDValue base = Addr.getOperand(0); 5094 if (SelectDirectAddr(base, Base)) { 5095 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), 5096 mvt); 5097 return true; 5098 } 5099 } 5100 } 5101 return false; 5102 } 5103 5104 // symbol+offset 5105 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr, 5106 SDValue &Base, SDValue &Offset) { 5107 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32); 5108 } 5109 5110 // symbol+offset 5111 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr, 5112 SDValue &Base, SDValue &Offset) { 5113 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64); 5114 } 5115 5116 // register+offset 5117 bool NVPTXDAGToDAGISel::SelectADDRri_imp( 5118 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { 5119 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 5120 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); 5121 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt); 5122 return true; 5123 } 5124 if (Addr.getOpcode() == ISD::TargetExternalSymbol || 5125 Addr.getOpcode() == ISD::TargetGlobalAddress) 5126 return false; // direct calls. 5127 5128 if (Addr.getOpcode() == ISD::ADD) { 5129 if (SelectDirectAddr(Addr.getOperand(0), Addr)) { 5130 return false; 5131 } 5132 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 5133 if (FrameIndexSDNode *FIN = 5134 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) 5135 // Constant offset from frame ref. 5136 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); 5137 else 5138 Base = Addr.getOperand(0); 5139 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), 5140 mvt); 5141 return true; 5142 } 5143 } 5144 return false; 5145 } 5146 5147 // register+offset 5148 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr, 5149 SDValue &Base, SDValue &Offset) { 5150 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32); 5151 } 5152 5153 // register+offset 5154 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, 5155 SDValue &Base, SDValue &Offset) { 5156 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64); 5157 } 5158 5159 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, 5160 unsigned int spN) const { 5161 const Value *Src = nullptr; 5162 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { 5163 if (spN == 0 && mN->getMemOperand()->getPseudoValue()) 5164 return true; 5165 Src = mN->getMemOperand()->getValue(); 5166 } 5167 if (!Src) 5168 return false; 5169 if (auto *PT = dyn_cast<PointerType>(Src->getType())) 5170 return (PT->getAddressSpace() == spN); 5171 return false; 5172 } 5173 5174 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 5175 /// inline asm expressions. 5176 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( 5177 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 5178 SDValue Op0, Op1; 5179 switch (ConstraintID) { 5180 default: 5181 return true; 5182 case InlineAsm::Constraint_m: // memory 5183 if (SelectDirectAddr(Op, Op0)) { 5184 OutOps.push_back(Op0); 5185 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); 5186 return false; 5187 } 5188 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) { 5189 OutOps.push_back(Op0); 5190 OutOps.push_back(Op1); 5191 return false; 5192 } 5193 break; 5194 } 5195 return true; 5196 } 5197 5198 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a 5199 /// conversion from \p SrcTy to \p DestTy. 5200 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy, 5201 bool IsSigned) { 5202 switch (SrcTy.SimpleTy) { 5203 default: 5204 llvm_unreachable("Unhandled source type"); 5205 case MVT::i8: 5206 switch (DestTy.SimpleTy) { 5207 default: 5208 llvm_unreachable("Unhandled dest type"); 5209 case MVT::i16: 5210 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8; 5211 case MVT::i32: 5212 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8; 5213 case MVT::i64: 5214 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8; 5215 } 5216 case MVT::i16: 5217 switch (DestTy.SimpleTy) { 5218 default: 5219 llvm_unreachable("Unhandled dest type"); 5220 case MVT::i8: 5221 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16; 5222 case MVT::i32: 5223 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16; 5224 case MVT::i64: 5225 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16; 5226 } 5227 case MVT::i32: 5228 switch (DestTy.SimpleTy) { 5229 default: 5230 llvm_unreachable("Unhandled dest type"); 5231 case MVT::i8: 5232 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32; 5233 case MVT::i16: 5234 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32; 5235 case MVT::i64: 5236 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32; 5237 } 5238 case MVT::i64: 5239 switch (DestTy.SimpleTy) { 5240 default: 5241 llvm_unreachable("Unhandled dest type"); 5242 case MVT::i8: 5243 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64; 5244 case MVT::i16: 5245 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64; 5246 case MVT::i32: 5247 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64; 5248 } 5249 } 5250 } 5251