//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file contains TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
void
AMDGPUTargetLowering::InitAMDILLowering() 48 { 49 int types[] = 50 { 51 (int)MVT::i8, 52 (int)MVT::i16, 53 (int)MVT::i32, 54 (int)MVT::f32, 55 (int)MVT::f64, 56 (int)MVT::i64, 57 (int)MVT::v2i8, 58 (int)MVT::v4i8, 59 (int)MVT::v2i16, 60 (int)MVT::v4i16, 61 (int)MVT::v4f32, 62 (int)MVT::v4i32, 63 (int)MVT::v2f32, 64 (int)MVT::v2i32, 65 (int)MVT::v2f64, 66 (int)MVT::v2i64 67 }; 68 69 int IntTypes[] = 70 { 71 (int)MVT::i8, 72 (int)MVT::i16, 73 (int)MVT::i32, 74 (int)MVT::i64 75 }; 76 77 int FloatTypes[] = 78 { 79 (int)MVT::f32, 80 (int)MVT::f64 81 }; 82 83 int VectorTypes[] = 84 { 85 (int)MVT::v2i8, 86 (int)MVT::v4i8, 87 (int)MVT::v2i16, 88 (int)MVT::v4i16, 89 (int)MVT::v4f32, 90 (int)MVT::v4i32, 91 (int)MVT::v2f32, 92 (int)MVT::v2i32, 93 (int)MVT::v2f64, 94 (int)MVT::v2i64 95 }; 96 size_t numTypes = sizeof(types) / sizeof(*types); 97 size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes); 98 size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes); 99 size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes); 100 101 const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>(); 102 // These are the current register classes that are 103 // supported 104 105 for (unsigned int x = 0; x < numTypes; ++x) { 106 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; 107 108 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types 109 // We cannot sextinreg, expand to shifts 110 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); 111 setOperationAction(ISD::SUBE, VT, Expand); 112 setOperationAction(ISD::SUBC, VT, Expand); 113 setOperationAction(ISD::ADDE, VT, Expand); 114 setOperationAction(ISD::ADDC, VT, Expand); 115 setOperationAction(ISD::BRCOND, VT, Custom); 116 setOperationAction(ISD::BR_JT, VT, Expand); 117 setOperationAction(ISD::BRIND, VT, Expand); 118 // TODO: Implement custom UREM/SREM routines 119 setOperationAction(ISD::SREM, VT, Expand); 120 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 121 
setOperationAction(ISD::UMUL_LOHI, VT, Expand); 122 if (VT != MVT::i64 && VT != MVT::v2i64) { 123 setOperationAction(ISD::SDIV, VT, Custom); 124 } 125 } 126 for (unsigned int x = 0; x < numFloatTypes; ++x) { 127 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; 128 129 // IL does not have these operations for floating point types 130 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); 131 setOperationAction(ISD::SETOLT, VT, Expand); 132 setOperationAction(ISD::SETOGE, VT, Expand); 133 setOperationAction(ISD::SETOGT, VT, Expand); 134 setOperationAction(ISD::SETOLE, VT, Expand); 135 setOperationAction(ISD::SETULT, VT, Expand); 136 setOperationAction(ISD::SETUGE, VT, Expand); 137 setOperationAction(ISD::SETUGT, VT, Expand); 138 setOperationAction(ISD::SETULE, VT, Expand); 139 } 140 141 for (unsigned int x = 0; x < numIntTypes; ++x) { 142 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; 143 144 // GPU also does not have divrem function for signed or unsigned 145 setOperationAction(ISD::SDIVREM, VT, Expand); 146 147 // GPU does not have [S|U]MUL_LOHI functions as a single instruction 148 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 149 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 150 151 // GPU doesn't have a rotl, rotr, or byteswap instruction 152 setOperationAction(ISD::ROTR, VT, Expand); 153 setOperationAction(ISD::BSWAP, VT, Expand); 154 155 // GPU doesn't have any counting operators 156 setOperationAction(ISD::CTPOP, VT, Expand); 157 setOperationAction(ISD::CTTZ, VT, Expand); 158 setOperationAction(ISD::CTLZ, VT, Expand); 159 } 160 161 for ( unsigned int ii = 0; ii < numVectorTypes; ++ii ) 162 { 163 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; 164 165 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 166 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); 167 setOperationAction(ISD::SDIVREM, VT, Expand); 168 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 169 // setOperationAction(ISD::VSETCC, VT, Expand); 170 
setOperationAction(ISD::SELECT_CC, VT, Expand); 171 172 } 173 if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) { 174 setOperationAction(ISD::MULHU, MVT::i64, Expand); 175 setOperationAction(ISD::MULHU, MVT::v2i64, Expand); 176 setOperationAction(ISD::MULHS, MVT::i64, Expand); 177 setOperationAction(ISD::MULHS, MVT::v2i64, Expand); 178 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 179 setOperationAction(ISD::SREM, MVT::v2i64, Expand); 180 setOperationAction(ISD::Constant , MVT::i64 , Legal); 181 setOperationAction(ISD::SDIV, MVT::v2i64, Expand); 182 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); 183 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); 184 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); 185 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); 186 } 187 if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) { 188 // we support loading/storing v2f64 but not operations on the type 189 setOperationAction(ISD::FADD, MVT::v2f64, Expand); 190 setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 191 setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 192 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); 193 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); 194 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); 195 // We want to expand vector conversions into their scalar 196 // counterparts. 197 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); 198 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); 199 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); 200 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); 201 setOperationAction(ISD::FABS, MVT::f64, Expand); 202 setOperationAction(ISD::FABS, MVT::v2f64, Expand); 203 } 204 // TODO: Fix the UDIV24 algorithm so it works for these 205 // types correctly. This needs vector comparisons 206 // for this to work correctly. 
207 setOperationAction(ISD::UDIV, MVT::v2i8, Expand); 208 setOperationAction(ISD::UDIV, MVT::v4i8, Expand); 209 setOperationAction(ISD::UDIV, MVT::v2i16, Expand); 210 setOperationAction(ISD::UDIV, MVT::v4i16, Expand); 211 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); 212 setOperationAction(ISD::SUBC, MVT::Other, Expand); 213 setOperationAction(ISD::ADDE, MVT::Other, Expand); 214 setOperationAction(ISD::ADDC, MVT::Other, Expand); 215 setOperationAction(ISD::BRCOND, MVT::Other, Custom); 216 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 217 setOperationAction(ISD::BRIND, MVT::Other, Expand); 218 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); 219 220 setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom); 221 222 // Use the default implementation. 223 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); 224 setOperationAction(ISD::Constant , MVT::i32 , Legal); 225 226 setSchedulingPreference(Sched::RegPressure); 227 setPow2DivIsCheap(false); 228 setPrefLoopAlignment(16); 229 setSelectIsExpensive(true); 230 setJumpIsExpensive(true); 231 232 maxStoresPerMemcpy = 4096; 233 maxStoresPerMemmove = 4096; 234 maxStoresPerMemset = 4096; 235 236 #undef numTypes 237 #undef numIntTypes 238 #undef numVectorTypes 239 #undef numFloatTypes 240 } 241 242 bool 243 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 244 const CallInst &I, unsigned Intrinsic) const 245 { 246 return false; 247 } 248 // The backend supports 32 and 64 bit floating point immediates 249 bool 250 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const 251 { 252 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 253 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { 254 return true; 255 } else { 256 return false; 257 } 258 } 259 260 bool 261 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const 262 { 263 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 264 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { 
265 return false; 266 } else { 267 return true; 268 } 269 } 270 271 272 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to 273 // be zero. Op is expected to be a target specific node. Used by DAG 274 // combiner. 275 276 void 277 AMDGPUTargetLowering::computeMaskedBitsForTargetNode( 278 const SDValue Op, 279 APInt &KnownZero, 280 APInt &KnownOne, 281 const SelectionDAG &DAG, 282 unsigned Depth) const 283 { 284 APInt KnownZero2; 285 APInt KnownOne2; 286 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything 287 switch (Op.getOpcode()) { 288 default: break; 289 case ISD::SELECT_CC: 290 DAG.ComputeMaskedBits( 291 Op.getOperand(1), 292 KnownZero, 293 KnownOne, 294 Depth + 1 295 ); 296 DAG.ComputeMaskedBits( 297 Op.getOperand(0), 298 KnownZero2, 299 KnownOne2 300 ); 301 assert((KnownZero & KnownOne) == 0 302 && "Bits known to be one AND zero?"); 303 assert((KnownZero2 & KnownOne2) == 0 304 && "Bits known to be one AND zero?"); 305 // Only known if known in both the LHS and RHS 306 KnownOne &= KnownOne2; 307 KnownZero &= KnownZero2; 308 break; 309 }; 310 } 311 312 //===----------------------------------------------------------------------===// 313 // Other Lowering Hooks 314 //===----------------------------------------------------------------------===// 315 316 SDValue 317 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const 318 { 319 EVT OVT = Op.getValueType(); 320 SDValue DST; 321 if (OVT.getScalarType() == MVT::i64) { 322 DST = LowerSDIV64(Op, DAG); 323 } else if (OVT.getScalarType() == MVT::i32) { 324 DST = LowerSDIV32(Op, DAG); 325 } else if (OVT.getScalarType() == MVT::i16 326 || OVT.getScalarType() == MVT::i8) { 327 DST = LowerSDIV24(Op, DAG); 328 } else { 329 DST = SDValue(Op.getNode(), 0); 330 } 331 return DST; 332 } 333 334 SDValue 335 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const 336 { 337 EVT OVT = Op.getValueType(); 338 SDValue DST; 339 if (OVT.getScalarType() == 
MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    // Unhandled width: hand the node back unchanged.
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

// Lower BUILD_VECTOR to a VBUILD splat of operand 0, then insert the
// remaining (non-undef) operands one element at a time.
SDValue
AMDGPUTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // VBUILD broadcasts operand 0 into every lane of the result vector.
  Nodes1 = DAG.getNode(AMDGPUISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  // Intentional fallthrough: case 4 inserts element 3, then falls into
  // case 3 (element 2), then case 2 (element 1).  Element 0 is already
  // supplied by the VBUILD splat above.
  switch(Op.getNumOperands()) {
  default:
  case 1:
    break;
  case 4:
    fourth = Op.getOperand(3);
    if (fourth.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          Op.getValueType(),
          Nodes1,
          fourth,
          // NOTE(review): indices 7/6/5 (instead of 3/2/1) appear to encode
          // something target-specific about INSERT_VECTOR_ELT lowering here
          // — confirm against the AMDIL instruction selection before touching.
          DAG.getConstant(7, MVT::i32));
    }
    // FALL-THROUGH
  case 3:
    third = Op.getOperand(2);
    if (third.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          Op.getValueType(),
          Nodes1,
          third,
          DAG.getConstant(6, MVT::i32));
    }
    // FALL-THROUGH
  case 2:
    second = Op.getOperand(1);
    if (second.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          Op.getValueType(),
          Nodes1,
          second,
          DAG.getConstant(5, MVT::i32));
    }
    break;
  };
  return Nodes1;
}

// Lower SIGN_EXTEND_INREG as a left shift followed by an arithmetic right
// shift.  Sub-32-bit sources are first zero-extended to 32 bits so the
// shift pair operates on a full register, then truncated back.
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  // baseBits: width of the value being sign-extended from.
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  // srcBits: width of the container; non-simple types fall back to 1.
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
// genIntType - Build an i32- or i64-based EVT with the same total bit
// width as `size` * `numEle` bits (e.g. 32-bit x 4 elements -> v4i32).
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  // Divide the total bit count by the element width (64 -> >>6, else >>5).
  int vEle = (iSize >> ((size == 64) ?
6 : 5)); 455 if (!vEle) { 456 vEle = 1; 457 } 458 if (size == 64) { 459 if (vEle == 1) { 460 return EVT(MVT::i64); 461 } else { 462 return EVT(MVT::getVectorVT(MVT::i64, vEle)); 463 } 464 } else { 465 if (vEle == 1) { 466 return EVT(MVT::i32); 467 } else { 468 return EVT(MVT::getVectorVT(MVT::i32, vEle)); 469 } 470 } 471 } 472 473 SDValue 474 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const 475 { 476 SDValue Chain = Op.getOperand(0); 477 SDValue Cond = Op.getOperand(1); 478 SDValue Jump = Op.getOperand(2); 479 SDValue Result; 480 Result = DAG.getNode( 481 AMDGPUISD::BRANCH_COND, 482 Op.getDebugLoc(), 483 Op.getValueType(), 484 Chain, Jump, Cond); 485 return Result; 486 } 487 488 SDValue 489 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const 490 { 491 DebugLoc DL = Op.getDebugLoc(); 492 EVT OVT = Op.getValueType(); 493 SDValue LHS = Op.getOperand(0); 494 SDValue RHS = Op.getOperand(1); 495 MVT INTTY; 496 MVT FLTTY; 497 if (!OVT.isVector()) { 498 INTTY = MVT::i32; 499 FLTTY = MVT::f32; 500 } else if (OVT.getVectorNumElements() == 2) { 501 INTTY = MVT::v2i32; 502 FLTTY = MVT::v2f32; 503 } else if (OVT.getVectorNumElements() == 4) { 504 INTTY = MVT::v4i32; 505 FLTTY = MVT::v4f32; 506 } 507 unsigned bitsize = OVT.getScalarType().getSizeInBits(); 508 // char|short jq = ia ^ ib; 509 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); 510 511 // jq = jq >> (bitsize - 2) 512 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); 513 514 // jq = jq | 0x1 515 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); 516 517 // jq = (int)jq 518 jq = DAG.getSExtOrTrunc(jq, DL, INTTY); 519 520 // int ia = (int)LHS; 521 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); 522 523 // int ib, (int)RHS; 524 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); 525 526 // float fa = (float)ia; 527 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 528 529 // float fb = (float)ib; 530 SDValue fb = 
DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 531 532 // float fq = native_divide(fa, fb); 533 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb); 534 535 // fq = trunc(fq); 536 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 537 538 // float fqneg = -fq; 539 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); 540 541 // float fr = mad(fqneg, fb, fa); 542 SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa); 543 544 // int iq = (int)fq; 545 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 546 547 // fr = fabs(fr); 548 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); 549 550 // fb = fabs(fb); 551 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); 552 553 // int cv = fr >= fb; 554 SDValue cv; 555 if (INTTY == MVT::i32) { 556 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 557 } else { 558 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 559 } 560 // jq = (cv ? jq : 0); 561 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, 562 DAG.getConstant(0, OVT)); 563 // dst = iq + jq; 564 iq = DAG.getSExtOrTrunc(iq, DL, OVT); 565 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); 566 return iq; 567 } 568 569 SDValue 570 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const 571 { 572 DebugLoc DL = Op.getDebugLoc(); 573 EVT OVT = Op.getValueType(); 574 SDValue LHS = Op.getOperand(0); 575 SDValue RHS = Op.getOperand(1); 576 // The LowerSDIV32 function generates equivalent to the following IL. 
577 // mov r0, LHS 578 // mov r1, RHS 579 // ilt r10, r0, 0 580 // ilt r11, r1, 0 581 // iadd r0, r0, r10 582 // iadd r1, r1, r11 583 // ixor r0, r0, r10 584 // ixor r1, r1, r11 585 // udiv r0, r0, r1 586 // ixor r10, r10, r11 587 // iadd r0, r0, r10 588 // ixor DST, r0, r10 589 590 // mov r0, LHS 591 SDValue r0 = LHS; 592 593 // mov r1, RHS 594 SDValue r1 = RHS; 595 596 // ilt r10, r0, 0 597 SDValue r10 = DAG.getSelectCC(DL, 598 r0, DAG.getConstant(0, OVT), 599 DAG.getConstant(-1, MVT::i32), 600 DAG.getConstant(0, MVT::i32), 601 ISD::SETLT); 602 603 // ilt r11, r1, 0 604 SDValue r11 = DAG.getSelectCC(DL, 605 r1, DAG.getConstant(0, OVT), 606 DAG.getConstant(-1, MVT::i32), 607 DAG.getConstant(0, MVT::i32), 608 ISD::SETLT); 609 610 // iadd r0, r0, r10 611 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 612 613 // iadd r1, r1, r11 614 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 615 616 // ixor r0, r0, r10 617 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 618 619 // ixor r1, r1, r11 620 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 621 622 // udiv r0, r0, r1 623 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); 624 625 // ixor r10, r10, r11 626 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); 627 628 // iadd r0, r0, r10 629 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 630 631 // ixor DST, r0, r10 632 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 633 return DST; 634 } 635 636 SDValue 637 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const 638 { 639 return SDValue(Op.getNode(), 0); 640 } 641 642 SDValue 643 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const 644 { 645 DebugLoc DL = Op.getDebugLoc(); 646 EVT OVT = Op.getValueType(); 647 MVT INTTY = MVT::i32; 648 if (OVT == MVT::v2i8) { 649 INTTY = MVT::v2i32; 650 } else if (OVT == MVT::v4i8) { 651 INTTY = MVT::v4i32; 652 } 653 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 654 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 655 LHS = 
DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 656 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 657 return LHS; 658 } 659 660 SDValue 661 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const 662 { 663 DebugLoc DL = Op.getDebugLoc(); 664 EVT OVT = Op.getValueType(); 665 MVT INTTY = MVT::i32; 666 if (OVT == MVT::v2i16) { 667 INTTY = MVT::v2i32; 668 } else if (OVT == MVT::v4i16) { 669 INTTY = MVT::v4i32; 670 } 671 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 672 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 673 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 674 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 675 return LHS; 676 } 677 678 SDValue 679 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const 680 { 681 DebugLoc DL = Op.getDebugLoc(); 682 EVT OVT = Op.getValueType(); 683 SDValue LHS = Op.getOperand(0); 684 SDValue RHS = Op.getOperand(1); 685 // The LowerSREM32 function generates equivalent to the following IL. 686 // mov r0, LHS 687 // mov r1, RHS 688 // ilt r10, r0, 0 689 // ilt r11, r1, 0 690 // iadd r0, r0, r10 691 // iadd r1, r1, r11 692 // ixor r0, r0, r10 693 // ixor r1, r1, r11 694 // udiv r20, r0, r1 695 // umul r20, r20, r1 696 // sub r0, r0, r20 697 // iadd r0, r0, r10 698 // ixor DST, r0, r10 699 700 // mov r0, LHS 701 SDValue r0 = LHS; 702 703 // mov r1, RHS 704 SDValue r1 = RHS; 705 706 // ilt r10, r0, 0 707 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT); 708 709 // ilt r11, r1, 0 710 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT); 711 712 // iadd r0, r0, r10 713 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 714 715 // iadd r1, r1, r11 716 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 717 718 // ixor r0, r0, r10 719 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 720 721 // ixor r1, r1, r11 722 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 723 724 // udiv r20, r0, r1 725 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); 726 
727 // umul r20, r20, r1 728 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1); 729 730 // sub r0, r0, r20 731 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); 732 733 // iadd r0, r0, r10 734 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 735 736 // ixor DST, r0, r10 737 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 738 return DST; 739 } 740 741 SDValue 742 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const 743 { 744 return SDValue(Op.getNode(), 0); 745 } 746