1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief TargetLowering functions borrowed from AMDIL. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUISelLowering.h" 16 #include "AMDGPURegisterInfo.h" 17 #include "AMDGPUSubtarget.h" 18 #include "AMDILIntrinsicInfo.h" 19 #include "llvm/CodeGen/MachineFrameInfo.h" 20 #include "llvm/CodeGen/MachineRegisterInfo.h" 21 #include "llvm/CodeGen/PseudoSourceValue.h" 22 #include "llvm/CodeGen/SelectionDAG.h" 23 #include "llvm/CodeGen/SelectionDAGNodes.h" 24 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 25 #include "llvm/IR/CallingConv.h" 26 #include "llvm/IR/DerivedTypes.h" 27 #include "llvm/IR/Instructions.h" 28 #include "llvm/IR/Intrinsics.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include "llvm/Target/TargetInstrInfo.h" 31 #include "llvm/Target/TargetOptions.h" 32 33 using namespace llvm; 34 //===----------------------------------------------------------------------===// 35 // TargetLowering Implementation Help Functions End 36 //===----------------------------------------------------------------------===// 37 38 //===----------------------------------------------------------------------===// 39 // TargetLowering Class Implementation Begins 40 //===----------------------------------------------------------------------===// 41 void AMDGPUTargetLowering::InitAMDILLowering() { 42 static const int types[] = { 43 (int)MVT::i8, 44 (int)MVT::i16, 45 (int)MVT::i32, 46 (int)MVT::f32, 47 (int)MVT::f64, 48 (int)MVT::i64, 49 (int)MVT::v2i8, 50 (int)MVT::v4i8, 51 (int)MVT::v2i16, 52 (int)MVT::v4i16, 53 (int)MVT::v4f32, 54 (int)MVT::v4i32, 55 (int)MVT::v2f32, 56 (int)MVT::v2i32, 57 (int)MVT::v2f64, 58 (int)MVT::v2i64 59 }; 60 61 static const int IntTypes[] = { 62 (int)MVT::i8, 63 (int)MVT::i16, 64 (int)MVT::i32, 65 (int)MVT::i64 66 }; 67 68 static const int FloatTypes[] = { 69 (int)MVT::f32, 70 (int)MVT::f64 71 }; 72 73 static const int VectorTypes[] = { 74 (int)MVT::v2i8, 75 (int)MVT::v4i8, 76 (int)MVT::v2i16, 77 (int)MVT::v4i16, 78 (int)MVT::v4f32, 79 (int)MVT::v4i32, 80 (int)MVT::v2f32, 81 (int)MVT::v2i32, 82 (int)MVT::v2f64, 83 (int)MVT::v2i64 84 }; 85 const size_t NumTypes = array_lengthof(types); 86 const size_t NumFloatTypes = array_lengthof(FloatTypes); 87 const size_t NumIntTypes = array_lengthof(IntTypes); 88 const size_t NumVectorTypes = array_lengthof(VectorTypes); 89 90 const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>(); 91 // These are the current register classes that are 92 // supported 93 94 for (unsigned int x = 0; x < NumTypes; ++x) { 95 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; 96 97 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types 98 // We cannot sextinreg, expand to shifts 99 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); 100 setOperationAction(ISD::SUBE, VT, Expand); 101 setOperationAction(ISD::SUBC, VT, Expand); 102 setOperationAction(ISD::ADDE, VT, Expand); 103 setOperationAction(ISD::ADDC, VT, Expand); 104 setOperationAction(ISD::BRCOND, VT, Custom); 105 setOperationAction(ISD::BR_JT, VT, Expand); 106 setOperationAction(ISD::BRIND, VT, Expand); 107 // TODO: Implement custom UREM/SREM routines 108 setOperationAction(ISD::SREM, VT, Expand); 109 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 110 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 111 if (VT != MVT::i64 && VT != MVT::v2i64) { 112 setOperationAction(ISD::SDIV, VT, Custom); 113 } 114 } 115 for (unsigned int x = 0; x < NumFloatTypes; ++x) { 116 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; 117 118 // IL does not have these operations for floating point types 119 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); 120 setOperationAction(ISD::SETOLT, VT, Expand); 121 setOperationAction(ISD::SETOGE, VT, Expand); 122 setOperationAction(ISD::SETOGT, VT, Expand); 123 setOperationAction(ISD::SETOLE, VT, Expand); 124 setOperationAction(ISD::SETULT, VT, Expand); 125 setOperationAction(ISD::SETUGE, VT, Expand); 126 setOperationAction(ISD::SETUGT, VT, Expand); 127 setOperationAction(ISD::SETULE, VT, Expand); 128 } 129 130 for (unsigned int x = 0; x < NumIntTypes; ++x) { 131 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; 132 133 // GPU also does not have divrem function for signed or unsigned 134 setOperationAction(ISD::SDIVREM, VT, Expand); 135 136 // GPU does not have [S|U]MUL_LOHI functions as a single instruction 137 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 138 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 139 140 setOperationAction(ISD::BSWAP, VT, Expand); 141 142 // GPU doesn't have any counting operators 143 setOperationAction(ISD::CTPOP, VT, Expand); 144 setOperationAction(ISD::CTTZ, VT, Expand); 145 setOperationAction(ISD::CTLZ, VT, Expand); 146 } 147 148 for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) { 149 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; 150 151 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); 152 setOperationAction(ISD::SDIVREM, VT, Expand); 153 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 154 // setOperationAction(ISD::VSETCC, VT, Expand); 155 setOperationAction(ISD::SELECT_CC, VT, Expand); 156 157 } 158 setOperationAction(ISD::MULHU, MVT::i64, Expand); 159 setOperationAction(ISD::MULHU, MVT::v2i64, Expand); 160 setOperationAction(ISD::MULHS, MVT::i64, Expand); 161 setOperationAction(ISD::MULHS, MVT::v2i64, Expand); 162 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 163 setOperationAction(ISD::SREM, MVT::v2i64, Expand); 164 setOperationAction(ISD::Constant , MVT::i64 , Legal); 165 setOperationAction(ISD::SDIV, MVT::v2i64, Expand); 166 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); 167 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); 168 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); 169 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); 170 if (STM.hasHWFP64()) { 171 // we support loading/storing v2f64 but not operations on the type 172 setOperationAction(ISD::FADD, MVT::v2f64, Expand); 173 setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 174 setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 175 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); 176 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); 177 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); 178 // We want to expand vector conversions into their scalar 179 // counterparts. 180 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); 181 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); 182 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); 183 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); 184 setOperationAction(ISD::FABS, MVT::f64, Expand); 185 setOperationAction(ISD::FABS, MVT::v2f64, Expand); 186 } 187 // TODO: Fix the UDIV24 algorithm so it works for these 188 // types correctly. This needs vector comparisons 189 // for this to work correctly. 190 setOperationAction(ISD::UDIV, MVT::v2i8, Expand); 191 setOperationAction(ISD::UDIV, MVT::v4i8, Expand); 192 setOperationAction(ISD::UDIV, MVT::v2i16, Expand); 193 setOperationAction(ISD::UDIV, MVT::v4i16, Expand); 194 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); 195 setOperationAction(ISD::SUBC, MVT::Other, Expand); 196 setOperationAction(ISD::ADDE, MVT::Other, Expand); 197 setOperationAction(ISD::ADDC, MVT::Other, Expand); 198 setOperationAction(ISD::BRCOND, MVT::Other, Custom); 199 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 200 setOperationAction(ISD::BRIND, MVT::Other, Expand); 201 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); 202 203 204 // Use the default implementation. 205 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); 206 setOperationAction(ISD::Constant , MVT::i32 , Legal); 207 208 setSchedulingPreference(Sched::RegPressure); 209 setPow2DivIsCheap(false); 210 setSelectIsExpensive(true); 211 setJumpIsExpensive(true); 212 213 MaxStoresPerMemcpy = 4096; 214 MaxStoresPerMemmove = 4096; 215 MaxStoresPerMemset = 4096; 216 217 } 218 219 bool 220 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 221 const CallInst &I, unsigned Intrinsic) const { 222 return false; 223 } 224 225 // The backend supports 32 and 64 bit floating point immediates 226 bool 227 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 228 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 229 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { 230 return true; 231 } else { 232 return false; 233 } 234 } 235 236 bool 237 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const { 238 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 239 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { 240 return false; 241 } else { 242 return true; 243 } 244 } 245 246 247 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to 248 // be zero. Op is expected to be a target specific node. Used by DAG 249 // combiner. 250 251 void 252 AMDGPUTargetLowering::computeMaskedBitsForTargetNode( 253 const SDValue Op, 254 APInt &KnownZero, 255 APInt &KnownOne, 256 const SelectionDAG &DAG, 257 unsigned Depth) const { 258 APInt KnownZero2; 259 APInt KnownOne2; 260 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything 261 switch (Op.getOpcode()) { 262 default: break; 263 case ISD::SELECT_CC: 264 DAG.ComputeMaskedBits( 265 Op.getOperand(1), 266 KnownZero, 267 KnownOne, 268 Depth + 1 269 ); 270 DAG.ComputeMaskedBits( 271 Op.getOperand(0), 272 KnownZero2, 273 KnownOne2 274 ); 275 assert((KnownZero & KnownOne) == 0 276 && "Bits known to be one AND zero?"); 277 assert((KnownZero2 & KnownOne2) == 0 278 && "Bits known to be one AND zero?"); 279 // Only known if known in both the LHS and RHS 280 KnownOne &= KnownOne2; 281 KnownZero &= KnownZero2; 282 break; 283 }; 284 } 285 286 //===----------------------------------------------------------------------===// 287 // Other Lowering Hooks 288 //===----------------------------------------------------------------------===// 289 290 SDValue 291 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { 292 EVT OVT = Op.getValueType(); 293 SDValue DST; 294 if (OVT.getScalarType() == MVT::i64) { 295 DST = LowerSDIV64(Op, DAG); 296 } else if (OVT.getScalarType() == MVT::i32) { 297 DST = LowerSDIV32(Op, DAG); 298 } else if (OVT.getScalarType() == MVT::i16 299 || OVT.getScalarType() == MVT::i8) { 300 DST = LowerSDIV24(Op, DAG); 301 } else { 302 DST = SDValue(Op.getNode(), 0); 303 } 304 return DST; 305 } 306 307 SDValue 308 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const { 309 EVT OVT = Op.getValueType(); 310 SDValue DST; 311 if (OVT.getScalarType() == MVT::i64) { 312 DST = LowerSREM64(Op, DAG); 313 } else if (OVT.getScalarType() == MVT::i32) { 314 DST = LowerSREM32(Op, DAG); 315 } else if (OVT.getScalarType() == MVT::i16) { 316 DST = LowerSREM16(Op, DAG); 317 } else if (OVT.getScalarType() == MVT::i8) { 318 DST = LowerSREM8(Op, DAG); 319 } else { 320 DST = SDValue(Op.getNode(), 0); 321 } 322 return DST; 323 } 324 325 SDValue 326 AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { 327 SDValue Data = Op.getOperand(0); 328 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1)); 329 SDLoc DL(Op); 330 EVT DVT = Data.getValueType(); 331 EVT BVT = BaseType->getVT(); 332 unsigned baseBits = BVT.getScalarType().getSizeInBits(); 333 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; 334 unsigned shiftBits = srcBits - baseBits; 335 if (srcBits < 32) { 336 // If the op is less than 32 bits, then it needs to extend to 32bits 337 // so it can properly keep the upper bits valid. 338 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); 339 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); 340 shiftBits = 32 - baseBits; 341 DVT = IVT; 342 } 343 SDValue Shift = DAG.getConstant(shiftBits, DVT); 344 // Shift left by 'Shift' bits. 345 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); 346 // Signed shift Right by 'Shift' bits. 347 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); 348 if (srcBits < 32) { 349 // Once the sign extension is done, the op needs to be converted to 350 // its original type. 351 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); 352 } 353 return Data; 354 } 355 EVT 356 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const { 357 int iSize = (size * numEle); 358 int vEle = (iSize >> ((size == 64) ? 6 : 5)); 359 if (!vEle) { 360 vEle = 1; 361 } 362 if (size == 64) { 363 if (vEle == 1) { 364 return EVT(MVT::i64); 365 } else { 366 return EVT(MVT::getVectorVT(MVT::i64, vEle)); 367 } 368 } else { 369 if (vEle == 1) { 370 return EVT(MVT::i32); 371 } else { 372 return EVT(MVT::getVectorVT(MVT::i32, vEle)); 373 } 374 } 375 } 376 377 SDValue 378 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 379 SDValue Chain = Op.getOperand(0); 380 SDValue Cond = Op.getOperand(1); 381 SDValue Jump = Op.getOperand(2); 382 SDValue Result; 383 Result = DAG.getNode( 384 AMDGPUISD::BRANCH_COND, 385 SDLoc(Op), 386 Op.getValueType(), 387 Chain, Jump, Cond); 388 return Result; 389 } 390 391 SDValue 392 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const { 393 SDLoc DL(Op); 394 EVT OVT = Op.getValueType(); 395 SDValue LHS = Op.getOperand(0); 396 SDValue RHS = Op.getOperand(1); 397 MVT INTTY; 398 MVT FLTTY; 399 if (!OVT.isVector()) { 400 INTTY = MVT::i32; 401 FLTTY = MVT::f32; 402 } else if (OVT.getVectorNumElements() == 2) { 403 INTTY = MVT::v2i32; 404 FLTTY = MVT::v2f32; 405 } else if (OVT.getVectorNumElements() == 4) { 406 INTTY = MVT::v4i32; 407 FLTTY = MVT::v4f32; 408 } 409 unsigned bitsize = OVT.getScalarType().getSizeInBits(); 410 // char|short jq = ia ^ ib; 411 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); 412 413 // jq = jq >> (bitsize - 2) 414 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); 415 416 // jq = jq | 0x1 417 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); 418 419 // jq = (int)jq 420 jq = DAG.getSExtOrTrunc(jq, DL, INTTY); 421 422 // int ia = (int)LHS; 423 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); 424 425 // int ib, (int)RHS; 426 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); 427 428 // float fa = (float)ia; 429 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 430 431 // float fb = (float)ib; 432 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 433 434 // float fq = native_divide(fa, fb); 435 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb); 436 437 // fq = trunc(fq); 438 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 439 440 // float fqneg = -fq; 441 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); 442 443 // float fr = mad(fqneg, fb, fa); 444 SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY, 445 DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa); 446 447 // int iq = (int)fq; 448 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 449 450 // fr = fabs(fr); 451 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); 452 453 // fb = fabs(fb); 454 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); 455 456 // int cv = fr >= fb; 457 SDValue cv; 458 if (INTTY == MVT::i32) { 459 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 460 } else { 461 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 462 } 463 // jq = (cv ? jq : 0); 464 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, 465 DAG.getConstant(0, OVT)); 466 // dst = iq + jq; 467 iq = DAG.getSExtOrTrunc(iq, DL, OVT); 468 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); 469 return iq; 470 } 471 472 SDValue 473 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const { 474 SDLoc DL(Op); 475 EVT OVT = Op.getValueType(); 476 SDValue LHS = Op.getOperand(0); 477 SDValue RHS = Op.getOperand(1); 478 // The LowerSDIV32 function generates equivalent to the following IL. 479 // mov r0, LHS 480 // mov r1, RHS 481 // ilt r10, r0, 0 482 // ilt r11, r1, 0 483 // iadd r0, r0, r10 484 // iadd r1, r1, r11 485 // ixor r0, r0, r10 486 // ixor r1, r1, r11 487 // udiv r0, r0, r1 488 // ixor r10, r10, r11 489 // iadd r0, r0, r10 490 // ixor DST, r0, r10 491 492 // mov r0, LHS 493 SDValue r0 = LHS; 494 495 // mov r1, RHS 496 SDValue r1 = RHS; 497 498 // ilt r10, r0, 0 499 SDValue r10 = DAG.getSelectCC(DL, 500 r0, DAG.getConstant(0, OVT), 501 DAG.getConstant(-1, MVT::i32), 502 DAG.getConstant(0, MVT::i32), 503 ISD::SETLT); 504 505 // ilt r11, r1, 0 506 SDValue r11 = DAG.getSelectCC(DL, 507 r1, DAG.getConstant(0, OVT), 508 DAG.getConstant(-1, MVT::i32), 509 DAG.getConstant(0, MVT::i32), 510 ISD::SETLT); 511 512 // iadd r0, r0, r10 513 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 514 515 // iadd r1, r1, r11 516 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 517 518 // ixor r0, r0, r10 519 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 520 521 // ixor r1, r1, r11 522 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 523 524 // udiv r0, r0, r1 525 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); 526 527 // ixor r10, r10, r11 528 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); 529 530 // iadd r0, r0, r10 531 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 532 533 // ixor DST, r0, r10 534 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 535 return DST; 536 } 537 538 SDValue 539 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const { 540 return SDValue(Op.getNode(), 0); 541 } 542 543 SDValue 544 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const { 545 SDLoc DL(Op); 546 EVT OVT = Op.getValueType(); 547 MVT INTTY = MVT::i32; 548 if (OVT == MVT::v2i8) { 549 INTTY = MVT::v2i32; 550 } else if (OVT == MVT::v4i8) { 551 INTTY = MVT::v4i32; 552 } 553 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 554 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 555 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 556 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 557 return LHS; 558 } 559 560 SDValue 561 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const { 562 SDLoc DL(Op); 563 EVT OVT = Op.getValueType(); 564 MVT INTTY = MVT::i32; 565 if (OVT == MVT::v2i16) { 566 INTTY = MVT::v2i32; 567 } else if (OVT == MVT::v4i16) { 568 INTTY = MVT::v4i32; 569 } 570 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 571 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 572 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 573 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 574 return LHS; 575 } 576 577 SDValue 578 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const { 579 SDLoc DL(Op); 580 EVT OVT = Op.getValueType(); 581 SDValue LHS = Op.getOperand(0); 582 SDValue RHS = Op.getOperand(1); 583 // The LowerSREM32 function generates equivalent to the following IL. 584 // mov r0, LHS 585 // mov r1, RHS 586 // ilt r10, r0, 0 587 // ilt r11, r1, 0 588 // iadd r0, r0, r10 589 // iadd r1, r1, r11 590 // ixor r0, r0, r10 591 // ixor r1, r1, r11 592 // udiv r20, r0, r1 593 // umul r20, r20, r1 594 // sub r0, r0, r20 595 // iadd r0, r0, r10 596 // ixor DST, r0, r10 597 598 // mov r0, LHS 599 SDValue r0 = LHS; 600 601 // mov r1, RHS 602 SDValue r1 = RHS; 603 604 // ilt r10, r0, 0 605 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT); 606 607 // ilt r11, r1, 0 608 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT); 609 610 // iadd r0, r0, r10 611 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 612 613 // iadd r1, r1, r11 614 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 615 616 // ixor r0, r0, r10 617 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 618 619 // ixor r1, r1, r11 620 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 621 622 // udiv r20, r0, r1 623 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); 624 625 // umul r20, r20, r1 626 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1); 627 628 // sub r0, r0, r20 629 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); 630 631 // iadd r0, r0, r10 632 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 633 634 // ixor DST, r0, r10 635 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 636 return DST; 637 } 638 639 SDValue 640 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const { 641 return SDValue(Op.getNode(), 0); 642 } 643