1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief TargetLowering functions borrowed from AMDIL. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUISelLowering.h" 16 #include "AMDGPURegisterInfo.h" 17 #include "AMDGPUSubtarget.h" 18 #include "AMDILDevices.h" 19 #include "AMDILIntrinsicInfo.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineRegisterInfo.h" 22 #include "llvm/CodeGen/PseudoSourceValue.h" 23 #include "llvm/CodeGen/SelectionDAG.h" 24 #include "llvm/CodeGen/SelectionDAGNodes.h" 25 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 26 #include "llvm/IR/CallingConv.h" 27 #include "llvm/IR/DerivedTypes.h" 28 #include "llvm/IR/Instructions.h" 29 #include "llvm/IR/Intrinsics.h" 30 #include "llvm/Support/raw_ostream.h" 31 #include "llvm/Target/TargetInstrInfo.h" 32 #include "llvm/Target/TargetOptions.h" 33 34 using namespace llvm; 35 //===----------------------------------------------------------------------===// 36 // TargetLowering Implementation Help Functions End 37 //===----------------------------------------------------------------------===// 38 39 //===----------------------------------------------------------------------===// 40 // TargetLowering Class Implementation Begins 41 //===----------------------------------------------------------------------===// 42 void AMDGPUTargetLowering::InitAMDILLowering() { 43 int types[] = { 44 (int)MVT::i8, 45 (int)MVT::i16, 46 (int)MVT::i32, 47 (int)MVT::f32, 48 (int)MVT::f64, 49 (int)MVT::i64, 50 (int)MVT::v2i8, 51 (int)MVT::v4i8, 52 (int)MVT::v2i16, 53 (int)MVT::v4i16, 54 (int)MVT::v4f32, 55 (int)MVT::v4i32, 56 (int)MVT::v2f32, 57 (int)MVT::v2i32, 58 (int)MVT::v2f64, 59 (int)MVT::v2i64 60 }; 61 62 int IntTypes[] = { 63 (int)MVT::i8, 64 (int)MVT::i16, 65 (int)MVT::i32, 66 (int)MVT::i64 67 }; 68 69 int FloatTypes[] = { 70 (int)MVT::f32, 71 (int)MVT::f64 72 }; 73 74 int VectorTypes[] = { 75 (int)MVT::v2i8, 76 (int)MVT::v4i8, 77 (int)MVT::v2i16, 78 (int)MVT::v4i16, 79 (int)MVT::v4f32, 80 (int)MVT::v4i32, 81 (int)MVT::v2f32, 82 (int)MVT::v2i32, 83 (int)MVT::v2f64, 84 (int)MVT::v2i64 85 }; 86 size_t NumTypes = sizeof(types) / sizeof(*types); 87 size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes); 88 size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes); 89 size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes); 90 91 const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>(); 92 // These are the current register classes that are 93 // supported 94 95 for (unsigned int x = 0; x < NumTypes; ++x) { 96 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; 97 98 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types 99 // We cannot sextinreg, expand to shifts 100 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); 101 setOperationAction(ISD::SUBE, VT, Expand); 102 setOperationAction(ISD::SUBC, VT, Expand); 103 setOperationAction(ISD::ADDE, VT, Expand); 104 setOperationAction(ISD::ADDC, VT, Expand); 105 setOperationAction(ISD::BRCOND, VT, Custom); 106 setOperationAction(ISD::BR_JT, VT, Expand); 107 setOperationAction(ISD::BRIND, VT, Expand); 108 // TODO: Implement custom UREM/SREM routines 109 setOperationAction(ISD::SREM, VT, Expand); 110 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 111 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 112 if (VT != MVT::i64 && VT != MVT::v2i64) { 113 setOperationAction(ISD::SDIV, VT, Custom); 114 } 115 } 116 for (unsigned int x = 0; x < NumFloatTypes; ++x) { 117 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; 118 119 // IL does not have these operations for floating point types 120 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); 121 setOperationAction(ISD::SETOLT, VT, Expand); 122 setOperationAction(ISD::SETOGE, VT, Expand); 123 setOperationAction(ISD::SETOGT, VT, Expand); 124 setOperationAction(ISD::SETOLE, VT, Expand); 125 setOperationAction(ISD::SETULT, VT, Expand); 126 setOperationAction(ISD::SETUGE, VT, Expand); 127 setOperationAction(ISD::SETUGT, VT, Expand); 128 setOperationAction(ISD::SETULE, VT, Expand); 129 } 130 131 for (unsigned int x = 0; x < NumIntTypes; ++x) { 132 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; 133 134 // GPU also does not have divrem function for signed or unsigned 135 setOperationAction(ISD::SDIVREM, VT, Expand); 136 137 // GPU does not have [S|U]MUL_LOHI functions as a single instruction 138 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 139 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 140 141 // GPU doesn't have a rotl, rotr, or byteswap instruction 142 setOperationAction(ISD::ROTR, VT, Expand); 143 setOperationAction(ISD::BSWAP, VT, Expand); 144 145 // GPU doesn't have any counting operators 146 setOperationAction(ISD::CTPOP, VT, Expand); 147 setOperationAction(ISD::CTTZ, VT, Expand); 148 setOperationAction(ISD::CTLZ, VT, Expand); 149 } 150 151 for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) { 152 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; 153 154 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); 155 setOperationAction(ISD::SDIVREM, VT, Expand); 156 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 157 // setOperationAction(ISD::VSETCC, VT, Expand); 158 setOperationAction(ISD::SELECT_CC, VT, Expand); 159 160 } 161 if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) { 162 setOperationAction(ISD::MULHU, MVT::i64, Expand); 163 setOperationAction(ISD::MULHU, MVT::v2i64, Expand); 164 setOperationAction(ISD::MULHS, MVT::i64, Expand); 165 setOperationAction(ISD::MULHS, MVT::v2i64, Expand); 166 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 167 setOperationAction(ISD::SREM, MVT::v2i64, Expand); 168 setOperationAction(ISD::Constant , MVT::i64 , Legal); 169 setOperationAction(ISD::SDIV, MVT::v2i64, Expand); 170 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); 171 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); 172 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); 173 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); 174 } 175 if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) { 176 // we support loading/storing v2f64 but not operations on the type 177 setOperationAction(ISD::FADD, MVT::v2f64, Expand); 178 setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 179 setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 180 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); 181 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); 182 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); 183 // We want to expand vector conversions into their scalar 184 // counterparts. 185 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); 186 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); 187 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); 188 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); 189 setOperationAction(ISD::FABS, MVT::f64, Expand); 190 setOperationAction(ISD::FABS, MVT::v2f64, Expand); 191 } 192 // TODO: Fix the UDIV24 algorithm so it works for these 193 // types correctly. This needs vector comparisons 194 // for this to work correctly. 195 setOperationAction(ISD::UDIV, MVT::v2i8, Expand); 196 setOperationAction(ISD::UDIV, MVT::v4i8, Expand); 197 setOperationAction(ISD::UDIV, MVT::v2i16, Expand); 198 setOperationAction(ISD::UDIV, MVT::v4i16, Expand); 199 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); 200 setOperationAction(ISD::SUBC, MVT::Other, Expand); 201 setOperationAction(ISD::ADDE, MVT::Other, Expand); 202 setOperationAction(ISD::ADDC, MVT::Other, Expand); 203 setOperationAction(ISD::BRCOND, MVT::Other, Custom); 204 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 205 setOperationAction(ISD::BRIND, MVT::Other, Expand); 206 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); 207 208 209 // Use the default implementation. 210 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); 211 setOperationAction(ISD::Constant , MVT::i32 , Legal); 212 213 setSchedulingPreference(Sched::RegPressure); 214 setPow2DivIsCheap(false); 215 setSelectIsExpensive(true); 216 setJumpIsExpensive(true); 217 218 MaxStoresPerMemcpy = 4096; 219 MaxStoresPerMemmove = 4096; 220 MaxStoresPerMemset = 4096; 221 222 } 223 224 bool 225 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 226 const CallInst &I, unsigned Intrinsic) const { 227 return false; 228 } 229 230 // The backend supports 32 and 64 bit floating point immediates 231 bool 232 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 233 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 234 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { 235 return true; 236 } else { 237 return false; 238 } 239 } 240 241 bool 242 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const { 243 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 244 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { 245 return false; 246 } else { 247 return true; 248 } 249 } 250 251 252 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to 253 // be zero. Op is expected to be a target specific node. Used by DAG 254 // combiner. 255 256 void 257 AMDGPUTargetLowering::computeMaskedBitsForTargetNode( 258 const SDValue Op, 259 APInt &KnownZero, 260 APInt &KnownOne, 261 const SelectionDAG &DAG, 262 unsigned Depth) const { 263 APInt KnownZero2; 264 APInt KnownOne2; 265 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything 266 switch (Op.getOpcode()) { 267 default: break; 268 case ISD::SELECT_CC: 269 DAG.ComputeMaskedBits( 270 Op.getOperand(1), 271 KnownZero, 272 KnownOne, 273 Depth + 1 274 ); 275 DAG.ComputeMaskedBits( 276 Op.getOperand(0), 277 KnownZero2, 278 KnownOne2 279 ); 280 assert((KnownZero & KnownOne) == 0 281 && "Bits known to be one AND zero?"); 282 assert((KnownZero2 & KnownOne2) == 0 283 && "Bits known to be one AND zero?"); 284 // Only known if known in both the LHS and RHS 285 KnownOne &= KnownOne2; 286 KnownZero &= KnownZero2; 287 break; 288 }; 289 } 290 291 //===----------------------------------------------------------------------===// 292 // Other Lowering Hooks 293 //===----------------------------------------------------------------------===// 294 295 SDValue 296 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { 297 EVT OVT = Op.getValueType(); 298 SDValue DST; 299 if (OVT.getScalarType() == MVT::i64) { 300 DST = LowerSDIV64(Op, DAG); 301 } else if (OVT.getScalarType() == MVT::i32) { 302 DST = LowerSDIV32(Op, DAG); 303 } else if (OVT.getScalarType() == MVT::i16 304 || OVT.getScalarType() == MVT::i8) { 305 DST = LowerSDIV24(Op, DAG); 306 } else { 307 DST = SDValue(Op.getNode(), 0); 308 } 309 return DST; 310 } 311 312 SDValue 313 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const { 314 EVT OVT = Op.getValueType(); 315 SDValue DST; 316 if (OVT.getScalarType() == MVT::i64) { 317 DST = LowerSREM64(Op, DAG); 318 } else if (OVT.getScalarType() == MVT::i32) { 319 DST = LowerSREM32(Op, DAG); 320 } else if (OVT.getScalarType() == MVT::i16) { 321 DST = LowerSREM16(Op, DAG); 322 } else if (OVT.getScalarType() == MVT::i8) { 323 DST = LowerSREM8(Op, DAG); 324 } else { 325 DST = SDValue(Op.getNode(), 0); 326 } 327 return DST; 328 } 329 330 SDValue 331 AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { 332 SDValue Data = Op.getOperand(0); 333 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1)); 334 DebugLoc DL = Op.getDebugLoc(); 335 EVT DVT = Data.getValueType(); 336 EVT BVT = BaseType->getVT(); 337 unsigned baseBits = BVT.getScalarType().getSizeInBits(); 338 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; 339 unsigned shiftBits = srcBits - baseBits; 340 if (srcBits < 32) { 341 // If the op is less than 32 bits, then it needs to extend to 32bits 342 // so it can properly keep the upper bits valid. 343 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); 344 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); 345 shiftBits = 32 - baseBits; 346 DVT = IVT; 347 } 348 SDValue Shift = DAG.getConstant(shiftBits, DVT); 349 // Shift left by 'Shift' bits. 350 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); 351 // Signed shift Right by 'Shift' bits. 352 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); 353 if (srcBits < 32) { 354 // Once the sign extension is done, the op needs to be converted to 355 // its original type. 356 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); 357 } 358 return Data; 359 } 360 EVT 361 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const { 362 int iSize = (size * numEle); 363 int vEle = (iSize >> ((size == 64) ? 6 : 5)); 364 if (!vEle) { 365 vEle = 1; 366 } 367 if (size == 64) { 368 if (vEle == 1) { 369 return EVT(MVT::i64); 370 } else { 371 return EVT(MVT::getVectorVT(MVT::i64, vEle)); 372 } 373 } else { 374 if (vEle == 1) { 375 return EVT(MVT::i32); 376 } else { 377 return EVT(MVT::getVectorVT(MVT::i32, vEle)); 378 } 379 } 380 } 381 382 SDValue 383 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { 384 SDValue Chain = Op.getOperand(0); 385 SDValue Cond = Op.getOperand(1); 386 SDValue Jump = Op.getOperand(2); 387 SDValue Result; 388 Result = DAG.getNode( 389 AMDGPUISD::BRANCH_COND, 390 Op.getDebugLoc(), 391 Op.getValueType(), 392 Chain, Jump, Cond); 393 return Result; 394 } 395 396 SDValue 397 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const { 398 DebugLoc DL = Op.getDebugLoc(); 399 EVT OVT = Op.getValueType(); 400 SDValue LHS = Op.getOperand(0); 401 SDValue RHS = Op.getOperand(1); 402 MVT INTTY; 403 MVT FLTTY; 404 if (!OVT.isVector()) { 405 INTTY = MVT::i32; 406 FLTTY = MVT::f32; 407 } else if (OVT.getVectorNumElements() == 2) { 408 INTTY = MVT::v2i32; 409 FLTTY = MVT::v2f32; 410 } else if (OVT.getVectorNumElements() == 4) { 411 INTTY = MVT::v4i32; 412 FLTTY = MVT::v4f32; 413 } 414 unsigned bitsize = OVT.getScalarType().getSizeInBits(); 415 // char|short jq = ia ^ ib; 416 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); 417 418 // jq = jq >> (bitsize - 2) 419 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); 420 421 // jq = jq | 0x1 422 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); 423 424 // jq = (int)jq 425 jq = DAG.getSExtOrTrunc(jq, DL, INTTY); 426 427 // int ia = (int)LHS; 428 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); 429 430 // int ib, (int)RHS; 431 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); 432 433 // float fa = (float)ia; 434 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 435 436 // float fb = (float)ib; 437 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 438 439 // float fq = native_divide(fa, fb); 440 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb); 441 442 // fq = trunc(fq); 443 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 444 445 // float fqneg = -fq; 446 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); 447 448 // float fr = mad(fqneg, fb, fa); 449 SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY, 450 DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa); 451 452 // int iq = (int)fq; 453 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 454 455 // fr = fabs(fr); 456 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); 457 458 // fb = fabs(fb); 459 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); 460 461 // int cv = fr >= fb; 462 SDValue cv; 463 if (INTTY == MVT::i32) { 464 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 465 } else { 466 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 467 } 468 // jq = (cv ? jq : 0); 469 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, 470 DAG.getConstant(0, OVT)); 471 // dst = iq + jq; 472 iq = DAG.getSExtOrTrunc(iq, DL, OVT); 473 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); 474 return iq; 475 } 476 477 SDValue 478 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const { 479 DebugLoc DL = Op.getDebugLoc(); 480 EVT OVT = Op.getValueType(); 481 SDValue LHS = Op.getOperand(0); 482 SDValue RHS = Op.getOperand(1); 483 // The LowerSDIV32 function generates equivalent to the following IL. 484 // mov r0, LHS 485 // mov r1, RHS 486 // ilt r10, r0, 0 487 // ilt r11, r1, 0 488 // iadd r0, r0, r10 489 // iadd r1, r1, r11 490 // ixor r0, r0, r10 491 // ixor r1, r1, r11 492 // udiv r0, r0, r1 493 // ixor r10, r10, r11 494 // iadd r0, r0, r10 495 // ixor DST, r0, r10 496 497 // mov r0, LHS 498 SDValue r0 = LHS; 499 500 // mov r1, RHS 501 SDValue r1 = RHS; 502 503 // ilt r10, r0, 0 504 SDValue r10 = DAG.getSelectCC(DL, 505 r0, DAG.getConstant(0, OVT), 506 DAG.getConstant(-1, MVT::i32), 507 DAG.getConstant(0, MVT::i32), 508 ISD::SETLT); 509 510 // ilt r11, r1, 0 511 SDValue r11 = DAG.getSelectCC(DL, 512 r1, DAG.getConstant(0, OVT), 513 DAG.getConstant(-1, MVT::i32), 514 DAG.getConstant(0, MVT::i32), 515 ISD::SETLT); 516 517 // iadd r0, r0, r10 518 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 519 520 // iadd r1, r1, r11 521 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 522 523 // ixor r0, r0, r10 524 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 525 526 // ixor r1, r1, r11 527 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 528 529 // udiv r0, r0, r1 530 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); 531 532 // ixor r10, r10, r11 533 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); 534 535 // iadd r0, r0, r10 536 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 537 538 // ixor DST, r0, r10 539 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 540 return DST; 541 } 542 543 SDValue 544 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const { 545 return SDValue(Op.getNode(), 0); 546 } 547 548 SDValue 549 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const { 550 DebugLoc DL = Op.getDebugLoc(); 551 EVT OVT = Op.getValueType(); 552 MVT INTTY = MVT::i32; 553 if (OVT == MVT::v2i8) { 554 INTTY = MVT::v2i32; 555 } else if (OVT == MVT::v4i8) { 556 INTTY = MVT::v4i32; 557 } 558 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 559 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 560 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 561 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 562 return LHS; 563 } 564 565 SDValue 566 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const { 567 DebugLoc DL = Op.getDebugLoc(); 568 EVT OVT = Op.getValueType(); 569 MVT INTTY = MVT::i32; 570 if (OVT == MVT::v2i16) { 571 INTTY = MVT::v2i32; 572 } else if (OVT == MVT::v4i16) { 573 INTTY = MVT::v4i32; 574 } 575 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 576 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 577 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 578 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 579 return LHS; 580 } 581 582 SDValue 583 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const { 584 DebugLoc DL = Op.getDebugLoc(); 585 EVT OVT = Op.getValueType(); 586 SDValue LHS = Op.getOperand(0); 587 SDValue RHS = Op.getOperand(1); 588 // The LowerSREM32 function generates equivalent to the following IL. 589 // mov r0, LHS 590 // mov r1, RHS 591 // ilt r10, r0, 0 592 // ilt r11, r1, 0 593 // iadd r0, r0, r10 594 // iadd r1, r1, r11 595 // ixor r0, r0, r10 596 // ixor r1, r1, r11 597 // udiv r20, r0, r1 598 // umul r20, r20, r1 599 // sub r0, r0, r20 600 // iadd r0, r0, r10 601 // ixor DST, r0, r10 602 603 // mov r0, LHS 604 SDValue r0 = LHS; 605 606 // mov r1, RHS 607 SDValue r1 = RHS; 608 609 // ilt r10, r0, 0 610 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT); 611 612 // ilt r11, r1, 0 613 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT); 614 615 // iadd r0, r0, r10 616 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 617 618 // iadd r1, r1, r11 619 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 620 621 // ixor r0, r0, r10 622 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 623 624 // ixor r1, r1, r11 625 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 626 627 // udiv r20, r0, r1 628 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); 629 630 // umul r20, r20, r1 631 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1); 632 633 // sub r0, r0, r20 634 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); 635 636 // iadd r0, r0, r10 637 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 638 639 // ixor DST, r0, r10 640 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 641 return DST; 642 } 643 644 SDValue 645 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const { 646 return SDValue(Op.getNode(), 0); 647 } 648