Home | History | Annotate | Download | only in radeon
      1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //==-----------------------------------------------------------------------===//
      9 //
     10 // This file contains TargetLowering functions borrowed from AMDIL.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AMDGPUISelLowering.h"
     15 #include "AMDGPURegisterInfo.h"
     16 #include "AMDILDevices.h"
     17 #include "AMDILIntrinsicInfo.h"
     18 #include "AMDGPUSubtarget.h"
     19 #include "AMDILUtilityFunctions.h"
     20 #include "llvm/CallingConv.h"
     21 #include "llvm/CodeGen/MachineFrameInfo.h"
     22 #include "llvm/CodeGen/MachineRegisterInfo.h"
     23 #include "llvm/CodeGen/PseudoSourceValue.h"
     24 #include "llvm/CodeGen/SelectionDAG.h"
     25 #include "llvm/CodeGen/SelectionDAGNodes.h"
     26 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
     27 #include "llvm/DerivedTypes.h"
     28 #include "llvm/Instructions.h"
     29 #include "llvm/Intrinsics.h"
     30 #include "llvm/Support/raw_ostream.h"
     31 #include "llvm/Target/TargetInstrInfo.h"
     32 #include "llvm/Target/TargetOptions.h"
     33 
     34 using namespace llvm;
     35 //===----------------------------------------------------------------------===//
     36 // Calling Convention Implementation
     37 //===----------------------------------------------------------------------===//
     38 #include "AMDGPUGenCallingConv.inc"
     39 
     40 //===----------------------------------------------------------------------===//
     41 // TargetLowering Implementation Help Functions End
     42 //===----------------------------------------------------------------------===//
     43 
     44 //===----------------------------------------------------------------------===//
     45 // TargetLowering Class Implementation Begins
     46 //===----------------------------------------------------------------------===//
     47 void AMDGPUTargetLowering::InitAMDILLowering()
     48 {
     49   int types[] =
     50   {
     51     (int)MVT::i8,
     52     (int)MVT::i16,
     53     (int)MVT::i32,
     54     (int)MVT::f32,
     55     (int)MVT::f64,
     56     (int)MVT::i64,
     57     (int)MVT::v2i8,
     58     (int)MVT::v4i8,
     59     (int)MVT::v2i16,
     60     (int)MVT::v4i16,
     61     (int)MVT::v4f32,
     62     (int)MVT::v4i32,
     63     (int)MVT::v2f32,
     64     (int)MVT::v2i32,
     65     (int)MVT::v2f64,
     66     (int)MVT::v2i64
     67   };
     68 
     69   int IntTypes[] =
     70   {
     71     (int)MVT::i8,
     72     (int)MVT::i16,
     73     (int)MVT::i32,
     74     (int)MVT::i64
     75   };
     76 
     77   int FloatTypes[] =
     78   {
     79     (int)MVT::f32,
     80     (int)MVT::f64
     81   };
     82 
     83   int VectorTypes[] =
     84   {
     85     (int)MVT::v2i8,
     86     (int)MVT::v4i8,
     87     (int)MVT::v2i16,
     88     (int)MVT::v4i16,
     89     (int)MVT::v4f32,
     90     (int)MVT::v4i32,
     91     (int)MVT::v2f32,
     92     (int)MVT::v2i32,
     93     (int)MVT::v2f64,
     94     (int)MVT::v2i64
     95   };
     96   size_t numTypes = sizeof(types) / sizeof(*types);
     97   size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
     98   size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
     99   size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
    100 
    101   const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
    102   // These are the current register classes that are
    103   // supported
    104 
    105   for (unsigned int x  = 0; x < numTypes; ++x) {
    106     MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
    107 
    108     //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    109     // We cannot sextinreg, expand to shifts
    110     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    111     setOperationAction(ISD::SUBE, VT, Expand);
    112     setOperationAction(ISD::SUBC, VT, Expand);
    113     setOperationAction(ISD::ADDE, VT, Expand);
    114     setOperationAction(ISD::ADDC, VT, Expand);
    115     setOperationAction(ISD::BRCOND, VT, Custom);
    116     setOperationAction(ISD::BR_JT, VT, Expand);
    117     setOperationAction(ISD::BRIND, VT, Expand);
    118     // TODO: Implement custom UREM/SREM routines
    119     setOperationAction(ISD::SREM, VT, Expand);
    120     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    121     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    122     if (VT != MVT::i64 && VT != MVT::v2i64) {
    123       setOperationAction(ISD::SDIV, VT, Custom);
    124     }
    125   }
    126   for (unsigned int x = 0; x < numFloatTypes; ++x) {
    127     MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
    128 
    129     // IL does not have these operations for floating point types
    130     setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    131     setOperationAction(ISD::SETOLT, VT, Expand);
    132     setOperationAction(ISD::SETOGE, VT, Expand);
    133     setOperationAction(ISD::SETOGT, VT, Expand);
    134     setOperationAction(ISD::SETOLE, VT, Expand);
    135     setOperationAction(ISD::SETULT, VT, Expand);
    136     setOperationAction(ISD::SETUGE, VT, Expand);
    137     setOperationAction(ISD::SETUGT, VT, Expand);
    138     setOperationAction(ISD::SETULE, VT, Expand);
    139   }
    140 
    141   for (unsigned int x = 0; x < numIntTypes; ++x) {
    142     MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
    143 
    144     // GPU also does not have divrem function for signed or unsigned
    145     setOperationAction(ISD::SDIVREM, VT, Expand);
    146 
    147     // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    148     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    149     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    150 
    151     // GPU doesn't have a rotl, rotr, or byteswap instruction
    152     setOperationAction(ISD::ROTR, VT, Expand);
    153     setOperationAction(ISD::BSWAP, VT, Expand);
    154 
    155     // GPU doesn't have any counting operators
    156     setOperationAction(ISD::CTPOP, VT, Expand);
    157     setOperationAction(ISD::CTTZ, VT, Expand);
    158     setOperationAction(ISD::CTLZ, VT, Expand);
    159   }
    160 
    161   for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
    162   {
    163     MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
    164 
    165     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    166     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    167     setOperationAction(ISD::SDIVREM, VT, Expand);
    168     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    169     // setOperationAction(ISD::VSETCC, VT, Expand);
    170     setOperationAction(ISD::SELECT_CC, VT, Expand);
    171 
    172   }
    173   if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    174     setOperationAction(ISD::MULHU, MVT::i64, Expand);
    175     setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    176     setOperationAction(ISD::MULHS, MVT::i64, Expand);
    177     setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    178     setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    179     setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    180     setOperationAction(ISD::Constant          , MVT::i64  , Legal);
    181     setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    182     setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    183     setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    184     setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    185     setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
    186   }
    187   if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    188     // we support loading/storing v2f64 but not operations on the type
    189     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    190     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    191     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    192     setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    193     setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    194     setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
    195     // We want to expand vector conversions into their scalar
    196     // counterparts.
    197     setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    198     setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    199     setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    200     setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    201     setOperationAction(ISD::FABS, MVT::f64, Expand);
    202     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    203   }
    204   // TODO: Fix the UDIV24 algorithm so it works for these
    205   // types correctly. This needs vector comparisons
    206   // for this to work correctly.
    207   setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
    208   setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
    209   setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
    210   setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
    211   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
    212   setOperationAction(ISD::SUBC, MVT::Other, Expand);
    213   setOperationAction(ISD::ADDE, MVT::Other, Expand);
    214   setOperationAction(ISD::ADDC, MVT::Other, Expand);
    215   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
    216   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
    217   setOperationAction(ISD::BRIND, MVT::Other, Expand);
    218   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
    219 
    220   setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
    221 
    222   // Use the default implementation.
    223   setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
    224   setOperationAction(ISD::Constant          , MVT::i32    , Legal);
    225 
    226   setSchedulingPreference(Sched::RegPressure);
    227   setPow2DivIsCheap(false);
    228   setPrefLoopAlignment(16);
    229   setSelectIsExpensive(true);
    230   setJumpIsExpensive(true);
    231 
    232   maxStoresPerMemcpy  = 4096;
    233   maxStoresPerMemmove = 4096;
    234   maxStoresPerMemset  = 4096;
    235 
    236 #undef numTypes
    237 #undef numIntTypes
    238 #undef numVectorTypes
    239 #undef numFloatTypes
    240 }
    241 
// Target hook queried by ISel for intrinsics that touch memory, so that
// MachineMemOperands can be attached.  This backend reports no such
// intrinsics, leaving Info untouched.
bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}
    248 // The backend supports 32 and 64 bit floating point immediates
    249 bool
    250 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
    251 {
    252   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
    253       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    254     return true;
    255   } else {
    256     return false;
    257   }
    258 }
    259 
    260 bool
    261 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
    262 {
    263   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
    264       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    265     return false;
    266   } else {
    267     return true;
    268   }
    269 }
    270 
    271 
    272 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
    273 // be zero. Op is expected to be a target specific node. Used by DAG
    274 // combiner.
    275 
    276 void
    277 AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    278     const SDValue Op,
    279     APInt &KnownZero,
    280     APInt &KnownOne,
    281     const SelectionDAG &DAG,
    282     unsigned Depth) const
    283 {
    284   APInt KnownZero2;
    285   APInt KnownOne2;
    286   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
    287   switch (Op.getOpcode()) {
    288     default: break;
    289     case ISD::SELECT_CC:
    290              DAG.ComputeMaskedBits(
    291                  Op.getOperand(1),
    292                  KnownZero,
    293                  KnownOne,
    294                  Depth + 1
    295                  );
    296              DAG.ComputeMaskedBits(
    297                  Op.getOperand(0),
    298                  KnownZero2,
    299                  KnownOne2
    300                  );
    301              assert((KnownZero & KnownOne) == 0
    302                  && "Bits known to be one AND zero?");
    303              assert((KnownZero2 & KnownOne2) == 0
    304                  && "Bits known to be one AND zero?");
    305              // Only known if known in both the LHS and RHS
    306              KnownOne &= KnownOne2;
    307              KnownZero &= KnownZero2;
    308              break;
    309   };
    310 }
    311 
    312 //===----------------------------------------------------------------------===//
    313 //                           Other Lowering Hooks
    314 //===----------------------------------------------------------------------===//
    315 
    316 SDValue
    317 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
    318 {
    319   EVT OVT = Op.getValueType();
    320   SDValue DST;
    321   if (OVT.getScalarType() == MVT::i64) {
    322     DST = LowerSDIV64(Op, DAG);
    323   } else if (OVT.getScalarType() == MVT::i32) {
    324     DST = LowerSDIV32(Op, DAG);
    325   } else if (OVT.getScalarType() == MVT::i16
    326       || OVT.getScalarType() == MVT::i8) {
    327     DST = LowerSDIV24(Op, DAG);
    328   } else {
    329     DST = SDValue(Op.getNode(), 0);
    330   }
    331   return DST;
    332 }
    333 
    334 SDValue
    335 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
    336 {
    337   EVT OVT = Op.getValueType();
    338   SDValue DST;
    339   if (OVT.getScalarType() == MVT::i64) {
    340     DST = LowerSREM64(Op, DAG);
    341   } else if (OVT.getScalarType() == MVT::i32) {
    342     DST = LowerSREM32(Op, DAG);
    343   } else if (OVT.getScalarType() == MVT::i16) {
    344     DST = LowerSREM16(Op, DAG);
    345   } else if (OVT.getScalarType() == MVT::i8) {
    346     DST = LowerSREM8(Op, DAG);
    347   } else {
    348     DST = SDValue(Op.getNode(), 0);
    349   }
    350   return DST;
    351 }
    352 
// Custom lowering for BUILD_VECTOR: splat operand 0 across the vector with
// the target VBUILD node, then overwrite the remaining lanes with
// INSERT_VECTOR_ELT for each operand that is not undef.
// NOTE(review): the insert indices are 5/6/7 rather than 1/2/3 — this looks
// like an AMDIL-specific index encoding; confirm against the target's
// INSERT_VECTOR_ELT handling.
SDValue
AMDGPUTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // Vector whose lanes all hold operand 0.
  Nodes1 = DAG.getNode(AMDGPUISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  // The cases below intentionally fall through: a 4-operand build also
  // inserts elements 2 and 1; a 3-operand build also inserts element 1.
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // fall through
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // fall through
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}
    418 
// Custom lowering for SIGN_EXTEND_INREG: sign-extends the low 'baseBits'
// of the operand by shifting left so the sign bit reaches the top, then
// arithmetic-shifting back down.  Sub-32-bit sources are first widened to
// 32 bits so the shift pair operates on a full register.
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  // Operand 1 carries the type being extended from, as a VTSDNode.
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  // Non-simple source types fall back to a width of 1 bit.
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
    450 EVT
    451 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
    452 {
    453   int iSize = (size * numEle);
    454   int vEle = (iSize >> ((size == 64) ? 6 : 5));
    455   if (!vEle) {
    456     vEle = 1;
    457   }
    458   if (size == 64) {
    459     if (vEle == 1) {
    460       return EVT(MVT::i64);
    461     } else {
    462       return EVT(MVT::getVectorVT(MVT::i64, vEle));
    463     }
    464   } else {
    465     if (vEle == 1) {
    466       return EVT(MVT::i32);
    467     } else {
    468       return EVT(MVT::getVectorVT(MVT::i32, vEle));
    469     }
    470   }
    471 }
    472 
    473 SDValue
    474 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
    475 {
    476   SDValue Chain = Op.getOperand(0);
    477   SDValue Cond  = Op.getOperand(1);
    478   SDValue Jump  = Op.getOperand(2);
    479   SDValue Result;
    480   Result = DAG.getNode(
    481       AMDGPUISD::BRANCH_COND,
    482       Op.getDebugLoc(),
    483       Op.getValueType(),
    484       Chain, Jump, Cond);
    485   return Result;
    486 }
    487 
    488 SDValue
    489 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
    490 {
    491   DebugLoc DL = Op.getDebugLoc();
    492   EVT OVT = Op.getValueType();
    493   SDValue LHS = Op.getOperand(0);
    494   SDValue RHS = Op.getOperand(1);
    495   MVT INTTY;
    496   MVT FLTTY;
    497   if (!OVT.isVector()) {
    498     INTTY = MVT::i32;
    499     FLTTY = MVT::f32;
    500   } else if (OVT.getVectorNumElements() == 2) {
    501     INTTY = MVT::v2i32;
    502     FLTTY = MVT::v2f32;
    503   } else if (OVT.getVectorNumElements() == 4) {
    504     INTTY = MVT::v4i32;
    505     FLTTY = MVT::v4f32;
    506   }
    507   unsigned bitsize = OVT.getScalarType().getSizeInBits();
    508   // char|short jq = ia ^ ib;
    509   SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
    510 
    511   // jq = jq >> (bitsize - 2)
    512   jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
    513 
    514   // jq = jq | 0x1
    515   jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
    516 
    517   // jq = (int)jq
    518   jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
    519 
    520   // int ia = (int)LHS;
    521   SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
    522 
    523   // int ib, (int)RHS;
    524   SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
    525 
    526   // float fa = (float)ia;
    527   SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
    528 
    529   // float fb = (float)ib;
    530   SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
    531 
    532   // float fq = native_divide(fa, fb);
    533   SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
    534 
    535   // fq = trunc(fq);
    536   fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
    537 
    538   // float fqneg = -fq;
    539   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
    540 
    541   // float fr = mad(fqneg, fb, fa);
    542   SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
    543 
    544   // int iq = (int)fq;
    545   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
    546 
    547   // fr = fabs(fr);
    548   fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
    549 
    550   // fb = fabs(fb);
    551   fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
    552 
    553   // int cv = fr >= fb;
    554   SDValue cv;
    555   if (INTTY == MVT::i32) {
    556     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
    557   } else {
    558     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
    559   }
    560   // jq = (cv ? jq : 0);
    561   jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
    562       DAG.getConstant(0, OVT));
    563   // dst = iq + jq;
    564   iq = DAG.getSExtOrTrunc(iq, DL, OVT);
    565   iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
    566   return iq;
    567 }
    568 
    569 SDValue
    570 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
    571 {
    572   DebugLoc DL = Op.getDebugLoc();
    573   EVT OVT = Op.getValueType();
    574   SDValue LHS = Op.getOperand(0);
    575   SDValue RHS = Op.getOperand(1);
    576   // The LowerSDIV32 function generates equivalent to the following IL.
    577   // mov r0, LHS
    578   // mov r1, RHS
    579   // ilt r10, r0, 0
    580   // ilt r11, r1, 0
    581   // iadd r0, r0, r10
    582   // iadd r1, r1, r11
    583   // ixor r0, r0, r10
    584   // ixor r1, r1, r11
    585   // udiv r0, r0, r1
    586   // ixor r10, r10, r11
    587   // iadd r0, r0, r10
    588   // ixor DST, r0, r10
    589 
    590   // mov r0, LHS
    591   SDValue r0 = LHS;
    592 
    593   // mov r1, RHS
    594   SDValue r1 = RHS;
    595 
    596   // ilt r10, r0, 0
    597   SDValue r10 = DAG.getSelectCC(DL,
    598       r0, DAG.getConstant(0, OVT),
    599       DAG.getConstant(-1, MVT::i32),
    600       DAG.getConstant(0, MVT::i32),
    601       ISD::SETLT);
    602 
    603   // ilt r11, r1, 0
    604   SDValue r11 = DAG.getSelectCC(DL,
    605       r1, DAG.getConstant(0, OVT),
    606       DAG.getConstant(-1, MVT::i32),
    607       DAG.getConstant(0, MVT::i32),
    608       ISD::SETLT);
    609 
    610   // iadd r0, r0, r10
    611   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    612 
    613   // iadd r1, r1, r11
    614   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
    615 
    616   // ixor r0, r0, r10
    617   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    618 
    619   // ixor r1, r1, r11
    620   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
    621 
    622   // udiv r0, r0, r1
    623   r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
    624 
    625   // ixor r10, r10, r11
    626   r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
    627 
    628   // iadd r0, r0, r10
    629   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    630 
    631   // ixor DST, r0, r10
    632   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    633   return DST;
    634 }
    635 
// 64-bit signed division is not implemented; the node is returned
// unchanged (see the i64/v2i64 carve-out in InitAMDILLowering).
SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
    641 
    642 SDValue
    643 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
    644 {
    645   DebugLoc DL = Op.getDebugLoc();
    646   EVT OVT = Op.getValueType();
    647   MVT INTTY = MVT::i32;
    648   if (OVT == MVT::v2i8) {
    649     INTTY = MVT::v2i32;
    650   } else if (OVT == MVT::v4i8) {
    651     INTTY = MVT::v4i32;
    652   }
    653   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
    654   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
    655   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
    656   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
    657   return LHS;
    658 }
    659 
    660 SDValue
    661 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
    662 {
    663   DebugLoc DL = Op.getDebugLoc();
    664   EVT OVT = Op.getValueType();
    665   MVT INTTY = MVT::i32;
    666   if (OVT == MVT::v2i16) {
    667     INTTY = MVT::v2i32;
    668   } else if (OVT == MVT::v4i16) {
    669     INTTY = MVT::v4i32;
    670   }
    671   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
    672   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
    673   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
    674   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
    675   return LHS;
    676 }
    677 
    678 SDValue
    679 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
    680 {
    681   DebugLoc DL = Op.getDebugLoc();
    682   EVT OVT = Op.getValueType();
    683   SDValue LHS = Op.getOperand(0);
    684   SDValue RHS = Op.getOperand(1);
    685   // The LowerSREM32 function generates equivalent to the following IL.
    686   // mov r0, LHS
    687   // mov r1, RHS
    688   // ilt r10, r0, 0
    689   // ilt r11, r1, 0
    690   // iadd r0, r0, r10
    691   // iadd r1, r1, r11
    692   // ixor r0, r0, r10
    693   // ixor r1, r1, r11
    694   // udiv r20, r0, r1
    695   // umul r20, r20, r1
    696   // sub r0, r0, r20
    697   // iadd r0, r0, r10
    698   // ixor DST, r0, r10
    699 
    700   // mov r0, LHS
    701   SDValue r0 = LHS;
    702 
    703   // mov r1, RHS
    704   SDValue r1 = RHS;
    705 
    706   // ilt r10, r0, 0
    707   SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
    708 
    709   // ilt r11, r1, 0
    710   SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
    711 
    712   // iadd r0, r0, r10
    713   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    714 
    715   // iadd r1, r1, r11
    716   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
    717 
    718   // ixor r0, r0, r10
    719   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    720 
    721   // ixor r1, r1, r11
    722   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
    723 
    724   // udiv r20, r0, r1
    725   SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
    726 
    727   // umul r20, r20, r1
    728   r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
    729 
    730   // sub r0, r0, r20
    731   r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
    732 
    733   // iadd r0, r0, r10
    734   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    735 
    736   // ixor DST, r0, r10
    737   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    738   return DST;
    739 }
    740 
// 64-bit signed remainder is not implemented; the node is returned
// unchanged.
SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
    746