Home | History | Annotate | Download | only in R600
      1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //==-----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief TargetLowering functions borrowed from AMDIL.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "AMDGPUISelLowering.h"
     16 #include "AMDGPURegisterInfo.h"
     17 #include "AMDGPUSubtarget.h"
     18 #include "AMDILIntrinsicInfo.h"
     19 #include "llvm/CodeGen/MachineFrameInfo.h"
     20 #include "llvm/CodeGen/MachineRegisterInfo.h"
     21 #include "llvm/CodeGen/PseudoSourceValue.h"
     22 #include "llvm/CodeGen/SelectionDAG.h"
     23 #include "llvm/CodeGen/SelectionDAGNodes.h"
     24 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
     25 #include "llvm/IR/CallingConv.h"
     26 #include "llvm/IR/DerivedTypes.h"
     27 #include "llvm/IR/Instructions.h"
     28 #include "llvm/IR/Intrinsics.h"
     29 #include "llvm/Support/raw_ostream.h"
     30 #include "llvm/Target/TargetInstrInfo.h"
     31 #include "llvm/Target/TargetOptions.h"
     32 
     33 using namespace llvm;
     34 //===----------------------------------------------------------------------===//
     35 // TargetLowering Implementation Help Functions End
     36 //===----------------------------------------------------------------------===//
     37 
     38 //===----------------------------------------------------------------------===//
     39 // TargetLowering Class Implementation Begins
     40 //===----------------------------------------------------------------------===//
     41 void AMDGPUTargetLowering::InitAMDILLowering() {
     42   static const int types[] = {
     43     (int)MVT::i8,
     44     (int)MVT::i16,
     45     (int)MVT::i32,
     46     (int)MVT::f32,
     47     (int)MVT::f64,
     48     (int)MVT::i64,
     49     (int)MVT::v2i8,
     50     (int)MVT::v4i8,
     51     (int)MVT::v2i16,
     52     (int)MVT::v4i16,
     53     (int)MVT::v4f32,
     54     (int)MVT::v4i32,
     55     (int)MVT::v2f32,
     56     (int)MVT::v2i32,
     57     (int)MVT::v2f64,
     58     (int)MVT::v2i64
     59   };
     60 
     61   static const int IntTypes[] = {
     62     (int)MVT::i8,
     63     (int)MVT::i16,
     64     (int)MVT::i32,
     65     (int)MVT::i64
     66   };
     67 
     68   static const int FloatTypes[] = {
     69     (int)MVT::f32,
     70     (int)MVT::f64
     71   };
     72 
     73   static const int VectorTypes[] = {
     74     (int)MVT::v2i8,
     75     (int)MVT::v4i8,
     76     (int)MVT::v2i16,
     77     (int)MVT::v4i16,
     78     (int)MVT::v4f32,
     79     (int)MVT::v4i32,
     80     (int)MVT::v2f32,
     81     (int)MVT::v2i32,
     82     (int)MVT::v2f64,
     83     (int)MVT::v2i64
     84   };
     85   const size_t NumTypes = array_lengthof(types);
     86   const size_t NumFloatTypes = array_lengthof(FloatTypes);
     87   const size_t NumIntTypes = array_lengthof(IntTypes);
     88   const size_t NumVectorTypes = array_lengthof(VectorTypes);
     89 
     90   const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
     91   // These are the current register classes that are
     92   // supported
     93 
     94   for (unsigned int x  = 0; x < NumTypes; ++x) {
     95     MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
     96 
     97     //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
     98     // We cannot sextinreg, expand to shifts
     99     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    100     setOperationAction(ISD::SUBE, VT, Expand);
    101     setOperationAction(ISD::SUBC, VT, Expand);
    102     setOperationAction(ISD::ADDE, VT, Expand);
    103     setOperationAction(ISD::ADDC, VT, Expand);
    104     setOperationAction(ISD::BRCOND, VT, Custom);
    105     setOperationAction(ISD::BR_JT, VT, Expand);
    106     setOperationAction(ISD::BRIND, VT, Expand);
    107     // TODO: Implement custom UREM/SREM routines
    108     setOperationAction(ISD::SREM, VT, Expand);
    109     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    110     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    111     if (VT != MVT::i64 && VT != MVT::v2i64) {
    112       setOperationAction(ISD::SDIV, VT, Custom);
    113     }
    114   }
    115   for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    116     MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
    117 
    118     // IL does not have these operations for floating point types
    119     setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    120     setOperationAction(ISD::SETOLT, VT, Expand);
    121     setOperationAction(ISD::SETOGE, VT, Expand);
    122     setOperationAction(ISD::SETOGT, VT, Expand);
    123     setOperationAction(ISD::SETOLE, VT, Expand);
    124     setOperationAction(ISD::SETULT, VT, Expand);
    125     setOperationAction(ISD::SETUGE, VT, Expand);
    126     setOperationAction(ISD::SETUGT, VT, Expand);
    127     setOperationAction(ISD::SETULE, VT, Expand);
    128   }
    129 
    130   for (unsigned int x = 0; x < NumIntTypes; ++x) {
    131     MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
    132 
    133     // GPU also does not have divrem function for signed or unsigned
    134     setOperationAction(ISD::SDIVREM, VT, Expand);
    135 
    136     // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    137     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    138     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    139 
    140     setOperationAction(ISD::BSWAP, VT, Expand);
    141 
    142     // GPU doesn't have any counting operators
    143     setOperationAction(ISD::CTPOP, VT, Expand);
    144     setOperationAction(ISD::CTTZ, VT, Expand);
    145     setOperationAction(ISD::CTLZ, VT, Expand);
    146   }
    147 
    148   for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
    149     MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
    150 
    151     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    152     setOperationAction(ISD::SDIVREM, VT, Expand);
    153     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    154     // setOperationAction(ISD::VSETCC, VT, Expand);
    155     setOperationAction(ISD::SELECT_CC, VT, Expand);
    156 
    157   }
    158   setOperationAction(ISD::MULHU, MVT::i64, Expand);
    159   setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    160   setOperationAction(ISD::MULHS, MVT::i64, Expand);
    161   setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    162   setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    163   setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    164   setOperationAction(ISD::Constant          , MVT::i64  , Legal);
    165   setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    166   setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    167   setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    168   setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    169   setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
    170   if (STM.hasHWFP64()) {
    171     // we support loading/storing v2f64 but not operations on the type
    172     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    173     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    174     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    175     setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    176     setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    177     setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
    178     // We want to expand vector conversions into their scalar
    179     // counterparts.
    180     setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    181     setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    182     setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    183     setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    184     setOperationAction(ISD::FABS, MVT::f64, Expand);
    185     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    186   }
    187   // TODO: Fix the UDIV24 algorithm so it works for these
    188   // types correctly. This needs vector comparisons
    189   // for this to work correctly.
    190   setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
    191   setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
    192   setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
    193   setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
    194   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
    195   setOperationAction(ISD::SUBC, MVT::Other, Expand);
    196   setOperationAction(ISD::ADDE, MVT::Other, Expand);
    197   setOperationAction(ISD::ADDC, MVT::Other, Expand);
    198   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
    199   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
    200   setOperationAction(ISD::BRIND, MVT::Other, Expand);
    201   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
    202 
    203 
    204   // Use the default implementation.
    205   setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
    206   setOperationAction(ISD::Constant          , MVT::i32    , Legal);
    207 
    208   setSchedulingPreference(Sched::RegPressure);
    209   setPow2DivIsCheap(false);
    210   setSelectIsExpensive(true);
    211   setJumpIsExpensive(true);
    212 
    213   MaxStoresPerMemcpy  = 4096;
    214   MaxStoresPerMemmove = 4096;
    215   MaxStoresPerMemset  = 4096;
    216 
    217 }
    218 
    219 bool
    220 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    221     const CallInst &I, unsigned Intrinsic) const {
    222   return false;
    223 }
    224 
    225 // The backend supports 32 and 64 bit floating point immediates
    226 bool
    227 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
    228   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
    229       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    230     return true;
    231   } else {
    232     return false;
    233   }
    234 }
    235 
    236 bool
    237 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
    238   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
    239       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    240     return false;
    241   } else {
    242     return true;
    243   }
    244 }
    245 
    246 
    247 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
    248 // be zero. Op is expected to be a target specific node. Used by DAG
    249 // combiner.
    250 
    251 void
    252 AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    253     const SDValue Op,
    254     APInt &KnownZero,
    255     APInt &KnownOne,
    256     const SelectionDAG &DAG,
    257     unsigned Depth) const {
    258   APInt KnownZero2;
    259   APInt KnownOne2;
    260   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
    261   switch (Op.getOpcode()) {
    262     default: break;
    263     case ISD::SELECT_CC:
    264              DAG.ComputeMaskedBits(
    265                  Op.getOperand(1),
    266                  KnownZero,
    267                  KnownOne,
    268                  Depth + 1
    269                  );
    270              DAG.ComputeMaskedBits(
    271                  Op.getOperand(0),
    272                  KnownZero2,
    273                  KnownOne2
    274                  );
    275              assert((KnownZero & KnownOne) == 0
    276                  && "Bits known to be one AND zero?");
    277              assert((KnownZero2 & KnownOne2) == 0
    278                  && "Bits known to be one AND zero?");
    279              // Only known if known in both the LHS and RHS
    280              KnownOne &= KnownOne2;
    281              KnownZero &= KnownZero2;
    282              break;
    283   };
    284 }
    285 
    286 //===----------------------------------------------------------------------===//
    287 //                           Other Lowering Hooks
    288 //===----------------------------------------------------------------------===//
    289 
    290 SDValue
    291 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
    292   EVT OVT = Op.getValueType();
    293   SDValue DST;
    294   if (OVT.getScalarType() == MVT::i64) {
    295     DST = LowerSDIV64(Op, DAG);
    296   } else if (OVT.getScalarType() == MVT::i32) {
    297     DST = LowerSDIV32(Op, DAG);
    298   } else if (OVT.getScalarType() == MVT::i16
    299       || OVT.getScalarType() == MVT::i8) {
    300     DST = LowerSDIV24(Op, DAG);
    301   } else {
    302     DST = SDValue(Op.getNode(), 0);
    303   }
    304   return DST;
    305 }
    306 
    307 SDValue
    308 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
    309   EVT OVT = Op.getValueType();
    310   SDValue DST;
    311   if (OVT.getScalarType() == MVT::i64) {
    312     DST = LowerSREM64(Op, DAG);
    313   } else if (OVT.getScalarType() == MVT::i32) {
    314     DST = LowerSREM32(Op, DAG);
    315   } else if (OVT.getScalarType() == MVT::i16) {
    316     DST = LowerSREM16(Op, DAG);
    317   } else if (OVT.getScalarType() == MVT::i8) {
    318     DST = LowerSREM8(Op, DAG);
    319   } else {
    320     DST = SDValue(Op.getNode(), 0);
    321   }
    322   return DST;
    323 }
    324 
    325 SDValue
    326 AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
    327   SDValue Data = Op.getOperand(0);
    328   VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
    329   SDLoc DL(Op);
    330   EVT DVT = Data.getValueType();
    331   EVT BVT = BaseType->getVT();
    332   unsigned baseBits = BVT.getScalarType().getSizeInBits();
    333   unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
    334   unsigned shiftBits = srcBits - baseBits;
    335   if (srcBits < 32) {
    336     // If the op is less than 32 bits, then it needs to extend to 32bits
    337     // so it can properly keep the upper bits valid.
    338     EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    339     Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    340     shiftBits = 32 - baseBits;
    341     DVT = IVT;
    342   }
    343   SDValue Shift = DAG.getConstant(shiftBits, DVT);
    344   // Shift left by 'Shift' bits.
    345   Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
    346   // Signed shift Right by 'Shift' bits.
    347   Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
    348   if (srcBits < 32) {
    349     // Once the sign extension is done, the op needs to be converted to
    350     // its original type.
    351     Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
    352   }
    353   return Data;
    354 }
    355 EVT
    356 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
    357   int iSize = (size * numEle);
    358   int vEle = (iSize >> ((size == 64) ? 6 : 5));
    359   if (!vEle) {
    360     vEle = 1;
    361   }
    362   if (size == 64) {
    363     if (vEle == 1) {
    364       return EVT(MVT::i64);
    365     } else {
    366       return EVT(MVT::getVectorVT(MVT::i64, vEle));
    367     }
    368   } else {
    369     if (vEle == 1) {
    370       return EVT(MVT::i32);
    371     } else {
    372       return EVT(MVT::getVectorVT(MVT::i32, vEle));
    373     }
    374   }
    375 }
    376 
    377 SDValue
    378 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
    379   SDValue Chain = Op.getOperand(0);
    380   SDValue Cond  = Op.getOperand(1);
    381   SDValue Jump  = Op.getOperand(2);
    382   SDValue Result;
    383   Result = DAG.getNode(
    384       AMDGPUISD::BRANCH_COND,
    385       SDLoc(Op),
    386       Op.getValueType(),
    387       Chain, Jump, Cond);
    388   return Result;
    389 }
    390 
    391 SDValue
    392 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
    393   SDLoc DL(Op);
    394   EVT OVT = Op.getValueType();
    395   SDValue LHS = Op.getOperand(0);
    396   SDValue RHS = Op.getOperand(1);
    397   MVT INTTY;
    398   MVT FLTTY;
    399   if (!OVT.isVector()) {
    400     INTTY = MVT::i32;
    401     FLTTY = MVT::f32;
    402   } else if (OVT.getVectorNumElements() == 2) {
    403     INTTY = MVT::v2i32;
    404     FLTTY = MVT::v2f32;
    405   } else if (OVT.getVectorNumElements() == 4) {
    406     INTTY = MVT::v4i32;
    407     FLTTY = MVT::v4f32;
    408   }
    409   unsigned bitsize = OVT.getScalarType().getSizeInBits();
    410   // char|short jq = ia ^ ib;
    411   SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
    412 
    413   // jq = jq >> (bitsize - 2)
    414   jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
    415 
    416   // jq = jq | 0x1
    417   jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
    418 
    419   // jq = (int)jq
    420   jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
    421 
    422   // int ia = (int)LHS;
    423   SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
    424 
    425   // int ib, (int)RHS;
    426   SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
    427 
    428   // float fa = (float)ia;
    429   SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
    430 
    431   // float fb = (float)ib;
    432   SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
    433 
    434   // float fq = native_divide(fa, fb);
    435   SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
    436 
    437   // fq = trunc(fq);
    438   fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
    439 
    440   // float fqneg = -fq;
    441   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
    442 
    443   // float fr = mad(fqneg, fb, fa);
    444   SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
    445       DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
    446 
    447   // int iq = (int)fq;
    448   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
    449 
    450   // fr = fabs(fr);
    451   fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
    452 
    453   // fb = fabs(fb);
    454   fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
    455 
    456   // int cv = fr >= fb;
    457   SDValue cv;
    458   if (INTTY == MVT::i32) {
    459     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
    460   } else {
    461     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
    462   }
    463   // jq = (cv ? jq : 0);
    464   jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
    465       DAG.getConstant(0, OVT));
    466   // dst = iq + jq;
    467   iq = DAG.getSExtOrTrunc(iq, DL, OVT);
    468   iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
    469   return iq;
    470 }
    471 
    472 SDValue
    473 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
    474   SDLoc DL(Op);
    475   EVT OVT = Op.getValueType();
    476   SDValue LHS = Op.getOperand(0);
    477   SDValue RHS = Op.getOperand(1);
    478   // The LowerSDIV32 function generates equivalent to the following IL.
    479   // mov r0, LHS
    480   // mov r1, RHS
    481   // ilt r10, r0, 0
    482   // ilt r11, r1, 0
    483   // iadd r0, r0, r10
    484   // iadd r1, r1, r11
    485   // ixor r0, r0, r10
    486   // ixor r1, r1, r11
    487   // udiv r0, r0, r1
    488   // ixor r10, r10, r11
    489   // iadd r0, r0, r10
    490   // ixor DST, r0, r10
    491 
    492   // mov r0, LHS
    493   SDValue r0 = LHS;
    494 
    495   // mov r1, RHS
    496   SDValue r1 = RHS;
    497 
    498   // ilt r10, r0, 0
    499   SDValue r10 = DAG.getSelectCC(DL,
    500       r0, DAG.getConstant(0, OVT),
    501       DAG.getConstant(-1, MVT::i32),
    502       DAG.getConstant(0, MVT::i32),
    503       ISD::SETLT);
    504 
    505   // ilt r11, r1, 0
    506   SDValue r11 = DAG.getSelectCC(DL,
    507       r1, DAG.getConstant(0, OVT),
    508       DAG.getConstant(-1, MVT::i32),
    509       DAG.getConstant(0, MVT::i32),
    510       ISD::SETLT);
    511 
    512   // iadd r0, r0, r10
    513   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    514 
    515   // iadd r1, r1, r11
    516   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
    517 
    518   // ixor r0, r0, r10
    519   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    520 
    521   // ixor r1, r1, r11
    522   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
    523 
    524   // udiv r0, r0, r1
    525   r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
    526 
    527   // ixor r10, r10, r11
    528   r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
    529 
    530   // iadd r0, r0, r10
    531   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    532 
    533   // ixor DST, r0, r10
    534   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    535   return DST;
    536 }
    537 
    538 SDValue
    539 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
    540   return SDValue(Op.getNode(), 0);
    541 }
    542 
    543 SDValue
    544 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
    545   SDLoc DL(Op);
    546   EVT OVT = Op.getValueType();
    547   MVT INTTY = MVT::i32;
    548   if (OVT == MVT::v2i8) {
    549     INTTY = MVT::v2i32;
    550   } else if (OVT == MVT::v4i8) {
    551     INTTY = MVT::v4i32;
    552   }
    553   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
    554   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
    555   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
    556   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
    557   return LHS;
    558 }
    559 
    560 SDValue
    561 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
    562   SDLoc DL(Op);
    563   EVT OVT = Op.getValueType();
    564   MVT INTTY = MVT::i32;
    565   if (OVT == MVT::v2i16) {
    566     INTTY = MVT::v2i32;
    567   } else if (OVT == MVT::v4i16) {
    568     INTTY = MVT::v4i32;
    569   }
    570   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
    571   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
    572   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
    573   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
    574   return LHS;
    575 }
    576 
    577 SDValue
    578 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
    579   SDLoc DL(Op);
    580   EVT OVT = Op.getValueType();
    581   SDValue LHS = Op.getOperand(0);
    582   SDValue RHS = Op.getOperand(1);
    583   // The LowerSREM32 function generates equivalent to the following IL.
    584   // mov r0, LHS
    585   // mov r1, RHS
    586   // ilt r10, r0, 0
    587   // ilt r11, r1, 0
    588   // iadd r0, r0, r10
    589   // iadd r1, r1, r11
    590   // ixor r0, r0, r10
    591   // ixor r1, r1, r11
    592   // udiv r20, r0, r1
    593   // umul r20, r20, r1
    594   // sub r0, r0, r20
    595   // iadd r0, r0, r10
    596   // ixor DST, r0, r10
    597 
    598   // mov r0, LHS
    599   SDValue r0 = LHS;
    600 
    601   // mov r1, RHS
    602   SDValue r1 = RHS;
    603 
    604   // ilt r10, r0, 0
    605   SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
    606 
    607   // ilt r11, r1, 0
    608   SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
    609 
    610   // iadd r0, r0, r10
    611   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    612 
    613   // iadd r1, r1, r11
    614   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
    615 
    616   // ixor r0, r0, r10
    617   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    618 
    619   // ixor r1, r1, r11
    620   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
    621 
    622   // udiv r20, r0, r1
    623   SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
    624 
    625   // umul r20, r20, r1
    626   r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
    627 
    628   // sub r0, r0, r20
    629   r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
    630 
    631   // iadd r0, r0, r10
    632   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    633 
    634   // ixor DST, r0, r10
    635   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    636   return DST;
    637 }
    638 
    639 SDValue
    640 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
    641   return SDValue(Op.getNode(), 0);
    642 }
    643