// (code-browser navigation residue: Home | History | Annotate | Download | only in R600)
      1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //==-----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief TargetLowering functions borrowed from AMDIL.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "AMDGPUISelLowering.h"
     16 #include "AMDGPURegisterInfo.h"
     17 #include "AMDGPUSubtarget.h"
     18 #include "AMDILDevices.h"
     19 #include "AMDILIntrinsicInfo.h"
     20 #include "llvm/CodeGen/MachineFrameInfo.h"
     21 #include "llvm/CodeGen/MachineRegisterInfo.h"
     22 #include "llvm/CodeGen/PseudoSourceValue.h"
     23 #include "llvm/CodeGen/SelectionDAG.h"
     24 #include "llvm/CodeGen/SelectionDAGNodes.h"
     25 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
     26 #include "llvm/IR/CallingConv.h"
     27 #include "llvm/IR/DerivedTypes.h"
     28 #include "llvm/IR/Instructions.h"
     29 #include "llvm/IR/Intrinsics.h"
     30 #include "llvm/Support/raw_ostream.h"
     31 #include "llvm/Target/TargetInstrInfo.h"
     32 #include "llvm/Target/TargetOptions.h"
     33 
     34 using namespace llvm;
     35 //===----------------------------------------------------------------------===//
     36 // TargetLowering Implementation Help Functions End
     37 //===----------------------------------------------------------------------===//
     38 
     39 //===----------------------------------------------------------------------===//
     40 // TargetLowering Class Implementation Begins
     41 //===----------------------------------------------------------------------===//
// Configure the lowering rules this target inherited from the old AMDIL
// backend: mark which generic SelectionDAG operations are Legal, which the
// legalizer must Expand, and which get Custom lowering in this file.
void AMDGPUTargetLowering::InitAMDILLowering() {
  // Every value type the backend handles: scalars plus the 2- and
  // 4-element vector forms of each element type.
  int types[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  // Scalar integer types only.
  int IntTypes[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  // Scalar floating point types only.
  int FloatTypes[] = {
    (int)MVT::f32,
    (int)MVT::f64
  };

  // Vector types only (both integer and floating point).
  int VectorTypes[] = {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t NumTypes = sizeof(types) / sizeof(*types);
  size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the current register classes that are
  // supported

  // Actions that apply to every supported type.
  for (unsigned int x  = 0; x < NumTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    // (the Custom hook, LowerSIGN_EXTEND_INREG below, emits the SHL/SRA
    // pair).
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    // No carry-using add/sub forms; let the legalizer break them up.
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    // Conditional branches are rewritten to AMDGPUISD::BRANCH_COND
    // (see LowerBRCOND); jump tables and indirect branches are expanded.
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // Custom SDIV covers 8/16/32-bit lowering; 64-bit division has no
    // implementation yet, so it keeps the default action.
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    // NOTE(review): only ROTR is expanded here; ROTL is left at its
    // default action despite the comment -- confirm this is intentional.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  // Vector-specific actions: scalarize/expand what IL cannot express.
  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);

  }
  // 64-bit integer support depends on the subtarget device.
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  // Double-precision support also depends on the device.
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Use the default implementation.
  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
  setOperationAction(ISD::Constant          , MVT::i32    , Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  // Effectively unlimited thresholds: memcpy/memmove/memset are always
  // expanded into plain stores rather than library calls.
  MaxStoresPerMemcpy  = 4096;
  MaxStoresPerMemmove = 4096;
  MaxStoresPerMemset  = 4096;

}
    223 
    224 bool
    225 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    226     const CallInst &I, unsigned Intrinsic) const {
    227   return false;
    228 }
    229 
    230 // The backend supports 32 and 64 bit floating point immediates
    231 bool
    232 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
    233   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
    234       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    235     return true;
    236   } else {
    237     return false;
    238   }
    239 }
    240 
    241 bool
    242 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
    243   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
    244       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    245     return false;
    246   } else {
    247     return true;
    248   }
    249 }
    250 
    251 
    252 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
    253 // be zero. Op is expected to be a target specific node. Used by DAG
    254 // combiner.
    255 
    256 void
    257 AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    258     const SDValue Op,
    259     APInt &KnownZero,
    260     APInt &KnownOne,
    261     const SelectionDAG &DAG,
    262     unsigned Depth) const {
    263   APInt KnownZero2;
    264   APInt KnownOne2;
    265   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
    266   switch (Op.getOpcode()) {
    267     default: break;
    268     case ISD::SELECT_CC:
    269              DAG.ComputeMaskedBits(
    270                  Op.getOperand(1),
    271                  KnownZero,
    272                  KnownOne,
    273                  Depth + 1
    274                  );
    275              DAG.ComputeMaskedBits(
    276                  Op.getOperand(0),
    277                  KnownZero2,
    278                  KnownOne2
    279                  );
    280              assert((KnownZero & KnownOne) == 0
    281                  && "Bits known to be one AND zero?");
    282              assert((KnownZero2 & KnownOne2) == 0
    283                  && "Bits known to be one AND zero?");
    284              // Only known if known in both the LHS and RHS
    285              KnownOne &= KnownOne2;
    286              KnownZero &= KnownZero2;
    287              break;
    288   };
    289 }
    290 
    291 //===----------------------------------------------------------------------===//
    292 //                           Other Lowering Hooks
    293 //===----------------------------------------------------------------------===//
    294 
    295 SDValue
    296 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
    297   EVT OVT = Op.getValueType();
    298   SDValue DST;
    299   if (OVT.getScalarType() == MVT::i64) {
    300     DST = LowerSDIV64(Op, DAG);
    301   } else if (OVT.getScalarType() == MVT::i32) {
    302     DST = LowerSDIV32(Op, DAG);
    303   } else if (OVT.getScalarType() == MVT::i16
    304       || OVT.getScalarType() == MVT::i8) {
    305     DST = LowerSDIV24(Op, DAG);
    306   } else {
    307     DST = SDValue(Op.getNode(), 0);
    308   }
    309   return DST;
    310 }
    311 
    312 SDValue
    313 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
    314   EVT OVT = Op.getValueType();
    315   SDValue DST;
    316   if (OVT.getScalarType() == MVT::i64) {
    317     DST = LowerSREM64(Op, DAG);
    318   } else if (OVT.getScalarType() == MVT::i32) {
    319     DST = LowerSREM32(Op, DAG);
    320   } else if (OVT.getScalarType() == MVT::i16) {
    321     DST = LowerSREM16(Op, DAG);
    322   } else if (OVT.getScalarType() == MVT::i8) {
    323     DST = LowerSREM8(Op, DAG);
    324   } else {
    325     DST = SDValue(Op.getNode(), 0);
    326   }
    327   return DST;
    328 }
    329 
// Custom lowering for SIGN_EXTEND_INREG: materialize the in-register sign
// extension as a shift-left / arithmetic-shift-right pair.  Values
// narrower than 32 bits are first zero-extended to 32-bit lanes (via
// genIntType) so the shifts operate on a supported width, then converted
// back to the original type.
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
  SDValue Data = Op.getOperand(0);
  // Operand 1 of SIGN_EXTEND_INREG carries the type being extended from.
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  // Number of meaningful low bits, and the width of the lane they sit in.
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
    360 EVT
    361 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
    362   int iSize = (size * numEle);
    363   int vEle = (iSize >> ((size == 64) ? 6 : 5));
    364   if (!vEle) {
    365     vEle = 1;
    366   }
    367   if (size == 64) {
    368     if (vEle == 1) {
    369       return EVT(MVT::i64);
    370     } else {
    371       return EVT(MVT::getVectorVT(MVT::i64, vEle));
    372     }
    373   } else {
    374     if (vEle == 1) {
    375       return EVT(MVT::i32);
    376     } else {
    377       return EVT(MVT::getVectorVT(MVT::i32, vEle));
    378     }
    379   }
    380 }
    381 
    382 SDValue
    383 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
    384   SDValue Chain = Op.getOperand(0);
    385   SDValue Cond  = Op.getOperand(1);
    386   SDValue Jump  = Op.getOperand(2);
    387   SDValue Result;
    388   Result = DAG.getNode(
    389       AMDGPUISD::BRANCH_COND,
    390       Op.getDebugLoc(),
    391       Op.getValueType(),
    392       Chain, Jump, Cond);
    393   return Result;
    394 }
    395 
    396 SDValue
    397 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
    398   DebugLoc DL = Op.getDebugLoc();
    399   EVT OVT = Op.getValueType();
    400   SDValue LHS = Op.getOperand(0);
    401   SDValue RHS = Op.getOperand(1);
    402   MVT INTTY;
    403   MVT FLTTY;
    404   if (!OVT.isVector()) {
    405     INTTY = MVT::i32;
    406     FLTTY = MVT::f32;
    407   } else if (OVT.getVectorNumElements() == 2) {
    408     INTTY = MVT::v2i32;
    409     FLTTY = MVT::v2f32;
    410   } else if (OVT.getVectorNumElements() == 4) {
    411     INTTY = MVT::v4i32;
    412     FLTTY = MVT::v4f32;
    413   }
    414   unsigned bitsize = OVT.getScalarType().getSizeInBits();
    415   // char|short jq = ia ^ ib;
    416   SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
    417 
    418   // jq = jq >> (bitsize - 2)
    419   jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
    420 
    421   // jq = jq | 0x1
    422   jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
    423 
    424   // jq = (int)jq
    425   jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
    426 
    427   // int ia = (int)LHS;
    428   SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
    429 
    430   // int ib, (int)RHS;
    431   SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
    432 
    433   // float fa = (float)ia;
    434   SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
    435 
    436   // float fb = (float)ib;
    437   SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
    438 
    439   // float fq = native_divide(fa, fb);
    440   SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
    441 
    442   // fq = trunc(fq);
    443   fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
    444 
    445   // float fqneg = -fq;
    446   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
    447 
    448   // float fr = mad(fqneg, fb, fa);
    449   SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
    450       DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
    451 
    452   // int iq = (int)fq;
    453   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
    454 
    455   // fr = fabs(fr);
    456   fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
    457 
    458   // fb = fabs(fb);
    459   fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
    460 
    461   // int cv = fr >= fb;
    462   SDValue cv;
    463   if (INTTY == MVT::i32) {
    464     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
    465   } else {
    466     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
    467   }
    468   // jq = (cv ? jq : 0);
    469   jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
    470       DAG.getConstant(0, OVT));
    471   // dst = iq + jq;
    472   iq = DAG.getSExtOrTrunc(iq, DL, OVT);
    473   iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
    474   return iq;
    475 }
    476 
    477 SDValue
    478 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
    479   DebugLoc DL = Op.getDebugLoc();
    480   EVT OVT = Op.getValueType();
    481   SDValue LHS = Op.getOperand(0);
    482   SDValue RHS = Op.getOperand(1);
    483   // The LowerSDIV32 function generates equivalent to the following IL.
    484   // mov r0, LHS
    485   // mov r1, RHS
    486   // ilt r10, r0, 0
    487   // ilt r11, r1, 0
    488   // iadd r0, r0, r10
    489   // iadd r1, r1, r11
    490   // ixor r0, r0, r10
    491   // ixor r1, r1, r11
    492   // udiv r0, r0, r1
    493   // ixor r10, r10, r11
    494   // iadd r0, r0, r10
    495   // ixor DST, r0, r10
    496 
    497   // mov r0, LHS
    498   SDValue r0 = LHS;
    499 
    500   // mov r1, RHS
    501   SDValue r1 = RHS;
    502 
    503   // ilt r10, r0, 0
    504   SDValue r10 = DAG.getSelectCC(DL,
    505       r0, DAG.getConstant(0, OVT),
    506       DAG.getConstant(-1, MVT::i32),
    507       DAG.getConstant(0, MVT::i32),
    508       ISD::SETLT);
    509 
    510   // ilt r11, r1, 0
    511   SDValue r11 = DAG.getSelectCC(DL,
    512       r1, DAG.getConstant(0, OVT),
    513       DAG.getConstant(-1, MVT::i32),
    514       DAG.getConstant(0, MVT::i32),
    515       ISD::SETLT);
    516 
    517   // iadd r0, r0, r10
    518   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    519 
    520   // iadd r1, r1, r11
    521   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
    522 
    523   // ixor r0, r0, r10
    524   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    525 
    526   // ixor r1, r1, r11
    527   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
    528 
    529   // udiv r0, r0, r1
    530   r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
    531 
    532   // ixor r10, r10, r11
    533   r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
    534 
    535   // iadd r0, r0, r10
    536   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    537 
    538   // ixor DST, r0, r10
    539   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    540   return DST;
    541 }
    542 
SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
  // 64-bit signed division is not implemented yet; hand the node back
  // untouched (result 0) so the operation is left as-is.
  return SDValue(Op.getNode(), 0);
}
    547 
    548 SDValue
    549 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
    550   DebugLoc DL = Op.getDebugLoc();
    551   EVT OVT = Op.getValueType();
    552   MVT INTTY = MVT::i32;
    553   if (OVT == MVT::v2i8) {
    554     INTTY = MVT::v2i32;
    555   } else if (OVT == MVT::v4i8) {
    556     INTTY = MVT::v4i32;
    557   }
    558   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
    559   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
    560   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
    561   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
    562   return LHS;
    563 }
    564 
    565 SDValue
    566 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
    567   DebugLoc DL = Op.getDebugLoc();
    568   EVT OVT = Op.getValueType();
    569   MVT INTTY = MVT::i32;
    570   if (OVT == MVT::v2i16) {
    571     INTTY = MVT::v2i32;
    572   } else if (OVT == MVT::v4i16) {
    573     INTTY = MVT::v4i32;
    574   }
    575   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
    576   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
    577   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
    578   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
    579   return LHS;
    580 }
    581 
    582 SDValue
    583 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
    584   DebugLoc DL = Op.getDebugLoc();
    585   EVT OVT = Op.getValueType();
    586   SDValue LHS = Op.getOperand(0);
    587   SDValue RHS = Op.getOperand(1);
    588   // The LowerSREM32 function generates equivalent to the following IL.
    589   // mov r0, LHS
    590   // mov r1, RHS
    591   // ilt r10, r0, 0
    592   // ilt r11, r1, 0
    593   // iadd r0, r0, r10
    594   // iadd r1, r1, r11
    595   // ixor r0, r0, r10
    596   // ixor r1, r1, r11
    597   // udiv r20, r0, r1
    598   // umul r20, r20, r1
    599   // sub r0, r0, r20
    600   // iadd r0, r0, r10
    601   // ixor DST, r0, r10
    602 
    603   // mov r0, LHS
    604   SDValue r0 = LHS;
    605 
    606   // mov r1, RHS
    607   SDValue r1 = RHS;
    608 
    609   // ilt r10, r0, 0
    610   SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
    611 
    612   // ilt r11, r1, 0
    613   SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
    614 
    615   // iadd r0, r0, r10
    616   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    617 
    618   // iadd r1, r1, r11
    619   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
    620 
    621   // ixor r0, r0, r10
    622   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    623 
    624   // ixor r1, r1, r11
    625   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
    626 
    627   // udiv r20, r0, r1
    628   SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
    629 
    630   // umul r20, r20, r1
    631   r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
    632 
    633   // sub r0, r0, r20
    634   r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
    635 
    636   // iadd r0, r0, r10
    637   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
    638 
    639   // ixor DST, r0, r10
    640   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
    641   return DST;
    642 }
    643 
SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
  // 64-bit signed remainder is not implemented yet; hand the node back
  // untouched (result 0) so the operation is left as-is.
  return SDValue(Op.getNode(), 0);
}
    648