Home | History | Annotate | Download | only in radeon
      1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This is the parent TargetLowering class for hardware code gen targets.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AMDGPUISelLowering.h"
     15 #include "AMDILIntrinsicInfo.h"
     16 #include "llvm/CodeGen/MachineFunction.h"
     17 #include "llvm/CodeGen/MachineRegisterInfo.h"
     18 #include "llvm/CodeGen/SelectionDAG.h"
     19 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
     20 
     21 using namespace llvm;
     22 
     23 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
     24   TargetLowering(TM, new TargetLoweringObjectFileELF())
     25 {
     26 
     27   // Initialize target lowering borrowed from AMDIL
     28   InitAMDILLowering();
     29 
     30   // We need to custom lower some of the intrinsics
     31   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
     32 
     33   // Library functions.  These default to Expand, but we have instructions
     34   // for them.
     35   setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
     36   setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
     37   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
     38 
     39   setOperationAction(ISD::UDIV, MVT::i32, Expand);
     40   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
     41   setOperationAction(ISD::UREM, MVT::i32, Expand);
     42 }
     43 
     44 //===---------------------------------------------------------------------===//
     45 // TargetLowering Callbacks
     46 //===---------------------------------------------------------------------===//
     47 
     48 SDValue AMDGPUTargetLowering::LowerFormalArguments(
     49                                       SDValue Chain,
     50                                       CallingConv::ID CallConv,
     51                                       bool isVarArg,
     52                                       const SmallVectorImpl<ISD::InputArg> &Ins,
     53                                       DebugLoc DL, SelectionDAG &DAG,
     54                                       SmallVectorImpl<SDValue> &InVals) const
     55 {
     56   // Lowering of arguments happens in R600LowerKernelParameters, so we can
     57   // ignore the arguments here.
     58   for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
     59     InVals.push_back(SDValue());
     60   }
     61   return Chain;
     62 }
     63 
     64 SDValue AMDGPUTargetLowering::LowerReturn(
     65                                      SDValue Chain,
     66                                      CallingConv::ID CallConv,
     67                                      bool isVarArg,
     68                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
     69                                      const SmallVectorImpl<SDValue> &OutVals,
     70                                      DebugLoc DL, SelectionDAG &DAG) const
     71 {
     72   return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
     73 }
     74 
     75 //===---------------------------------------------------------------------===//
     76 // Target specific lowering
     77 //===---------------------------------------------------------------------===//
     78 
     79 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
     80     const
     81 {
     82   switch (Op.getOpcode()) {
     83   default:
     84     Op.getNode()->dump();
     85     assert(0 && "Custom lowering code for this"
     86         "instruction is not implemented yet!");
     87     break;
     88   // AMDIL DAG lowering
     89   case ISD::SDIV: return LowerSDIV(Op, DAG);
     90   case ISD::SREM: return LowerSREM(Op, DAG);
     91   case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
     92   case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
     93   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
     94   // AMDGPU DAG lowering
     95   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
     96   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
     97   }
     98   return Op;
     99 }
    100 
    101 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
    102     SelectionDAG &DAG) const
    103 {
    104   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    105   DebugLoc DL = Op.getDebugLoc();
    106   EVT VT = Op.getValueType();
    107 
    108   switch (IntrinsicID) {
    109     default: return Op;
    110     case AMDGPUIntrinsic::AMDIL_abs:
    111       return LowerIntrinsicIABS(Op, DAG);
    112     case AMDGPUIntrinsic::AMDIL_exp:
    113       return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
    114     case AMDGPUIntrinsic::AMDIL_fabs:
    115       return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
    116     case AMDGPUIntrinsic::AMDGPU_lrp:
    117       return LowerIntrinsicLRP(Op, DAG);
    118     case AMDGPUIntrinsic::AMDIL_fraction:
    119       return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
    120     case AMDGPUIntrinsic::AMDIL_mad:
    121       return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
    122                               Op.getOperand(2), Op.getOperand(3));
    123     case AMDGPUIntrinsic::AMDIL_max:
    124       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
    125                                                   Op.getOperand(2));
    126     case AMDGPUIntrinsic::AMDGPU_imax:
    127       return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
    128                                                   Op.getOperand(2));
    129     case AMDGPUIntrinsic::AMDGPU_umax:
    130       return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
    131                                                   Op.getOperand(2));
    132     case AMDGPUIntrinsic::AMDIL_min:
    133       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
    134                                                   Op.getOperand(2));
    135     case AMDGPUIntrinsic::AMDGPU_imin:
    136       return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
    137                                                   Op.getOperand(2));
    138     case AMDGPUIntrinsic::AMDGPU_umin:
    139       return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
    140                                                   Op.getOperand(2));
    141     case AMDGPUIntrinsic::AMDIL_round_nearest:
    142       return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
    143     case AMDGPUIntrinsic::AMDIL_round_posinf:
    144       return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1));
    145   }
    146 }
    147 
    148 ///IABS(a) = SMAX(sub(0, a), a)
    149 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
    150     SelectionDAG &DAG) const
    151 {
    152 
    153   DebugLoc DL = Op.getDebugLoc();
    154   EVT VT = Op.getValueType();
    155   SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
    156                                               Op.getOperand(1));
    157 
    158   return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
    159 }
    160 
    161 /// Linear Interpolation
    162 /// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
    163 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
    164     SelectionDAG &DAG) const
    165 {
    166   DebugLoc DL = Op.getDebugLoc();
    167   EVT VT = Op.getValueType();
    168   SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
    169                                 DAG.getConstantFP(1.0f, MVT::f32),
    170                                 Op.getOperand(1));
    171   SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
    172                                                     Op.getOperand(3));
    173   return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
    174                                                Op.getOperand(2),
    175                                                OneSubAC);
    176 }
    177 
    178 
    179 
    180 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
    181     SelectionDAG &DAG) const
    182 {
    183   DebugLoc DL = Op.getDebugLoc();
    184   EVT VT = Op.getValueType();
    185 
    186   SDValue Num = Op.getOperand(0);
    187   SDValue Den = Op.getOperand(1);
    188 
    189   SmallVector<SDValue, 8> Results;
    190 
    191   // RCP =  URECIP(Den) = 2^32 / Den + e
    192   // e is rounding error.
    193   SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
    194 
    195   // RCP_LO = umulo(RCP, Den) */
    196   SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
    197 
    198   // RCP_HI = mulhu (RCP, Den) */
    199   SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
    200 
    201   // NEG_RCP_LO = -RCP_LO
    202   SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
    203                                                      RCP_LO);
    204 
    205   // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
    206   SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
    207                                            NEG_RCP_LO, RCP_LO,
    208                                            ISD::SETEQ);
    209   // Calculate the rounding error from the URECIP instruction
    210   // E = mulhu(ABS_RCP_LO, RCP)
    211   SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
    212 
    213   // RCP_A_E = RCP + E
    214   SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
    215 
    216   // RCP_S_E = RCP - E
    217   SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
    218 
    219   // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
    220   SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
    221                                      RCP_A_E, RCP_S_E,
    222                                      ISD::SETEQ);
    223   // Quotient = mulhu(Tmp0, Num)
    224   SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
    225 
    226   // Num_S_Remainder = Quotient * Den
    227   SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
    228 
    229   // Remainder = Num - Num_S_Remainder
    230   SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
    231 
    232   // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
    233   SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
    234                                                  DAG.getConstant(-1, VT),
    235                                                  DAG.getConstant(0, VT),
    236                                                  ISD::SETGE);
    237   // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
    238   SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
    239                                                   DAG.getConstant(0, VT),
    240                                                   DAG.getConstant(-1, VT),
    241                                                   DAG.getConstant(0, VT),
    242                                                   ISD::SETGE);
    243   // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
    244   SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
    245                                                Remainder_GE_Zero);
    246 
    247   // Calculate Division result:
    248 
    249   // Quotient_A_One = Quotient + 1
    250   SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
    251                                                          DAG.getConstant(1, VT));
    252 
    253   // Quotient_S_One = Quotient - 1
    254   SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
    255                                                          DAG.getConstant(1, VT));
    256 
    257   // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
    258   SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
    259                                      Quotient, Quotient_A_One, ISD::SETEQ);
    260 
    261   // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
    262   Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
    263                             Quotient_S_One, Div, ISD::SETEQ);
    264 
    265   // Calculate Rem result:
    266 
    267   // Remainder_S_Den = Remainder - Den
    268   SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
    269 
    270   // Remainder_A_Den = Remainder + Den
    271   SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
    272 
    273   // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
    274   SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
    275                                     Remainder, Remainder_S_Den, ISD::SETEQ);
    276 
    277   // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
    278   Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
    279                             Remainder_A_Den, Rem, ISD::SETEQ);
    280 
    281   DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
    282   DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
    283 
    284   return Op;
    285 }
    286 
    287 //===----------------------------------------------------------------------===//
    288 // Helper functions
    289 //===----------------------------------------------------------------------===//
    290 
    291 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
    292 {
    293   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    294     return CFP->isExactlyValue(1.0);
    295   }
    296   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
    297     return C->isAllOnesValue();
    298   }
    299   return false;
    300 }
    301 
    302 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
    303 {
    304   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    305     return CFP->getValueAPF().isZero();
    306   }
    307   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
    308     return C->isNullValue();
    309   }
    310   return false;
    311 }
    312 
    313 SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
    314                                                   const TargetRegisterClass *RC,
    315                                                    unsigned Reg, EVT VT) const {
    316   MachineFunction &MF = DAG.getMachineFunction();
    317   MachineRegisterInfo &MRI = MF.getRegInfo();
    318   unsigned VirtualRegister;
    319   if (!MRI.isLiveIn(Reg)) {
    320     VirtualRegister = MRI.createVirtualRegister(RC);
    321     MRI.addLiveIn(Reg, VirtualRegister);
    322   } else {
    323     VirtualRegister = MRI.getLiveInVirtReg(Reg);
    324   }
    325   return DAG.getRegister(VirtualRegister, VT);
    326 }
    327 
    328 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
    329 
    330 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
    331 {
    332   switch (Opcode) {
    333   default: return 0;
    334   // AMDIL DAG nodes
    335   NODE_NAME_CASE(MAD);
    336   NODE_NAME_CASE(CALL);
    337   NODE_NAME_CASE(UMUL);
    338   NODE_NAME_CASE(DIV_INF);
    339   NODE_NAME_CASE(VBUILD);
    340   NODE_NAME_CASE(RET_FLAG);
    341   NODE_NAME_CASE(BRANCH_COND);
    342 
    343   // AMDGPU DAG nodes
    344   NODE_NAME_CASE(FRACT)
    345   NODE_NAME_CASE(FMAX)
    346   NODE_NAME_CASE(SMAX)
    347   NODE_NAME_CASE(UMAX)
    348   NODE_NAME_CASE(FMIN)
    349   NODE_NAME_CASE(SMIN)
    350   NODE_NAME_CASE(UMIN)
    351   NODE_NAME_CASE(URECIP)
    352   }
    353 }
    354