Home | History | Annotate | Download | only in R600
      1 //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief This is the parent TargetLowering class for hardware code gen
     12 /// targets.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "AMDGPUISelLowering.h"
     17 #include "AMDGPURegisterInfo.h"
     18 #include "AMDILIntrinsicInfo.h"
     19 #include "AMDGPUSubtarget.h"
     20 #include "llvm/CodeGen/CallingConvLower.h"
     21 #include "llvm/CodeGen/MachineFunction.h"
     22 #include "llvm/CodeGen/MachineRegisterInfo.h"
     23 #include "llvm/CodeGen/SelectionDAG.h"
     24 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
     25 
     26 using namespace llvm;
     27 
     28 #include "AMDGPUGenCallingConv.inc"
     29 
     30 AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
     31   TargetLowering(TM, new TargetLoweringObjectFileELF()) {
     32 
     33   // Initialize target lowering borrowed from AMDIL
     34   InitAMDILLowering();
     35 
     36   // We need to custom lower some of the intrinsics
     37   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
     38 
     39   // Library functions.  These default to Expand, but we have instructions
     40   // for them.
     41   setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
     42   setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
     43   setOperationAction(ISD::FPOW,   MVT::f32, Legal);
     44   setOperationAction(ISD::FLOG2,  MVT::f32, Legal);
     45   setOperationAction(ISD::FABS,   MVT::f32, Legal);
     46   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
     47   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
     48 
     49   // Lower floating point store/load to integer store/load to reduce the number
     50   // of patterns in tablegen.
     51   setOperationAction(ISD::STORE, MVT::f32, Promote);
     52   AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
     53 
     54   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
     55   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
     56 
     57   setOperationAction(ISD::LOAD, MVT::f32, Promote);
     58   AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
     59 
     60   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
     61   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
     62 
     63   setOperationAction(ISD::UDIV, MVT::i32, Expand);
     64   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
     65   setOperationAction(ISD::UREM, MVT::i32, Expand);
     66 }
     67 
     68 //===---------------------------------------------------------------------===//
     69 // TargetLowering Callbacks
     70 //===---------------------------------------------------------------------===//
     71 
     72 void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
     73                              const SmallVectorImpl<ISD::InputArg> &Ins) const {
     74 
     75   State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
     76 }
     77 
     78 SDValue AMDGPUTargetLowering::LowerReturn(
     79                                      SDValue Chain,
     80                                      CallingConv::ID CallConv,
     81                                      bool isVarArg,
     82                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
     83                                      const SmallVectorImpl<SDValue> &OutVals,
     84                                      DebugLoc DL, SelectionDAG &DAG) const {
     85   return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
     86 }
     87 
     88 //===---------------------------------------------------------------------===//
     89 // Target specific lowering
     90 //===---------------------------------------------------------------------===//
     91 
     92 SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
     93     const {
     94   switch (Op.getOpcode()) {
     95   default:
     96     Op.getNode()->dump();
     97     assert(0 && "Custom lowering code for this"
     98         "instruction is not implemented yet!");
     99     break;
    100   // AMDIL DAG lowering
    101   case ISD::SDIV: return LowerSDIV(Op, DAG);
    102   case ISD::SREM: return LowerSREM(Op, DAG);
    103   case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
    104   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
    105   // AMDGPU DAG lowering
    106   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    107   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
    108   }
    109   return Op;
    110 }
    111 
    112 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
    113     SelectionDAG &DAG) const {
    114   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    115   DebugLoc DL = Op.getDebugLoc();
    116   EVT VT = Op.getValueType();
    117 
    118   switch (IntrinsicID) {
    119     default: return Op;
    120     case AMDGPUIntrinsic::AMDIL_abs:
    121       return LowerIntrinsicIABS(Op, DAG);
    122     case AMDGPUIntrinsic::AMDIL_exp:
    123       return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
    124     case AMDGPUIntrinsic::AMDGPU_lrp:
    125       return LowerIntrinsicLRP(Op, DAG);
    126     case AMDGPUIntrinsic::AMDIL_fraction:
    127       return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
    128     case AMDGPUIntrinsic::AMDIL_max:
    129       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
    130                                                   Op.getOperand(2));
    131     case AMDGPUIntrinsic::AMDGPU_imax:
    132       return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
    133                                                   Op.getOperand(2));
    134     case AMDGPUIntrinsic::AMDGPU_umax:
    135       return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
    136                                                   Op.getOperand(2));
    137     case AMDGPUIntrinsic::AMDIL_min:
    138       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
    139                                                   Op.getOperand(2));
    140     case AMDGPUIntrinsic::AMDGPU_imin:
    141       return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
    142                                                   Op.getOperand(2));
    143     case AMDGPUIntrinsic::AMDGPU_umin:
    144       return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
    145                                                   Op.getOperand(2));
    146     case AMDGPUIntrinsic::AMDIL_round_nearest:
    147       return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
    148   }
    149 }
    150 
    151 ///IABS(a) = SMAX(sub(0, a), a)
    152 SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
    153     SelectionDAG &DAG) const {
    154 
    155   DebugLoc DL = Op.getDebugLoc();
    156   EVT VT = Op.getValueType();
    157   SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
    158                                               Op.getOperand(1));
    159 
    160   return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
    161 }
    162 
    163 /// Linear Interpolation
    164 /// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
    165 SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
    166     SelectionDAG &DAG) const {
    167   DebugLoc DL = Op.getDebugLoc();
    168   EVT VT = Op.getValueType();
    169   SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
    170                                 DAG.getConstantFP(1.0f, MVT::f32),
    171                                 Op.getOperand(1));
    172   SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
    173                                                     Op.getOperand(3));
    174   return DAG.getNode(ISD::FADD, DL, VT,
    175       DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
    176       OneSubAC);
    177 }
    178 
    179 /// \brief Generate Min/Max node
    180 SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
    181     SelectionDAG &DAG) const {
    182   DebugLoc DL = Op.getDebugLoc();
    183   EVT VT = Op.getValueType();
    184 
    185   SDValue LHS = Op.getOperand(0);
    186   SDValue RHS = Op.getOperand(1);
    187   SDValue True = Op.getOperand(2);
    188   SDValue False = Op.getOperand(3);
    189   SDValue CC = Op.getOperand(4);
    190 
    191   if (VT != MVT::f32 ||
    192       !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
    193     return SDValue();
    194   }
    195 
    196   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    197   switch (CCOpcode) {
    198   case ISD::SETOEQ:
    199   case ISD::SETONE:
    200   case ISD::SETUNE:
    201   case ISD::SETNE:
    202   case ISD::SETUEQ:
    203   case ISD::SETEQ:
    204   case ISD::SETFALSE:
    205   case ISD::SETFALSE2:
    206   case ISD::SETTRUE:
    207   case ISD::SETTRUE2:
    208   case ISD::SETUO:
    209   case ISD::SETO:
    210     assert(0 && "Operation should already be optimised !");
    211   case ISD::SETULE:
    212   case ISD::SETULT:
    213   case ISD::SETOLE:
    214   case ISD::SETOLT:
    215   case ISD::SETLE:
    216   case ISD::SETLT: {
    217     if (LHS == True)
    218       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
    219     else
    220       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
    221   }
    222   case ISD::SETGT:
    223   case ISD::SETGE:
    224   case ISD::SETUGE:
    225   case ISD::SETOGE:
    226   case ISD::SETUGT:
    227   case ISD::SETOGT: {
    228     if (LHS == True)
    229       return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
    230     else
    231       return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
    232   }
    233   case ISD::SETCC_INVALID:
    234     assert(0 && "Invalid setcc condcode !");
    235   }
    236   return Op;
    237 }
    238 
    239 
    240 
    241 SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
    242     SelectionDAG &DAG) const {
    243   DebugLoc DL = Op.getDebugLoc();
    244   EVT VT = Op.getValueType();
    245 
    246   SDValue Num = Op.getOperand(0);
    247   SDValue Den = Op.getOperand(1);
    248 
    249   SmallVector<SDValue, 8> Results;
    250 
    251   // RCP =  URECIP(Den) = 2^32 / Den + e
    252   // e is rounding error.
    253   SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
    254 
    255   // RCP_LO = umulo(RCP, Den) */
    256   SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
    257 
    258   // RCP_HI = mulhu (RCP, Den) */
    259   SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
    260 
    261   // NEG_RCP_LO = -RCP_LO
    262   SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
    263                                                      RCP_LO);
    264 
    265   // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
    266   SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
    267                                            NEG_RCP_LO, RCP_LO,
    268                                            ISD::SETEQ);
    269   // Calculate the rounding error from the URECIP instruction
    270   // E = mulhu(ABS_RCP_LO, RCP)
    271   SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
    272 
    273   // RCP_A_E = RCP + E
    274   SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
    275 
    276   // RCP_S_E = RCP - E
    277   SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
    278 
    279   // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
    280   SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
    281                                      RCP_A_E, RCP_S_E,
    282                                      ISD::SETEQ);
    283   // Quotient = mulhu(Tmp0, Num)
    284   SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
    285 
    286   // Num_S_Remainder = Quotient * Den
    287   SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
    288 
    289   // Remainder = Num - Num_S_Remainder
    290   SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
    291 
    292   // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
    293   SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
    294                                                  DAG.getConstant(-1, VT),
    295                                                  DAG.getConstant(0, VT),
    296                                                  ISD::SETGE);
    297   // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
    298   SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
    299                                                   DAG.getConstant(0, VT),
    300                                                   DAG.getConstant(-1, VT),
    301                                                   DAG.getConstant(0, VT),
    302                                                   ISD::SETGE);
    303   // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
    304   SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
    305                                                Remainder_GE_Zero);
    306 
    307   // Calculate Division result:
    308 
    309   // Quotient_A_One = Quotient + 1
    310   SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
    311                                                          DAG.getConstant(1, VT));
    312 
    313   // Quotient_S_One = Quotient - 1
    314   SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
    315                                                          DAG.getConstant(1, VT));
    316 
    317   // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
    318   SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
    319                                      Quotient, Quotient_A_One, ISD::SETEQ);
    320 
    321   // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
    322   Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
    323                             Quotient_S_One, Div, ISD::SETEQ);
    324 
    325   // Calculate Rem result:
    326 
    327   // Remainder_S_Den = Remainder - Den
    328   SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
    329 
    330   // Remainder_A_Den = Remainder + Den
    331   SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
    332 
    333   // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
    334   SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
    335                                     Remainder, Remainder_S_Den, ISD::SETEQ);
    336 
    337   // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
    338   Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
    339                             Remainder_A_Den, Rem, ISD::SETEQ);
    340   SDValue Ops[2];
    341   Ops[0] = Div;
    342   Ops[1] = Rem;
    343   return DAG.getMergeValues(Ops, 2, DL);
    344 }
    345 
    346 //===----------------------------------------------------------------------===//
    347 // Helper functions
    348 //===----------------------------------------------------------------------===//
    349 
    350 bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
    351   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    352     return CFP->isExactlyValue(1.0);
    353   }
    354   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
    355     return C->isAllOnesValue();
    356   }
    357   return false;
    358 }
    359 
    360 bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
    361   if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    362     return CFP->getValueAPF().isZero();
    363   }
    364   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
    365     return C->isNullValue();
    366   }
    367   return false;
    368 }
    369 
    370 SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
    371                                                   const TargetRegisterClass *RC,
    372                                                    unsigned Reg, EVT VT) const {
    373   MachineFunction &MF = DAG.getMachineFunction();
    374   MachineRegisterInfo &MRI = MF.getRegInfo();
    375   unsigned VirtualRegister;
    376   if (!MRI.isLiveIn(Reg)) {
    377     VirtualRegister = MRI.createVirtualRegister(RC);
    378     MRI.addLiveIn(Reg, VirtualRegister);
    379   } else {
    380     VirtualRegister = MRI.getLiveInVirtReg(Reg);
    381   }
    382   return DAG.getRegister(VirtualRegister, VT);
    383 }
    384 
    385 #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
    386 
    387 const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
    388   switch (Opcode) {
    389   default: return 0;
    390   // AMDIL DAG nodes
    391   NODE_NAME_CASE(CALL);
    392   NODE_NAME_CASE(UMUL);
    393   NODE_NAME_CASE(DIV_INF);
    394   NODE_NAME_CASE(RET_FLAG);
    395   NODE_NAME_CASE(BRANCH_COND);
    396 
    397   // AMDGPU DAG nodes
    398   NODE_NAME_CASE(DWORDADDR)
    399   NODE_NAME_CASE(FRACT)
    400   NODE_NAME_CASE(FMAX)
    401   NODE_NAME_CASE(SMAX)
    402   NODE_NAME_CASE(UMAX)
    403   NODE_NAME_CASE(FMIN)
    404   NODE_NAME_CASE(SMIN)
    405   NODE_NAME_CASE(UMIN)
    406   NODE_NAME_CASE(URECIP)
    407   NODE_NAME_CASE(EXPORT)
    408   NODE_NAME_CASE(CONST_ADDRESS)
    409   NODE_NAME_CASE(REGISTER_LOAD)
    410   NODE_NAME_CASE(REGISTER_STORE)
    411   }
    412 }
    413