//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a reference to the AMDGPU Subtarget around so that we
  // can make the right decision when generating code for different targets.
  const AMDGPUSubtarget &Subtarget;
public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();

  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  inline SDValue getSmallIPtrImm(unsigned Imm);
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *Ptr, unsigned AS);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  /// \returns True if the current basic block being selected is at control
  /// flow depth 0, meaning that the current block dominates the exit block.
  bool isCFDepth0() const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
                         SDValue &ImmOffset) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
}  // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
  : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo.
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo, or nullptr if the register class cannot
/// be determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return TM.getRegisterInfo()->getRegClass(RegClass);
  }
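  // REG_SEQUENCE nodes carry their register class ID as operand 0, followed
  // by (value, subregister index) pairs, so the subregister index that goes
  // with operand OpNo lives at operand OpNo + 1. Use it to narrow the
  // super-register class to the subclass that supports that subregister.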
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
  }
  }
}

SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned Imm) {
  return CurDAG->getTargetConstant(Imm, MVT::i32);
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(
  SDValue Addr, SDValue &R1, SDValue &R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}


bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, MVT::i64);
  }
  return true;
}

SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  switch (Opc) {
  default: break;
  // We select i64 ADD here instead of custom-lowering it during DAG
  // legalization so that some i64 ADDs used for address calculation can be
  // folded into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI =
                   static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
    const SIRegisterInfo *SIRI =
                   static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      bool UseVReg = true;
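      // Default to a vector register class; the scan below switches to a
      // scalar class if any already-selected user wants this value in an SGPR.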
      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
                                                    U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (SIRI->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }
      switch(NumVectorElts) {
      case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
                                     AMDGPU::SReg_32RegClassID;
        break;
      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
                                     AMDGPU::SReg_64RegClassID;
        break;
      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
                                     AMDGPU::SReg_128RegClassID;
        break;
      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
                                     AMDGPU::SReg_256RegClassID;
        break;
      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
                                      AMDGPU::SReg_512RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG,
      // which adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     SDLoc(N), EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  SDLoc(N), N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

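    // Materialize the value as two 32-bit scalar moves stitched together
    // with a REG_SEQUENCE; a non-inline 64-bit immediate cannot be encoded
    // as a single literal operand here.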
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
                                CurDAG->getConstant(Imm >> 32, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
                                  N->getValueType(0), Ops);
  }

  case AMDGPUISD::REGISTER_LOAD: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;

    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
    const SDValue Ops[] = {
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
                                  CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
                                  Ops);
  }
  case AMDGPUISD::REGISTER_STORE: {
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
                                  CurDAG->getVTList(MVT::Other),
                                  Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has separate operands for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move
    // to the scalar version if the offset and width are constant, so that we
    // can try to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    // Pack the offset and width of the BFE into the format expected by
    // S_BFE_I32 / S_BFE_U32: in the second source operand, bits [5:0]
    // contain the offset and bits [22:16] the width.

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    uint32_t PackedVal = OffsetVal | WidthVal << 16;
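    // For example, extracting an 8-bit field starting at bit 8 packs to
    // 0x8 | (0x8 << 16) == 0x00080008.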

    SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
    return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                                  SDLoc(N),
                                  MVT::i32,
                                  N->getOperand(0),
                                  PackedOffsetWidth);

  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  }
  return SelectCode(N);
}


bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

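// A constant-address-space load is still treated as a global load when the
// scalar unit cannot service it: on pre-SI targets, and for sub-dword types
// (SMRD offers no extending loads).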
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32)) {
      return true;
    }
  }
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isCFDepth0() const {
  // FIXME: Figure out a way to use DominatorTree analysis here.
  const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
  const Function *Fn = FuncInfo->Fn;
  return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
}


const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    SDValue &BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
    return true;
  }

  // Default case, no offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
  }

  return true;
}

SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

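  // Split both operands into 32-bit halves, add/subtract the low halves,
  // feed the carry produced as glue into an add/sub-with-carry of the high
  // halves, and reassemble the 64-bit result with a REG_SEQUENCE.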
  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

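  // The scalar forms are only safe where the block dominates the exit (see
  // isCFDepth0); elsewhere fall back to the VALU add/sub with carry,
  // presumably because scalar instructions execute regardless of the EXEC
  // mask inside divergent control flow.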
  if (!isCFDepth0()) {
    Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
    CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
  }

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);

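  // The three real sources are followed by zeros filling the remaining VOP3
  // operand slots (likely the source-modifier, clamp, and omod fields),
  // which are unused here.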
  SDValue Ops[] = {
    N->getOperand(0),
    N->getOperand(1),
    N->getOperand(2),
    Zero,
    Zero,
    Zero,
    Zero
  };

  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
  return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
                                     Ptr), 0);
}

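// Match a MUBUF addr64 address as resource pointer + 64-bit register offset
// + immediate offset. The immediate has to fit the encoding's unsigned
// 12-bit offset field, hence the isUInt<12> check below.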
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
                                           SDValue &Offset,
                                           SDValue &ImmOffset) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (isUInt<12>(C1->getZExtValue())) {

      if (N0.getOpcode() == ISD::ADD) {
        // (add (add N2, N3), C1)
        SDValue N2 = N0.getOperand(0);
        SDValue N3 = N0.getOperand(1);
        Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
        Offset = N3;
        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
        return true;
      }

      // (add N0, C1)
      Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));
      Offset = N0;
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
      return true;
    }
  }
  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1)
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
    Offset = N1;
    ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
    return true;
  }

  // Default case.
  Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
  Offset = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
  return true;
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering &Lowering =
    *static_cast<const AMDGPUTargetLowering *>(getTargetLowering());
  bool IsModified = false;
  do {
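    // Iterate to a fixed point: folding one node may expose further folding
    // opportunities on its users, so rescan until nothing changes.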
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more.
    for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {

      SDNode *Node = I;

      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
    804