// Home | History | Annotate | Download | only in AMDGPU  (code-browser navigation chrome, not part of the source)
      1 //===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //==-----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief Defines an instruction selector for the AMDGPU target.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "AMDGPUDiagnosticInfoUnsupported.h"
     16 #include "AMDGPUInstrInfo.h"
     17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
     18 #include "AMDGPURegisterInfo.h"
     19 #include "AMDGPUSubtarget.h"
     20 #include "R600InstrInfo.h"
     21 #include "SIDefines.h"
     22 #include "SIISelLowering.h"
     23 #include "SIMachineFunctionInfo.h"
     24 #include "llvm/CodeGen/FunctionLoweringInfo.h"
     25 #include "llvm/CodeGen/MachineFrameInfo.h"
     26 #include "llvm/CodeGen/MachineRegisterInfo.h"
     27 #include "llvm/CodeGen/PseudoSourceValue.h"
     28 #include "llvm/CodeGen/SelectionDAG.h"
     29 #include "llvm/CodeGen/SelectionDAGISel.h"
     30 #include "llvm/IR/Function.h"
     31 
     32 using namespace llvm;
     33 
     34 //===----------------------------------------------------------------------===//
     35 // Instruction Selector Implementation
     36 //===----------------------------------------------------------------------===//
     37 
     38 namespace {
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  // Caches the per-function subtarget, then runs generic ISel.
  bool runOnMachineFunction(MachineFunction &MF) override;
  // Main hook: select one SelectionDAG node into machine node(s).
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  // True if constant node \p N can be encoded as an inline immediate
  // (SITargetLowering::analyzeImmediate(N) == 0).
  bool isInlineImmediate(SDNode *N) const;
  // R600 source-operand folding helpers (see R600InstrInfo).
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // Address-space classification helpers for memory nodes. checkType is for
  // non-zero address spaces; address space 0 (private) goes through
  // checkPrivateAddress, which also accepts pseudo source values.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isFlatStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isFlatLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  // Glues a copy of -1 into m0 onto local-memory accesses (SI+); returns N.
  SDNode *glueCopyToM0(SDNode *N) const;

  // Register class of the virtual register used for operand \p OpNo of \p N,
  // or null if it cannot be determined.
  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (local data share) addressing-mode selectors.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF (buffer memory) addressing-mode selectors.
  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;

  // SMRD (scalar memory read) addressing-mode selectors.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  SDNode *SelectAddrSpaceCast(SDNode *N);

  // VOP3 source-modifier (neg/abs/clamp/omod) operand selectors.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Custom expansions for nodes tablegen cannot match directly.
  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
    150 }  // end anonymous namespace
    151 
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}
    157 
// The Subtarget member is left unset here; it is resolved per-function in
// runOnMachineFunction.
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}
    160 
// Cache the current function's subtarget, then delegate to the generic
// SelectionDAG instruction-selection driver.
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}
    165 
// Out-of-line virtual destructor; the pass owns no resources to release.
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
    168 
    169 bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
    170   const SITargetLowering *TL
    171       = static_cast<const SITargetLowering *>(getTargetLowering());
    172   return TL->analyzeImmediate(N) == 0;
    173 }
    174 
/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  // Only machine nodes carry an MCInstrDesc we can query.
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    // Generic machine instruction: look the operand up in its descriptor.
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    // \p OpNo counts use operands only, so skip past the defs.
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    // -1 means the operand has no register class constraint.
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // REG_SEQUENCE layout: operand 0 is the super-register class ID, then
    // (value, subreg-index) pairs; hence the OpNo + 1 below.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                              SubRegIdx);
  }
  }
}
    209 
    210 bool AMDGPUDAGToDAGISel::SelectADDRParam(
    211   SDValue Addr, SDValue& R1, SDValue& R2) {
    212 
    213   if (Addr.getOpcode() == ISD::FrameIndex) {
    214     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    215       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
    216       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    217     } else {
    218       R1 = Addr;
    219       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    220     }
    221   } else if (Addr.getOpcode() == ISD::ADD) {
    222     R1 = Addr.getOperand(0);
    223     R2 = Addr.getOperand(1);
    224   } else {
    225     R1 = Addr;
    226     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    227   }
    228   return true;
    229 }
    230 
    231 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
    232   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
    233       Addr.getOpcode() == ISD::TargetGlobalAddress) {
    234     return false;
    235   }
    236   return SelectADDRParam(Addr, R1, R2);
    237 }
    238 
    239 
    240 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
    241   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
    242       Addr.getOpcode() == ISD::TargetGlobalAddress) {
    243     return false;
    244   }
    245 
    246   if (Addr.getOpcode() == ISD::FrameIndex) {
    247     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    248       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
    249       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    250     } else {
    251       R1 = Addr;
    252       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    253     }
    254   } else if (Addr.getOpcode() == ISD::ADD) {
    255     R1 = Addr.getOperand(0);
    256     R2 = Addr.getOperand(1);
    257   } else {
    258     R1 = Addr;
    259     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    260   }
    261   return true;
    262 }
    263 
// For local-memory accesses on SOUTHERN_ISLANDS and newer, glue a
// "copy -1 into m0" node onto \p N so m0 is written before the access; for
// anything else \p N is returned unchanged. Note: N is mutated in place via
// MorphNodeTo.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  // Result 1 of the m0 copy is the glue output.
  SDValue Glue = M0.getValue(1);

  // Rebuild N's operand list with the glue appended, then morph N in place
  // so it is ordered after the m0 write.
  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}
    289 
    290 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
    291   switch (NumVectorElts) {
    292   case 1:
    293     return AMDGPU::SReg_32RegClassID;
    294   case 2:
    295     return AMDGPU::SReg_64RegClassID;
    296   case 4:
    297     return AMDGPU::SReg_128RegClassID;
    298   case 8:
    299     return AMDGPU::SReg_256RegClassID;
    300   case 16:
    301     return AMDGPU::SReg_512RegClassID;
    302   }
    303 
    304   llvm_unreachable("invalid vector size");
    305 }
    306 
// Top-level node selection. Handles the cases tablegen patterns cannot
// express (64-bit add/sub expansion, vector REG_SEQUENCE construction,
// 64-bit materialization, BFE packing, ...) and falls through to the
// autogenerated SelectCode for everything else.
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  // Atomics on local memory need m0 initialized first (see glueCopyToM0).
  if (isa<AtomicSDNode>(N))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    // Build the vector as a REG_SEQUENCE of 32-bit elements.
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // Single element: no sequence needed, just constrain the register class.
    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    // Combine two halves into one register via REG_SEQUENCE (SI+ only).
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize a 64-bit constant that is not inline-encodable as two
    // 32-bit S_MOV_B32s recombined with REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local loads/stores also need the m0 glue; then fall through to the
    // tablegen patterns.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    // i32 masks/shifts may be foldable into a scalar BFE (SI+ only).
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  }

  // Everything else goes through the autogenerated matcher.
  return SelectCode(N);
}
    516 
    517 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
    518   assert(AS != 0 && "Use checkPrivateAddress instead.");
    519   if (!Ptr)
    520     return false;
    521 
    522   return Ptr->getType()->getPointerAddressSpace() == AS;
    523 }
    524 
    525 bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
    526   if (Op->getPseudoValue())
    527     return true;
    528 
    529   if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    530     return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
    531 
    532   return false;
    533 }
    534 
    535 bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
    536   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
    537 }
    538 
    539 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
    540   const Value *MemVal = N->getMemOperand()->getValue();
    541   return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
    542           !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
    543           !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
    544 }
    545 
    546 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
    547   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
    548 }
    549 
    550 bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
    551   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
    552 }
    553 
    554 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
    555   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
    556 }
    557 
    558 bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
    559   const Value *MemVal = N->getMemOperand()->getValue();
    560   if (CbId == -1)
    561     return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
    562 
    563   return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
    564 }
    565 
    566 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
    567   if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    568     if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
    569         N->getMemoryVT().bitsLT(MVT::i32))
    570       return true;
    571 
    572   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
    573 }
    574 
    575 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
    576   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
    577 }
    578 
    579 bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
    580   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
    581 }
    582 
    583 bool AMDGPUDAGToDAGISel::isFlatLoad(const  LoadSDNode *N) const {
    584   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
    585 }
    586 
    587 bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
    588   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
    589 }
    590 
    591 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
    592   MachineMemOperand *MMO = N->getMemOperand();
    593   if (checkPrivateAddress(N->getMemOperand())) {
    594     if (MMO) {
    595       const PseudoSourceValue *PSV = MMO->getPseudoValue();
    596       if (PSV && PSV->isConstantPool()) {
    597         return true;
    598       }
    599     }
    600   }
    601   return false;
    602 }
    603 
    604 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
    605   if (checkPrivateAddress(N->getMemOperand())) {
    606     // Check to make sure we are not a constant pool load or a constant load
    607     // that is marked as a private load
    608     if (isCPLoad(N) || isConstantLoad(N, -1)) {
    609       return false;
    610     }
    611   }
    612 
    613   const Value *MemVal = N->getMemOperand()->getValue();
    614   if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
    615       !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
    616       !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
    617       !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
    618       !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
    619       !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
    620       !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    621     return true;
    622   }
    623   return false;
    624 }
    625 
// Human-readable pass name (shown in pass listings / debug output).
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
    629 
    630 #ifdef DEBUGTMP
    631 #undef INT64_C
    632 #endif
    633 #undef DEBUGTMP
    634 
    635 //===----------------------------------------------------------------------===//
    636 // Complex Patterns
    637 //===----------------------------------------------------------------------===//
    638 
    639 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
    640                                                          SDValue& IntPtr) {
    641   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    642     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
    643                                        true);
    644     return true;
    645   }
    646   return false;
    647 }
    648 
    649 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    650     SDValue& BaseReg, SDValue &Offset) {
    651   if (!isa<ConstantSDNode>(Addr)) {
    652     BaseReg = Addr;
    653     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    654     return true;
    655   }
    656   return false;
    657 }
    658 
    659 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
    660                                            SDValue &Offset) {
    661   ConstantSDNode *IMMOffset;
    662 
    663   if (Addr.getOpcode() == ISD::ADD
    664       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
    665       && isInt<16>(IMMOffset->getZExtValue())) {
    666 
    667       Base = Addr.getOperand(0);
    668       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
    669                                          MVT::i32);
    670       return true;
    671   // If the pointer address is constant, we can move it to the offset field.
    672   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
    673              && isInt<16>(IMMOffset->getZExtValue())) {
    674     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
    675                                   SDLoc(CurDAG->getEntryNode()),
    676                                   AMDGPU::ZERO, MVT::i32);
    677     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
    678                                        MVT::i32);
    679     return true;
    680   }
    681 
    682   // Default case, no offset
    683   Base = Addr;
    684   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    685   return true;
    686 }
    687 
    688 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
    689                                             SDValue &Offset) {
    690   ConstantSDNode *C;
    691   SDLoc DL(Addr);
    692 
    693   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    694     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    695     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
    696   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
    697             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    698     Base = Addr.getOperand(0);
    699     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
    700   } else {
    701     Base = Addr;
    702     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    703   }
    704 
    705   return true;
    706 }
    707 
// Expand a 64-bit ADD/SUB into 32-bit halves: the low half uses
// S_ADD_U32/S_SUB_U32 producing a glued carry, the high half consumes it
// via S_ADDC_U32/S_SUBB_U32, and a REG_SEQUENCE recombines the results.
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both 64-bit operands into their 32-bit sub-registers.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  // The low-half op produces (i32 result, glue) — result 1 is the carry.
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };


  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  // Recombine: REG_SEQUENCE(SReg_64, lo, sub0, hi, sub1).
  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
    750 
    751 // We need to handle this here because tablegen doesn't support matching
    752 // instructions with multiple outputs.
    753 SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
    754   SDLoc SL(N);
    755   EVT VT = N->getValueType(0);
    756 
    757   assert(VT == MVT::f32 || VT == MVT::f64);
    758 
    759   unsigned Opc
    760     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
    761 
    762   // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
    763   // omod
    764   SDValue Ops[8];
    765 
    766   SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
    767   SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
    768   SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
    769   return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
    770 }
    771 
    772 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
    773                                          unsigned OffsetBits) const {
    774   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
    775       (OffsetBits == 8 && !isUInt<8>(Offset)))
    776     return false;
    777 
    778   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
    779       Subtarget->unsafeDSOffsetFoldingEnabled())
    780     return true;
    781 
    782   // On Southern Islands instruction with a negative base value and an offset
    783   // don't seem to work.
    784   return CurDAG->SignBitIsZero(Base);
    785 }
    786 
// Select a (base register, 16-bit unsigned byte offset) pair for a DS/LDS
// memory operand.  Always returns true; the fallback is the whole address
// as the base with a zero offset.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Emit the real negation as a machine node and use the constant as
          // the instruction's offset field.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
    847 
// TODO: If offset is too big, put low 16-bit into offset.
// Select a base register plus two 8-bit offsets (in dword units) for the
// paired read2/write2-style DS operands.  Always returns true; the fallback
// is the whole address as the base with offsets 0 and 1.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are encoded in dwords; the second slot is the next dword.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Emit the real negation as a machine node; the constant becomes
          // the offset pair.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Constant address: share a zero base register and fold the address into
    // the offsets (requires dword alignment).
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
    918 
    919 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
    920   return isUInt<12>(Imm->getZExtValue());
    921 }
    922 
// Decompose an address into the full set of MUBUF operands, choosing between
// the addr64 and plain-offset addressing modes.  All modifier bits (glc, slc,
// tfe, idxen, offen, addr64) default to 0 and are only raised for the modes
// selected below; callers inspect them to decide which form applies.
void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  SDLoc DL(Addr);

  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    // Fold the constant into the 12-bit immediate when it fits, otherwise
    // materialize it into soffset; if it doesn't even fit 32 bits, fall
    // through to the generic cases below.
    if (isLegalMUBUFImmOffset(C1)) {
        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
        return;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
}
    988 
    989 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
    990                                            SDValue &VAddr, SDValue &SOffset,
    991                                            SDValue &Offset, SDValue &GLC,
    992                                            SDValue &SLC, SDValue &TFE) const {
    993   SDValue Ptr, Offen, Idxen, Addr64;
    994 
    995   // addr64 bit was removed for volcanic islands.
    996   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    997     return false;
    998 
    999   SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
   1000               GLC, SLC, TFE);
   1001 
   1002   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
   1003   if (C->getSExtValue()) {
   1004     SDLoc DL(Addr);
   1005 
   1006     const SITargetLowering& Lowering =
   1007       *static_cast<const SITargetLowering*>(getTargetLowering());
   1008 
   1009     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
   1010     return true;
   1011   }
   1012 
   1013   return false;
   1014 }
   1015 
   1016 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
   1017                                            SDValue &VAddr, SDValue &SOffset,
   1018                                            SDValue &Offset,
   1019                                            SDValue &SLC) const {
   1020   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
   1021   SDValue GLC, TFE;
   1022 
   1023   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
   1024 }
   1025 
   1026 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
   1027                                             SDValue &VAddr, SDValue &SOffset,
   1028                                             SDValue &ImmOffset) const {
   1029 
   1030   SDLoc DL(Addr);
   1031   MachineFunction &MF = CurDAG->getMachineFunction();
   1032   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   1033 
   1034   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
   1035   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
   1036 
   1037   // (add n0, c1)
   1038   if (CurDAG->isBaseWithConstantOffset(Addr)) {
   1039     SDValue N0 = Addr.getOperand(0);
   1040     SDValue N1 = Addr.getOperand(1);
   1041     // Offsets in vaddr must be positive.
   1042     if (CurDAG->SignBitIsZero(N0)) {
   1043       ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
   1044       if (isLegalMUBUFImmOffset(C1)) {
   1045         VAddr = N0;
   1046         ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
   1047         return true;
   1048       }
   1049     }
   1050   }
   1051 
   1052   // (node)
   1053   VAddr = Addr;
   1054   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
   1055   return true;
   1056 }
   1057 
   1058 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
   1059                                            SDValue &SOffset, SDValue &Offset,
   1060                                            SDValue &GLC, SDValue &SLC,
   1061                                            SDValue &TFE) const {
   1062   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
   1063   const SIInstrInfo *TII =
   1064     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
   1065 
   1066   SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
   1067               GLC, SLC, TFE);
   1068 
   1069   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
   1070       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
   1071       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
   1072     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
   1073                     APInt::getAllOnesValue(32).getZExtValue(); // Size
   1074     SDLoc DL(Addr);
   1075 
   1076     const SITargetLowering& Lowering =
   1077       *static_cast<const SITargetLowering*>(getTargetLowering());
   1078 
   1079     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
   1080     return true;
   1081   }
   1082   return false;
   1083 }
   1084 
   1085 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
   1086                                            SDValue &Soffset, SDValue &Offset,
   1087                                            SDValue &GLC) const {
   1088   SDValue SLC, TFE;
   1089 
   1090   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
   1091 }
   1092 
   1093 ///
   1094 /// \param EncodedOffset This is the immediate value that will be encoded
   1095 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
   1096 ///        will be in units of dwords and on VI+ it will be units of bytes.
   1097 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
   1098                                  int64_t EncodedOffset) {
   1099   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
   1100      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
   1101 }
   1102 
   1103 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
   1104                                           SDValue &Offset, bool &Imm) const {
   1105 
   1106   // FIXME: Handle non-constant offsets.
   1107   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
   1108   if (!C)
   1109     return false;
   1110 
   1111   SDLoc SL(ByteOffsetNode);
   1112   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
   1113   int64_t ByteOffset = C->getSExtValue();
   1114   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
   1115       ByteOffset >> 2 : ByteOffset;
   1116 
   1117   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
   1118     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
   1119     Imm = true;
   1120     return true;
   1121   }
   1122 
   1123   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
   1124     return false;
   1125 
   1126   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
   1127     // 32-bit Immediates are supported on Sea Islands.
   1128     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
   1129   } else {
   1130     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
   1131     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
   1132                                             C32Bit), 0);
   1133   }
   1134   Imm = false;
   1135   return true;
   1136 }
   1137 
   1138 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
   1139                                      SDValue &Offset, bool &Imm) const {
   1140 
   1141   SDLoc SL(Addr);
   1142   if (CurDAG->isBaseWithConstantOffset(Addr)) {
   1143     SDValue N0 = Addr.getOperand(0);
   1144     SDValue N1 = Addr.getOperand(1);
   1145 
   1146     if (SelectSMRDOffset(N1, Offset, Imm)) {
   1147       SBase = N0;
   1148       return true;
   1149     }
   1150   }
   1151   SBase = Addr;
   1152   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
   1153   Imm = true;
   1154   return true;
   1155 }
   1156 
   1157 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
   1158                                        SDValue &Offset) const {
   1159   bool Imm;
   1160   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
   1161 }
   1162 
   1163 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
   1164                                          SDValue &Offset) const {
   1165 
   1166   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
   1167     return false;
   1168 
   1169   bool Imm;
   1170   if (!SelectSMRD(Addr, SBase, Offset, Imm))
   1171     return false;
   1172 
   1173   return !Imm && isa<ConstantSDNode>(Offset);
   1174 }
   1175 
   1176 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
   1177                                         SDValue &Offset) const {
   1178   bool Imm;
   1179   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
   1180          !isa<ConstantSDNode>(Offset);
   1181 }
   1182 
   1183 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
   1184                                              SDValue &Offset) const {
   1185   bool Imm;
   1186   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
   1187 }
   1188 
   1189 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
   1190                                                SDValue &Offset) const {
   1191   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
   1192     return false;
   1193 
   1194   bool Imm;
   1195   if (!SelectSMRDOffset(Addr, Offset, Imm))
   1196     return false;
   1197 
   1198   return !Imm && isa<ConstantSDNode>(Offset);
   1199 }
   1200 
   1201 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
   1202                                               SDValue &Offset) const {
   1203   bool Imm;
   1204   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
   1205          !isa<ConstantSDNode>(Offset);
   1206 }
   1207 
// FIXME: This is incorrect and only enough to be able to compile.
// Lowers an addrspacecast by reinterpreting the pointer value: truncate
// (64->32), zero-extend via REG_SEQUENCE (32->64), or bitcast (64->64).
// Always emits an "unsupported" diagnostic first.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
  SDLoc DL(N);

  // Report the limitation to the user; selection still proceeds below.
  const MachineFunction &MF = CurDAG->getMachineFunction();
  DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(),
                                           "addrspacecast not implemented");
  CurDAG->getContext()->diagnose(NotImplemented);

  assert(Subtarget->hasFlatAddressSpace() &&
         "addrspacecast only supported with flat address space!");

  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
         "Can only cast to / from flat address space!");

  // The flat instructions read the address as the index of the VGPR holding the
  // address, so casting should just be reinterpreting the base VGPR, so just
  // insert trunc / bitcast / zext.

  SDValue Src = ASC->getOperand(0);
  EVT DestVT = ASC->getValueType(0);
  EVT SrcVT = Src.getValueType();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned DestSize = DestVT.getSizeInBits();

  if (SrcSize > DestSize) {
    // 64 -> 32: keep the low subregister.
    assert(SrcSize == 64 && DestSize == 32);
    return CurDAG->getMachineNode(
      TargetOpcode::EXTRACT_SUBREG,
      DL,
      DestVT,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
  }

  if (DestSize > SrcSize) {
    // 32 -> 64: zero-extend by pairing the source with a zeroed high half.
    assert(SrcSize == 32 && DestSize == 64);

    // FIXME: This is probably wrong, we should never be defining
    // a register class with both VGPRs and SGPRs
    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
                                           MVT::i32);

    const SDValue Ops[] = {
      RC,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                     CurDAG->getConstant(0, DL, MVT::i32)), 0),
      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  // Same width: a plain bitcast suffices.
  assert(SrcSize == 64 && DestSize == 64);
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}
   1270 
   1271 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
   1272                                      uint32_t Offset, uint32_t Width) {
   1273   // Transformation function, pack the offset and width of a BFE into
   1274   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
   1275   // source, bits [5:0] contain the offset and bits [22:16] the width.
   1276   uint32_t PackedVal = Offset | (Width << 16);
   1277   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
   1278 
   1279   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
   1280 }
   1281 
   1282 SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
   1283   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
   1284   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
   1285   // Predicate: 0 < b <= c < 32
   1286 
   1287   const SDValue &Shl = N->getOperand(0);
   1288   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
   1289   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
   1290 
   1291   if (B && C) {
   1292     uint32_t BVal = B->getZExtValue();
   1293     uint32_t CVal = C->getZExtValue();
   1294 
   1295     if (0 < BVal && BVal <= CVal && CVal < 32) {
   1296       bool Signed = N->getOpcode() == ISD::SRA;
   1297       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
   1298 
   1299       return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
   1300                       CVal - BVal, 32 - CVal);
   1301     }
   1302   }
   1303   return SelectCode(N);
   1304 }
   1305 
// Try to fold AND/SRL/SRA trees into a single S_BFE_U32 / S_BFE_I32
// bitfield extract; falls back to the normal tablegen selection.
SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        // A contiguous low mask means the popcount is the field width.
        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      // (shl a, b) srl c -- handled by the shift-pair matcher.
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      // (shl a, b) sra c -- signed variant of the shift-pair matcher.
      return SelectS_BFEFromShifts(N);
    break;
  }

  return SelectCode(N);
}
   1359 
   1360 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
   1361                                         SDValue &SrcMods) const {
   1362 
   1363   unsigned Mods = 0;
   1364 
   1365   Src = In;
   1366 
   1367   if (Src.getOpcode() == ISD::FNEG) {
   1368     Mods |= SISrcMods::NEG;
   1369     Src = Src.getOperand(0);
   1370   }
   1371 
   1372   if (Src.getOpcode() == ISD::FABS) {
   1373     Mods |= SISrcMods::ABS;
   1374     Src = Src.getOperand(0);
   1375   }
   1376 
   1377   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
   1378 
   1379   return true;
   1380 }
   1381 
   1382 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
   1383                                          SDValue &SrcMods) const {
   1384   bool Res = SelectVOP3Mods(In, Src, SrcMods);
   1385   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
   1386 }
   1387 
   1388 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
   1389                                          SDValue &SrcMods, SDValue &Clamp,
   1390                                          SDValue &Omod) const {
   1391   SDLoc DL(In);
   1392   // FIXME: Handle Clamp and Omod
   1393   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
   1394   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
   1395 
   1396   return SelectVOP3Mods(In, Src, SrcMods);
   1397 }
   1398 
   1399 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
   1400                                            SDValue &SrcMods, SDValue &Clamp,
   1401                                            SDValue &Omod) const {
   1402   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
   1403 
   1404   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
   1405                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
   1406                 cast<ConstantSDNode>(Omod)->isNullValue();
   1407 }
   1408 
   1409 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
   1410                                               SDValue &SrcMods,
   1411                                               SDValue &Omod) const {
   1412   // FIXME: Handle Omod
   1413   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
   1414 
   1415   return SelectVOP3Mods(In, Src, SrcMods);
   1416 }
   1417 
   1418 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
   1419                                                    SDValue &SrcMods,
   1420                                                    SDValue &Clamp,
   1421                                                    SDValue &Omod) const {
   1422   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
   1423   return SelectVOP3Mods(In, Src, SrcMods);
   1424 }
   1425 
// Rewrites i64 non-extending loads and i64 non-truncating stores as v2i32
// operations before instruction selection, so the tablegen patterns only
// have to match the v2i32 forms.
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  bool Modified = false;

  // XXX - Other targets seem to be able to do this without a worklist.
  // Collect first, then mutate, so we don't invalidate the allnodes
  // iteration while replacing nodes.
  SmallVector<LoadSDNode *, 8> LoadsToReplace;
  SmallVector<StoreSDNode *, 8> StoresToReplace;

  for (SDNode &Node : CurDAG->allnodes()) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
      EVT VT = LD->getValueType(0);
      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
        continue;

      // To simplify the TableGen patters, we replace all i64 loads with v2i32
      // loads.  Alternatively, we could promote i64 loads to v2i32 during DAG
      // legalization, however, so places (ExpandUnalignedLoad) in the DAG
      // legalizer assume that if i64 is legal, so doing this promotion early
      // can cause problems.
      LoadsToReplace.push_back(LD);
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
      // Handle i64 stores here for the same reason mentioned above for loads.
      SDValue Value = ST->getValue();
      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
        continue;
      StoresToReplace.push_back(ST);
    }
  }

  for (LoadSDNode *LD : LoadsToReplace) {
    SDLoc SL(LD);

    // New v2i32 load reuses the original chain and memory operand; the value
    // result is bitcast back to i64 for the existing users.
    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
    Modified = true;
  }

  for (StoreSDNode *ST : StoresToReplace) {
    // Stores are updated in place: only the stored value changes (bitcast to
    // v2i32); chain, pointer and offset operands are preserved.
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
                                       MVT::v2i32, ST->getValue());
    const SDValue StoreOps[] = {
      ST->getChain(),
      NewValue,
      ST->getBasePtr(),
      ST->getOffset()
    };

    CurDAG->UpdateNodeOperands(ST, StoreOps);
    Modified = true;
  }

  // XXX - Is this necessary?
  if (Modified)
    CurDAG->RemoveDeadNodes();
}
   1484 
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  // Run the target's post-selection peephole folds (PostISelFolding) over all
  // selected machine nodes, iterating to a fixpoint: a fold may expose
  // further folding opportunities on other nodes.
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      // Only already-selected machine nodes are candidates for folding.
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      // PostISelFolding returns the original node when nothing was folded.
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    // Drop nodes orphaned by the replacements before deciding whether to
    // make another pass.
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
   1506