Home | History | Annotate | Download | only in AMDGPU
      1 //===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// \brief R600 Implementation of TargetInstrInfo.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "R600InstrInfo.h"
     16 #include "AMDGPU.h"
     17 #include "AMDGPUSubtarget.h"
     18 #include "AMDGPUTargetMachine.h"
     19 #include "R600Defines.h"
     20 #include "R600MachineFunctionInfo.h"
     21 #include "R600RegisterInfo.h"
     22 #include "llvm/CodeGen/MachineFrameInfo.h"
     23 #include "llvm/CodeGen/MachineInstrBuilder.h"
     24 #include "llvm/CodeGen/MachineRegisterInfo.h"
     25 
     26 using namespace llvm;
     27 
     28 #define GET_INSTRINFO_CTOR_DTOR
     29 #include "AMDGPUGenDFAPacketizer.inc"
     30 
     31 R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
     32   : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
     33 
     34 bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
     35   return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
     36 }
     37 
     38 bool R600InstrInfo::isVector(const MachineInstr &MI) const {
     39   return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
     40 }
     41 
     42 void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     43                                 MachineBasicBlock::iterator MI,
     44                                 const DebugLoc &DL, unsigned DestReg,
     45                                 unsigned SrcReg, bool KillSrc) const {
     46   unsigned VectorComponents = 0;
     47   if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
     48       AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
     49       (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
     50        AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
     51     VectorComponents = 4;
     52   } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
     53             AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
     54             (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
     55              AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
     56     VectorComponents = 2;
     57   }
     58 
     59   if (VectorComponents > 0) {
     60     for (unsigned I = 0; I < VectorComponents; I++) {
     61       unsigned SubRegIndex = RI.getSubRegFromChannel(I);
     62       buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
     63                               RI.getSubReg(DestReg, SubRegIndex),
     64                               RI.getSubReg(SrcReg, SubRegIndex))
     65                               .addReg(DestReg,
     66                                       RegState::Define | RegState::Implicit);
     67     }
     68   } else {
     69     MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
     70                                                   DestReg, SrcReg);
     71     NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
     72                                     .setIsKill(KillSrc);
     73   }
     74 }
     75 
     76 /// \returns true if \p MBBI can be moved into a new basic.
     77 bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
     78                                        MachineBasicBlock::iterator MBBI) const {
     79   for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
     80                                         E = MBBI->operands_end(); I != E; ++I) {
     81     if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
     82         I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
     83       return false;
     84   }
     85   return true;
     86 }
     87 
     88 bool R600InstrInfo::isMov(unsigned Opcode) const {
     89   switch(Opcode) {
     90   default:
     91     return false;
     92   case AMDGPU::MOV:
     93   case AMDGPU::MOV_IMM_F32:
     94   case AMDGPU::MOV_IMM_I32:
     95     return true;
     96   }
     97 }
     98 
     99 // Some instructions act as place holders to emulate operations that the GPU
    100 // hardware does automatically. This function can be used to check if
    101 // an opcode falls into this category.
    102 bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
    103   switch (Opcode) {
    104   default: return false;
    105   case AMDGPU::RETURN:
    106     return true;
    107   }
    108 }
    109 
    110 bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
    111   return false;
    112 }
    113 
    114 bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
    115   switch(Opcode) {
    116     default: return false;
    117     case AMDGPU::CUBE_r600_pseudo:
    118     case AMDGPU::CUBE_r600_real:
    119     case AMDGPU::CUBE_eg_pseudo:
    120     case AMDGPU::CUBE_eg_real:
    121       return true;
    122   }
    123 }
    124 
    125 bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
    126   unsigned TargetFlags = get(Opcode).TSFlags;
    127 
    128   return (TargetFlags & R600_InstFlag::ALU_INST);
    129 }
    130 
    131 bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
    132   unsigned TargetFlags = get(Opcode).TSFlags;
    133 
    134   return ((TargetFlags & R600_InstFlag::OP1) |
    135           (TargetFlags & R600_InstFlag::OP2) |
    136           (TargetFlags & R600_InstFlag::OP3));
    137 }
    138 
    139 bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
    140   unsigned TargetFlags = get(Opcode).TSFlags;
    141 
    142   return ((TargetFlags & R600_InstFlag::LDS_1A) |
    143           (TargetFlags & R600_InstFlag::LDS_1A1D) |
    144           (TargetFlags & R600_InstFlag::LDS_1A2D));
    145 }
    146 
    147 bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
    148   return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
    149 }
    150 
    151 bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
    152   return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
    153 }
    154 
    155 bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
    156   if (isALUInstr(MI.getOpcode()))
    157     return true;
    158   if (isVector(MI) || isCubeOp(MI.getOpcode()))
    159     return true;
    160   switch (MI.getOpcode()) {
    161   case AMDGPU::PRED_X:
    162   case AMDGPU::INTERP_PAIR_XY:
    163   case AMDGPU::INTERP_PAIR_ZW:
    164   case AMDGPU::INTERP_VEC_LOAD:
    165   case AMDGPU::COPY:
    166   case AMDGPU::DOT_4:
    167     return true;
    168   default:
    169     return false;
    170   }
    171 }
    172 
    173 bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
    174   if (ST.hasCaymanISA())
    175     return false;
    176   return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
    177 }
    178 
    179 bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {
    180   return isTransOnly(MI.getOpcode());
    181 }
    182 
    183 bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
    184   return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
    185 }
    186 
    187 bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const {
    188   return isVectorOnly(MI.getOpcode());
    189 }
    190 
    191 bool R600InstrInfo::isExport(unsigned Opcode) const {
    192   return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
    193 }
    194 
    195 bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
    196   return ST.hasVertexCache() && IS_VTX(get(Opcode));
    197 }
    198 
    199 bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const {
    200   const MachineFunction *MF = MI.getParent()->getParent();
    201   return !AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
    202          usesVertexCache(MI.getOpcode());
    203 }
    204 
    205 bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
    206   return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
    207 }
    208 
    209 bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {
    210   const MachineFunction *MF = MI.getParent()->getParent();
    211   return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
    212           usesVertexCache(MI.getOpcode())) ||
    213          usesTextureCache(MI.getOpcode());
    214 }
    215 
    216 bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
    217   switch (Opcode) {
    218   case AMDGPU::KILLGT:
    219   case AMDGPU::GROUP_BARRIER:
    220     return true;
    221   default:
    222     return false;
    223   }
    224 }
    225 
    226 bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
    227   return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
    228 }
    229 
    230 bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
    231   return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
    232 }
    233 
    234 bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
    235   if (!isALUInstr(MI.getOpcode())) {
    236     return false;
    237   }
    238   for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
    239                                         E = MI.operands_end();
    240        I != E; ++I) {
    241     if (!I->isReg() || !I->isUse() ||
    242         TargetRegisterInfo::isVirtualRegister(I->getReg()))
    243       continue;
    244 
    245     if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
    246       return true;
    247   }
    248   return false;
    249 }
    250 
    251 int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
    252   static const unsigned OpTable[] = {
    253     AMDGPU::OpName::src0,
    254     AMDGPU::OpName::src1,
    255     AMDGPU::OpName::src2
    256   };
    257 
    258   assert (SrcNum < 3);
    259   return getOperandIdx(Opcode, OpTable[SrcNum]);
    260 }
    261 
    262 int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
    263   static const unsigned SrcSelTable[][2] = {
    264     {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    265     {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    266     {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    267     {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    268     {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    269     {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    270     {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    271     {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    272     {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    273     {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
    274     {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
    275   };
    276 
    277   for (const auto &Row : SrcSelTable) {
    278     if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
    279       return getOperandIdx(Opcode, Row[1]);
    280     }
    281   }
    282   return -1;
    283 }
    284 
    285 SmallVector<std::pair<MachineOperand *, int64_t>, 3>
    286 R600InstrInfo::getSrcs(MachineInstr &MI) const {
    287   SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
    288 
    289   if (MI.getOpcode() == AMDGPU::DOT_4) {
    290     static const unsigned OpTable[8][2] = {
    291       {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    292       {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    293       {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    294       {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    295       {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    296       {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    297       {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
    298       {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    299     };
    300 
    301     for (unsigned j = 0; j < 8; j++) {
    302       MachineOperand &MO =
    303           MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));
    304       unsigned Reg = MO.getReg();
    305       if (Reg == AMDGPU::ALU_CONST) {
    306         MachineOperand &Sel =
    307             MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
    308         Result.push_back(std::make_pair(&MO, Sel.getImm()));
    309         continue;
    310       }
    311 
    312     }
    313     return Result;
    314   }
    315 
    316   static const unsigned OpTable[3][2] = {
    317     {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    318     {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    319     {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    320   };
    321 
    322   for (unsigned j = 0; j < 3; j++) {
    323     int SrcIdx = getOperandIdx(MI.getOpcode(), OpTable[j][0]);
    324     if (SrcIdx < 0)
    325       break;
    326     MachineOperand &MO = MI.getOperand(SrcIdx);
    327     unsigned Reg = MO.getReg();
    328     if (Reg == AMDGPU::ALU_CONST) {
    329       MachineOperand &Sel =
    330           MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
    331       Result.push_back(std::make_pair(&MO, Sel.getImm()));
    332       continue;
    333     }
    334     if (Reg == AMDGPU::ALU_LITERAL_X) {
    335       MachineOperand &Operand =
    336           MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
    337       if (Operand.isImm()) {
    338         Result.push_back(std::make_pair(&MO, Operand.getImm()));
    339         continue;
    340       }
    341       assert(Operand.isGlobal());
    342     }
    343     Result.push_back(std::make_pair(&MO, 0));
    344   }
    345   return Result;
    346 }
    347 
    348 std::vector<std::pair<int, unsigned>>
    349 R600InstrInfo::ExtractSrcs(MachineInstr &MI,
    350                            const DenseMap<unsigned, unsigned> &PV,
    351                            unsigned &ConstCount) const {
    352   ConstCount = 0;
    353   ArrayRef<std::pair<MachineOperand *, int64_t>> Srcs = getSrcs(MI);
    354   const std::pair<int, unsigned> DummyPair(-1, 0);
    355   std::vector<std::pair<int, unsigned> > Result;
    356   unsigned i = 0;
    357   for (unsigned n = Srcs.size(); i < n; ++i) {
    358     unsigned Reg = Srcs[i].first->getReg();
    359     int Index = RI.getEncodingValue(Reg) & 0xff;
    360     if (Reg == AMDGPU::OQAP) {
    361       Result.push_back(std::make_pair(Index, 0U));
    362     }
    363     if (PV.find(Reg) != PV.end()) {
    364       // 255 is used to tells its a PS/PV reg
    365       Result.push_back(std::make_pair(255, 0U));
    366       continue;
    367     }
    368     if (Index > 127) {
    369       ConstCount++;
    370       Result.push_back(DummyPair);
    371       continue;
    372     }
    373     unsigned Chan = RI.getHWRegChan(Reg);
    374     Result.push_back(std::make_pair(Index, Chan));
    375   }
    376   for (; i < 3; ++i)
    377     Result.push_back(DummyPair);
    378   return Result;
    379 }
    380 
    381 static std::vector<std::pair<int, unsigned> >
    382 Swizzle(std::vector<std::pair<int, unsigned> > Src,
    383         R600InstrInfo::BankSwizzle Swz) {
    384   if (Src[0] == Src[1])
    385     Src[1].first = -1;
    386   switch (Swz) {
    387   case R600InstrInfo::ALU_VEC_012_SCL_210:
    388     break;
    389   case R600InstrInfo::ALU_VEC_021_SCL_122:
    390     std::swap(Src[1], Src[2]);
    391     break;
    392   case R600InstrInfo::ALU_VEC_102_SCL_221:
    393     std::swap(Src[0], Src[1]);
    394     break;
    395   case R600InstrInfo::ALU_VEC_120_SCL_212:
    396     std::swap(Src[0], Src[1]);
    397     std::swap(Src[0], Src[2]);
    398     break;
    399   case R600InstrInfo::ALU_VEC_201:
    400     std::swap(Src[0], Src[2]);
    401     std::swap(Src[0], Src[1]);
    402     break;
    403   case R600InstrInfo::ALU_VEC_210:
    404     std::swap(Src[0], Src[2]);
    405     break;
    406   }
    407   return Src;
    408 }
    409 
    410 static unsigned
    411 getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
    412   switch (Swz) {
    413   case R600InstrInfo::ALU_VEC_012_SCL_210: {
    414     unsigned Cycles[3] = { 2, 1, 0};
    415     return Cycles[Op];
    416   }
    417   case R600InstrInfo::ALU_VEC_021_SCL_122: {
    418     unsigned Cycles[3] = { 1, 2, 2};
    419     return Cycles[Op];
    420   }
    421   case R600InstrInfo::ALU_VEC_120_SCL_212: {
    422     unsigned Cycles[3] = { 2, 1, 2};
    423     return Cycles[Op];
    424   }
    425   case R600InstrInfo::ALU_VEC_102_SCL_221: {
    426     unsigned Cycles[3] = { 2, 2, 1};
    427     return Cycles[Op];
    428   }
    429   default:
    430     llvm_unreachable("Wrong Swizzle for Trans Slot");
    431     return 0;
    432   }
    433 }
    434 
    435 /// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
    436 /// in the same Instruction Group while meeting read port limitations given a
    437 /// Swz swizzle sequence.
    438 unsigned  R600InstrInfo::isLegalUpTo(
    439     const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    440     const std::vector<R600InstrInfo::BankSwizzle> &Swz,
    441     const std::vector<std::pair<int, unsigned> > &TransSrcs,
    442     R600InstrInfo::BankSwizzle TransSwz) const {
    443   int Vector[4][3];
    444   memset(Vector, -1, sizeof(Vector));
    445   for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
    446     const std::vector<std::pair<int, unsigned> > &Srcs =
    447         Swizzle(IGSrcs[i], Swz[i]);
    448     for (unsigned j = 0; j < 3; j++) {
    449       const std::pair<int, unsigned> &Src = Srcs[j];
    450       if (Src.first < 0 || Src.first == 255)
    451         continue;
    452       if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
    453         if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
    454             Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
    455             // The value from output queue A (denoted by register OQAP) can
    456             // only be fetched during the first cycle.
    457             return false;
    458         }
    459         // OQAP does not count towards the normal read port restrictions
    460         continue;
    461       }
    462       if (Vector[Src.second][j] < 0)
    463         Vector[Src.second][j] = Src.first;
    464       if (Vector[Src.second][j] != Src.first)
    465         return i;
    466     }
    467   }
    468   // Now check Trans Alu
    469   for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
    470     const std::pair<int, unsigned> &Src = TransSrcs[i];
    471     unsigned Cycle = getTransSwizzle(TransSwz, i);
    472     if (Src.first < 0)
    473       continue;
    474     if (Src.first == 255)
    475       continue;
    476     if (Vector[Src.second][Cycle] < 0)
    477       Vector[Src.second][Cycle] = Src.first;
    478     if (Vector[Src.second][Cycle] != Src.first)
    479       return IGSrcs.size() - 1;
    480   }
    481   return IGSrcs.size();
    482 }
    483 
    484 /// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
    485 /// (in lexicographic term) swizzle sequence assuming that all swizzles after
    486 /// Idx can be skipped
    487 static bool
    488 NextPossibleSolution(
    489     std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    490     unsigned Idx) {
    491   assert(Idx < SwzCandidate.size());
    492   int ResetIdx = Idx;
    493   while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    494     ResetIdx --;
    495   for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
    496     SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
    497   }
    498   if (ResetIdx == -1)
    499     return false;
    500   int NextSwizzle = SwzCandidate[ResetIdx] + 1;
    501   SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
    502   return true;
    503 }
    504 
    505 /// Enumerate all possible Swizzle sequence to find one that can meet all
    506 /// read port requirements.
    507 bool R600InstrInfo::FindSwizzleForVectorSlot(
    508     const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    509     std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    510     const std::vector<std::pair<int, unsigned> > &TransSrcs,
    511     R600InstrInfo::BankSwizzle TransSwz) const {
    512   unsigned ValidUpTo = 0;
    513   do {
    514     ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    515     if (ValidUpTo == IGSrcs.size())
    516       return true;
    517   } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
    518   return false;
    519 }
    520 
    521 /// Instructions in Trans slot can't read gpr at cycle 0 if they also read
    522 /// a const, and can't read a gpr at cycle 1 if they read 2 const.
    523 static bool
    524 isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
    525                   const std::vector<std::pair<int, unsigned> > &TransOps,
    526                   unsigned ConstCount) {
    527   // TransALU can't read 3 constants
    528   if (ConstCount > 2)
    529     return false;
    530   for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
    531     const std::pair<int, unsigned> &Src = TransOps[i];
    532     unsigned Cycle = getTransSwizzle(TransSwz, i);
    533     if (Src.first < 0)
    534       continue;
    535     if (ConstCount > 0 && Cycle == 0)
    536       return false;
    537     if (ConstCount > 1 && Cycle == 1)
    538       return false;
    539   }
    540   return true;
    541 }
    542 
    543 bool
    544 R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
    545                                        const DenseMap<unsigned, unsigned> &PV,
    546                                        std::vector<BankSwizzle> &ValidSwizzle,
    547                                        bool isLastAluTrans)
    548     const {
    549   //Todo : support shared src0 - src1 operand
    550 
    551   std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
    552   ValidSwizzle.clear();
    553   unsigned ConstCount;
    554   BankSwizzle TransBS = ALU_VEC_012_SCL_210;
    555   for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    556     IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
    557     unsigned Op = getOperandIdx(IG[i]->getOpcode(),
    558         AMDGPU::OpName::bank_swizzle);
    559     ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
    560         IG[i]->getOperand(Op).getImm());
    561   }
    562   std::vector<std::pair<int, unsigned> > TransOps;
    563   if (!isLastAluTrans)
    564     return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
    565 
    566   TransOps = std::move(IGSrcs.back());
    567   IGSrcs.pop_back();
    568   ValidSwizzle.pop_back();
    569 
    570   static const R600InstrInfo::BankSwizzle TransSwz[] = {
    571     ALU_VEC_012_SCL_210,
    572     ALU_VEC_021_SCL_122,
    573     ALU_VEC_120_SCL_212,
    574     ALU_VEC_102_SCL_221
    575   };
    576   for (unsigned i = 0; i < 4; i++) {
    577     TransBS = TransSwz[i];
    578     if (!isConstCompatible(TransBS, TransOps, ConstCount))
    579       continue;
    580     bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
    581         TransBS);
    582     if (Result) {
    583       ValidSwizzle.push_back(TransBS);
    584       return true;
    585     }
    586   }
    587 
    588   return false;
    589 }
    590 
    591 
    592 bool
    593 R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    594     const {
    595   assert (Consts.size() <= 12 && "Too many operands in instructions group");
    596   unsigned Pair1 = 0, Pair2 = 0;
    597   for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    598     unsigned ReadConstHalf = Consts[i] & 2;
    599     unsigned ReadConstIndex = Consts[i] & (~3);
    600     unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    601     if (!Pair1) {
    602       Pair1 = ReadHalfConst;
    603       continue;
    604     }
    605     if (Pair1 == ReadHalfConst)
    606       continue;
    607     if (!Pair2) {
    608       Pair2 = ReadHalfConst;
    609       continue;
    610     }
    611     if (Pair2 != ReadHalfConst)
    612       return false;
    613   }
    614   return true;
    615 }
    616 
    617 bool
    618 R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
    619     const {
    620   std::vector<unsigned> Consts;
    621   SmallSet<int64_t, 4> Literals;
    622   for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    623     MachineInstr &MI = *MIs[i];
    624     if (!isALUInstr(MI.getOpcode()))
    625       continue;
    626 
    627     ArrayRef<std::pair<MachineOperand *, int64_t>> Srcs = getSrcs(MI);
    628 
    629     for (const auto &Src:Srcs) {
    630       if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
    631         Literals.insert(Src.second);
    632       if (Literals.size() > 4)
    633         return false;
    634       if (Src.first->getReg() == AMDGPU::ALU_CONST)
    635         Consts.push_back(Src.second);
    636       if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
    637           AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
    638         unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
    639         unsigned Chan = RI.getHWRegChan(Src.first->getReg());
    640         Consts.push_back((Index << 2) | Chan);
    641       }
    642     }
    643   }
    644   return fitsConstReadLimitations(Consts);
    645 }
    646 
    647 DFAPacketizer *
    648 R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
    649   const InstrItineraryData *II = STI.getInstrItineraryData();
    650   return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II);
    651 }
    652 
    653 static bool
    654 isPredicateSetter(unsigned Opcode) {
    655   switch (Opcode) {
    656   case AMDGPU::PRED_X:
    657     return true;
    658   default:
    659     return false;
    660   }
    661 }
    662 
    663 static MachineInstr *
    664 findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
    665                              MachineBasicBlock::iterator I) {
    666   while (I != MBB.begin()) {
    667     --I;
    668     MachineInstr &MI = *I;
    669     if (isPredicateSetter(MI.getOpcode()))
    670       return &MI;
    671   }
    672 
    673   return nullptr;
    674 }
    675 
    676 static
    677 bool isJump(unsigned Opcode) {
    678   return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
    679 }
    680 
    681 static bool isBranch(unsigned Opcode) {
    682   return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
    683       Opcode == AMDGPU::BRANCH_COND_f32;
    684 }
    685 
    686 bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
    687                                   MachineBasicBlock *&TBB,
    688                                   MachineBasicBlock *&FBB,
    689                                   SmallVectorImpl<MachineOperand> &Cond,
    690                                   bool AllowModify) const {
    691   // Most of the following comes from the ARM implementation of AnalyzeBranch
    692 
    693   // If the block has no terminators, it just falls into the block after it.
    694   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
    695   if (I == MBB.end())
    696     return false;
    697 
    698   // AMDGPU::BRANCH* instructions are only available after isel and are not
    699   // handled
    700   if (isBranch(I->getOpcode()))
    701     return true;
    702   if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
    703     return false;
    704   }
    705 
    706   // Remove successive JUMP
    707   while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
    708       MachineBasicBlock::iterator PriorI = std::prev(I);
    709       if (AllowModify)
    710         I->removeFromParent();
    711       I = PriorI;
    712   }
    713   MachineInstr &LastInst = *I;
    714 
    715   // If there is only one terminator instruction, process it.
    716   unsigned LastOpc = LastInst.getOpcode();
    717   if (I == MBB.begin() ||
    718           !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
    719     if (LastOpc == AMDGPU::JUMP) {
    720       TBB = LastInst.getOperand(0).getMBB();
    721       return false;
    722     } else if (LastOpc == AMDGPU::JUMP_COND) {
    723       auto predSet = I;
    724       while (!isPredicateSetter(predSet->getOpcode())) {
    725         predSet = --I;
    726       }
    727       TBB = LastInst.getOperand(0).getMBB();
    728       Cond.push_back(predSet->getOperand(1));
    729       Cond.push_back(predSet->getOperand(2));
    730       Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    731       return false;
    732     }
    733     return true;  // Can't handle indirect branch.
    734   }
    735 
    736   // Get the instruction before it if it is a terminator.
    737   MachineInstr &SecondLastInst = *I;
    738   unsigned SecondLastOpc = SecondLastInst.getOpcode();
    739 
    740   // If the block ends with a B and a Bcc, handle it.
    741   if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    742     auto predSet = --I;
    743     while (!isPredicateSetter(predSet->getOpcode())) {
    744       predSet = --I;
    745     }
    746     TBB = SecondLastInst.getOperand(0).getMBB();
    747     FBB = LastInst.getOperand(0).getMBB();
    748     Cond.push_back(predSet->getOperand(1));
    749     Cond.push_back(predSet->getOperand(2));
    750     Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    751     return false;
    752   }
    753 
    754   // Otherwise, can't handle this.
    755   return true;
    756 }
    757 
    758 static
    759 MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
    760   for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
    761       It != E; ++It) {
    762     if (It->getOpcode() == AMDGPU::CF_ALU ||
    763         It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
    764       return std::prev(It.base());
    765   }
    766   return MBB.end();
    767 }
    768 
    769 unsigned R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
    770                                      MachineBasicBlock *TBB,
    771                                      MachineBasicBlock *FBB,
    772                                      ArrayRef<MachineOperand> Cond,
    773                                      const DebugLoc &DL) const {
    774   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
    775 
    776   if (!FBB) {
    777     if (Cond.empty()) {
    778       BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
    779       return 1;
    780     } else {
    781       MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    782       assert(PredSet && "No previous predicate !");
    783       addFlag(*PredSet, 0, MO_FLAG_PUSH);
    784       PredSet->getOperand(2).setImm(Cond[1].getImm());
    785 
    786       BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
    787              .addMBB(TBB)
    788              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    789       MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    790       if (CfAlu == MBB.end())
    791         return 1;
    792       assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
    793       CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    794       return 1;
    795     }
    796   } else {
    797     MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    798     assert(PredSet && "No previous predicate !");
    799     addFlag(*PredSet, 0, MO_FLAG_PUSH);
    800     PredSet->getOperand(2).setImm(Cond[1].getImm());
    801     BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
    802             .addMBB(TBB)
    803             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    804     BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    805     MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    806     if (CfAlu == MBB.end())
    807       return 2;
    808     assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
    809     CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    810     return 2;
    811   }
    812 }
    813 
    814 unsigned
    815 R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
    816 
    817   // Note : we leave PRED* instructions there.
    818   // They may be needed when predicating instructions.
    819 
    820   MachineBasicBlock::iterator I = MBB.end();
    821 
    822   if (I == MBB.begin()) {
    823     return 0;
    824   }
    825   --I;
    826   switch (I->getOpcode()) {
    827   default:
    828     return 0;
    829   case AMDGPU::JUMP_COND: {
    830     MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    831     clearFlag(*predSet, 0, MO_FLAG_PUSH);
    832     I->eraseFromParent();
    833     MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    834     if (CfAlu == MBB.end())
    835       break;
    836     assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    837     CfAlu->setDesc(get(AMDGPU::CF_ALU));
    838     break;
    839   }
    840   case AMDGPU::JUMP:
    841     I->eraseFromParent();
    842     break;
    843   }
    844   I = MBB.end();
    845 
    846   if (I == MBB.begin()) {
    847     return 1;
    848   }
    849   --I;
    850   switch (I->getOpcode()) {
    851     // FIXME: only one case??
    852   default:
    853     return 1;
    854   case AMDGPU::JUMP_COND: {
    855     MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    856     clearFlag(*predSet, 0, MO_FLAG_PUSH);
    857     I->eraseFromParent();
    858     MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    859     if (CfAlu == MBB.end())
    860       break;
    861     assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    862     CfAlu->setDesc(get(AMDGPU::CF_ALU));
    863     break;
    864   }
    865   case AMDGPU::JUMP:
    866     I->eraseFromParent();
    867     break;
    868   }
    869   return 2;
    870 }
    871 
    872 bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {
    873   int idx = MI.findFirstPredOperandIdx();
    874   if (idx < 0)
    875     return false;
    876 
    877   unsigned Reg = MI.getOperand(idx).getReg();
    878   switch (Reg) {
    879   default: return false;
    880   case AMDGPU::PRED_SEL_ONE:
    881   case AMDGPU::PRED_SEL_ZERO:
    882   case AMDGPU::PREDICATE_BIT:
    883     return true;
    884   }
    885 }
    886 
    887 bool R600InstrInfo::isPredicable(MachineInstr &MI) const {
    888   // XXX: KILL* instructions can be predicated, but they must be the last
    889   // instruction in a clause, so this means any instructions after them cannot
    890   // be predicated.  Until we have proper support for instruction clauses in the
    891   // backend, we will mark KILL* instructions as unpredicable.
    892 
    893   if (MI.getOpcode() == AMDGPU::KILLGT) {
    894     return false;
    895   } else if (MI.getOpcode() == AMDGPU::CF_ALU) {
    896     // If the clause start in the middle of MBB then the MBB has more
    897     // than a single clause, unable to predicate several clauses.
    898     if (MI.getParent()->begin() != MachineBasicBlock::iterator(MI))
    899       return false;
    900     // TODO: We don't support KC merging atm
    901     return MI.getOperand(3).getImm() == 0 && MI.getOperand(4).getImm() == 0;
    902   } else if (isVector(MI)) {
    903     return false;
    904   } else {
    905     return AMDGPUInstrInfo::isPredicable(MI);
    906   }
    907 }
    908 
    909 
    910 bool
    911 R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
    912                                    unsigned NumCyles,
    913                                    unsigned ExtraPredCycles,
    914                                    BranchProbability Probability) const{
    915   return true;
    916 }
    917 
    918 bool
    919 R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
    920                                    unsigned NumTCycles,
    921                                    unsigned ExtraTCycles,
    922                                    MachineBasicBlock &FMBB,
    923                                    unsigned NumFCycles,
    924                                    unsigned ExtraFCycles,
    925                                    BranchProbability Probability) const {
    926   return true;
    927 }
    928 
    929 bool
    930 R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
    931                                          unsigned NumCyles,
    932                                          BranchProbability Probability)
    933                                          const {
    934   return true;
    935 }
    936 
    937 bool
    938 R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
    939                                          MachineBasicBlock &FMBB) const {
    940   return false;
    941 }
    942 
    943 
    944 bool
    945 R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
    946   MachineOperand &MO = Cond[1];
    947   switch (MO.getImm()) {
    948   case OPCODE_IS_ZERO_INT:
    949     MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    950     break;
    951   case OPCODE_IS_NOT_ZERO_INT:
    952     MO.setImm(OPCODE_IS_ZERO_INT);
    953     break;
    954   case OPCODE_IS_ZERO:
    955     MO.setImm(OPCODE_IS_NOT_ZERO);
    956     break;
    957   case OPCODE_IS_NOT_ZERO:
    958     MO.setImm(OPCODE_IS_ZERO);
    959     break;
    960   default:
    961     return true;
    962   }
    963 
    964   MachineOperand &MO2 = Cond[2];
    965   switch (MO2.getReg()) {
    966   case AMDGPU::PRED_SEL_ZERO:
    967     MO2.setReg(AMDGPU::PRED_SEL_ONE);
    968     break;
    969   case AMDGPU::PRED_SEL_ONE:
    970     MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    971     break;
    972   default:
    973     return true;
    974   }
    975   return false;
    976 }
    977 
    978 bool R600InstrInfo::DefinesPredicate(MachineInstr &MI,
    979                                      std::vector<MachineOperand> &Pred) const {
    980   return isPredicateSetter(MI.getOpcode());
    981 }
    982 
    983 
    984 bool
    985 R600InstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
    986                                  ArrayRef<MachineOperand> Pred2) const {
    987   return false;
    988 }
    989 
    990 bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
    991                                          ArrayRef<MachineOperand> Pred) const {
    992   int PIdx = MI.findFirstPredOperandIdx();
    993 
    994   if (MI.getOpcode() == AMDGPU::CF_ALU) {
    995     MI.getOperand(8).setImm(0);
    996     return true;
    997   }
    998 
    999   if (MI.getOpcode() == AMDGPU::DOT_4) {
   1000     MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X))
   1001         .setReg(Pred[2].getReg());
   1002     MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y))
   1003         .setReg(Pred[2].getReg());
   1004     MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z))
   1005         .setReg(Pred[2].getReg());
   1006     MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W))
   1007         .setReg(Pred[2].getReg());
   1008     MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
   1009     MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
   1010     return true;
   1011   }
   1012 
   1013   if (PIdx != -1) {
   1014     MachineOperand &PMO = MI.getOperand(PIdx);
   1015     PMO.setReg(Pred[2].getReg());
   1016     MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
   1017     MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
   1018     return true;
   1019   }
   1020 
   1021   return false;
   1022 }
   1023 
   1024 unsigned int R600InstrInfo::getPredicationCost(const MachineInstr &) const {
   1025   return 2;
   1026 }
   1027 
   1028 unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   1029                                             const MachineInstr &,
   1030                                             unsigned *PredCost) const {
   1031   if (PredCost)
   1032     *PredCost = 2;
   1033   return 2;
   1034 }
   1035 
   1036 unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
   1037                                                    unsigned Channel) const {
   1038   assert(Channel == 0);
   1039   return RegIndex;
   1040 }
   1041 
   1042 bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   1043   switch (MI.getOpcode()) {
   1044   default: {
   1045     MachineBasicBlock *MBB = MI.getParent();
   1046     int OffsetOpIdx =
   1047         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr);
   1048     // addr is a custom operand with multiple MI operands, and only the
   1049     // first MI operand is given a name.
   1050     int RegOpIdx = OffsetOpIdx + 1;
   1051     int ChanOpIdx =
   1052         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan);
   1053     if (isRegisterLoad(MI)) {
   1054       int DstOpIdx =
   1055           AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
   1056       unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
   1057       unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
   1058       unsigned Address = calculateIndirectAddress(RegIndex, Channel);
   1059       unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
   1060       if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
   1061         buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),
   1062                       getIndirectAddrRegClass()->getRegister(Address));
   1063       } else {
   1064         buildIndirectRead(MBB, MI, MI.getOperand(DstOpIdx).getReg(), Address,
   1065                           OffsetReg);
   1066       }
   1067     } else if (isRegisterStore(MI)) {
   1068       int ValOpIdx =
   1069           AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val);
   1070       unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
   1071       unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
   1072       unsigned Address = calculateIndirectAddress(RegIndex, Channel);
   1073       unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
   1074       if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
   1075         buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
   1076                       MI.getOperand(ValOpIdx).getReg());
   1077       } else {
   1078         buildIndirectWrite(MBB, MI, MI.getOperand(ValOpIdx).getReg(),
   1079                            calculateIndirectAddress(RegIndex, Channel),
   1080                            OffsetReg);
   1081       }
   1082     } else {
   1083       return false;
   1084     }
   1085 
   1086     MBB->erase(MI);
   1087     return true;
   1088   }
   1089   case AMDGPU::R600_EXTRACT_ELT_V2:
   1090   case AMDGPU::R600_EXTRACT_ELT_V4:
   1091     buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(),
   1092                       RI.getHWRegIndex(MI.getOperand(1).getReg()), //  Address
   1093                       MI.getOperand(2).getReg(),
   1094                       RI.getHWRegChan(MI.getOperand(1).getReg()));
   1095     break;
   1096   case AMDGPU::R600_INSERT_ELT_V2:
   1097   case AMDGPU::R600_INSERT_ELT_V4:
   1098     buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value
   1099                        RI.getHWRegIndex(MI.getOperand(1).getReg()),   // Address
   1100                        MI.getOperand(3).getReg(),                     // Offset
   1101                        RI.getHWRegChan(MI.getOperand(1).getReg()));   // Channel
   1102     break;
   1103   }
   1104   MI.eraseFromParent();
   1105   return true;
   1106 }
   1107 
   1108 void  R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
   1109                                              const MachineFunction &MF) const {
   1110   const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
   1111   const R600FrameLowering *TFL = ST.getFrameLowering();
   1112 
   1113   unsigned StackWidth = TFL->getStackWidth(MF);
   1114   int End = getIndirectIndexEnd(MF);
   1115 
   1116   if (End == -1)
   1117     return;
   1118 
   1119   for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
   1120     unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
   1121     Reserved.set(SuperReg);
   1122     for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
   1123       unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
   1124       Reserved.set(Reg);
   1125     }
   1126   }
   1127 }
   1128 
   1129 const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
   1130   return &AMDGPU::R600_TReg32_XRegClass;
   1131 }
   1132 
   1133 MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
   1134                                        MachineBasicBlock::iterator I,
   1135                                        unsigned ValueReg, unsigned Address,
   1136                                        unsigned OffsetReg) const {
   1137   return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
   1138 }
   1139 
   1140 MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
   1141                                        MachineBasicBlock::iterator I,
   1142                                        unsigned ValueReg, unsigned Address,
   1143                                        unsigned OffsetReg,
   1144                                        unsigned AddrChan) const {
   1145   unsigned AddrReg;
   1146   switch (AddrChan) {
   1147     default: llvm_unreachable("Invalid Channel");
   1148     case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
   1149     case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
   1150     case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
   1151     case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
   1152   }
   1153   MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
   1154                                                AMDGPU::AR_X, OffsetReg);
   1155   setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
   1156 
   1157   MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
   1158                                       AddrReg, ValueReg)
   1159                                       .addReg(AMDGPU::AR_X,
   1160                                            RegState::Implicit | RegState::Kill);
   1161   setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1);
   1162   return Mov;
   1163 }
   1164 
   1165 MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
   1166                                        MachineBasicBlock::iterator I,
   1167                                        unsigned ValueReg, unsigned Address,
   1168                                        unsigned OffsetReg) const {
   1169   return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
   1170 }
   1171 
   1172 MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
   1173                                        MachineBasicBlock::iterator I,
   1174                                        unsigned ValueReg, unsigned Address,
   1175                                        unsigned OffsetReg,
   1176                                        unsigned AddrChan) const {
   1177   unsigned AddrReg;
   1178   switch (AddrChan) {
   1179     default: llvm_unreachable("Invalid Channel");
   1180     case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
   1181     case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
   1182     case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
   1183     case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
   1184   }
   1185   MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
   1186                                                        AMDGPU::AR_X,
   1187                                                        OffsetReg);
   1188   setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
   1189   MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
   1190                                       ValueReg,
   1191                                       AddrReg)
   1192                                       .addReg(AMDGPU::AR_X,
   1193                                            RegState::Implicit | RegState::Kill);
   1194   setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1);
   1195 
   1196   return Mov;
   1197 }
   1198 
   1199 int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
   1200   const MachineRegisterInfo &MRI = MF.getRegInfo();
   1201   const MachineFrameInfo *MFI = MF.getFrameInfo();
   1202   int Offset = -1;
   1203 
   1204   if (MFI->getNumObjects() == 0) {
   1205     return -1;
   1206   }
   1207 
   1208   if (MRI.livein_empty()) {
   1209     return 0;
   1210   }
   1211 
   1212   const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
   1213   for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
   1214                                             LE = MRI.livein_end();
   1215                                             LI != LE; ++LI) {
   1216     unsigned Reg = LI->first;
   1217     if (TargetRegisterInfo::isVirtualRegister(Reg) ||
   1218         !IndirectRC->contains(Reg))
   1219       continue;
   1220 
   1221     unsigned RegIndex;
   1222     unsigned RegEnd;
   1223     for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
   1224                                                           ++RegIndex) {
   1225       if (IndirectRC->getRegister(RegIndex) == Reg)
   1226         break;
   1227     }
   1228     Offset = std::max(Offset, (int)RegIndex);
   1229   }
   1230 
   1231   return Offset + 1;
   1232 }
   1233 
   1234 int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
   1235   int Offset = 0;
   1236   const MachineFrameInfo *MFI = MF.getFrameInfo();
   1237 
   1238   // Variable sized objects are not supported
   1239   if (MFI->hasVarSizedObjects()) {
   1240     return -1;
   1241   }
   1242 
   1243   if (MFI->getNumObjects() == 0) {
   1244     return -1;
   1245   }
   1246 
   1247   const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
   1248   const R600FrameLowering *TFL = ST.getFrameLowering();
   1249 
   1250   unsigned IgnoredFrameReg;
   1251   Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);
   1252 
   1253   return getIndirectIndexBegin(MF) + Offset;
   1254 }
   1255 
   1256 unsigned R600InstrInfo::getMaxAlusPerClause() const {
   1257   return 115;
   1258 }
   1259 
   1260 MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
   1261                                                   MachineBasicBlock::iterator I,
   1262                                                   unsigned Opcode,
   1263                                                   unsigned DstReg,
   1264                                                   unsigned Src0Reg,
   1265                                                   unsigned Src1Reg) const {
   1266   MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
   1267     DstReg);           // $dst
   1268 
   1269   if (Src1Reg) {
   1270     MIB.addImm(0)     // $update_exec_mask
   1271        .addImm(0);    // $update_predicate
   1272   }
   1273   MIB.addImm(1)        // $write
   1274      .addImm(0)        // $omod
   1275      .addImm(0)        // $dst_rel
   1276      .addImm(0)        // $dst_clamp
   1277      .addReg(Src0Reg)  // $src0
   1278      .addImm(0)        // $src0_neg
   1279      .addImm(0)        // $src0_rel
   1280      .addImm(0)        // $src0_abs
   1281      .addImm(-1);       // $src0_sel
   1282 
   1283   if (Src1Reg) {
   1284     MIB.addReg(Src1Reg) // $src1
   1285        .addImm(0)       // $src1_neg
   1286        .addImm(0)       // $src1_rel
   1287        .addImm(0)       // $src1_abs
   1288        .addImm(-1);      // $src1_sel
   1289   }
   1290 
   1291   //XXX: The r600g finalizer expects this to be 1, once we've moved the
   1292   //scheduling to the backend, we can change the default to 0.
   1293   MIB.addImm(1)        // $last
   1294       .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
   1295       .addImm(0)         // $literal
   1296       .addImm(0);        // $bank_swizzle
   1297 
   1298   return MIB;
   1299 }
   1300 
   1301 #define OPERAND_CASE(Label) \
   1302   case Label: { \
   1303     static const unsigned Ops[] = \
   1304     { \
   1305       Label##_X, \
   1306       Label##_Y, \
   1307       Label##_Z, \
   1308       Label##_W \
   1309     }; \
   1310     return Ops[Slot]; \
   1311   }
   1312 
   1313 static unsigned getSlotedOps(unsigned  Op, unsigned Slot) {
   1314   switch (Op) {
   1315   OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
   1316   OPERAND_CASE(AMDGPU::OpName::update_pred)
   1317   OPERAND_CASE(AMDGPU::OpName::write)
   1318   OPERAND_CASE(AMDGPU::OpName::omod)
   1319   OPERAND_CASE(AMDGPU::OpName::dst_rel)
   1320   OPERAND_CASE(AMDGPU::OpName::clamp)
   1321   OPERAND_CASE(AMDGPU::OpName::src0)
   1322   OPERAND_CASE(AMDGPU::OpName::src0_neg)
   1323   OPERAND_CASE(AMDGPU::OpName::src0_rel)
   1324   OPERAND_CASE(AMDGPU::OpName::src0_abs)
   1325   OPERAND_CASE(AMDGPU::OpName::src0_sel)
   1326   OPERAND_CASE(AMDGPU::OpName::src1)
   1327   OPERAND_CASE(AMDGPU::OpName::src1_neg)
   1328   OPERAND_CASE(AMDGPU::OpName::src1_rel)
   1329   OPERAND_CASE(AMDGPU::OpName::src1_abs)
   1330   OPERAND_CASE(AMDGPU::OpName::src1_sel)
   1331   OPERAND_CASE(AMDGPU::OpName::pred_sel)
   1332   default:
   1333     llvm_unreachable("Wrong Operand");
   1334   }
   1335 }
   1336 
   1337 #undef OPERAND_CASE
   1338 
   1339 MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
   1340     MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
   1341     const {
   1342   assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
   1343   unsigned Opcode;
   1344   if (ST.getGeneration() <= R600Subtarget::R700)
   1345     Opcode = AMDGPU::DOT4_r600;
   1346   else
   1347     Opcode = AMDGPU::DOT4_eg;
   1348   MachineBasicBlock::iterator I = MI;
   1349   MachineOperand &Src0 = MI->getOperand(
   1350       getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
   1351   MachineOperand &Src1 = MI->getOperand(
   1352       getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
   1353   MachineInstr *MIB = buildDefaultInstruction(
   1354       MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
   1355   static const unsigned  Operands[14] = {
   1356     AMDGPU::OpName::update_exec_mask,
   1357     AMDGPU::OpName::update_pred,
   1358     AMDGPU::OpName::write,
   1359     AMDGPU::OpName::omod,
   1360     AMDGPU::OpName::dst_rel,
   1361     AMDGPU::OpName::clamp,
   1362     AMDGPU::OpName::src0_neg,
   1363     AMDGPU::OpName::src0_rel,
   1364     AMDGPU::OpName::src0_abs,
   1365     AMDGPU::OpName::src0_sel,
   1366     AMDGPU::OpName::src1_neg,
   1367     AMDGPU::OpName::src1_rel,
   1368     AMDGPU::OpName::src1_abs,
   1369     AMDGPU::OpName::src1_sel,
   1370   };
   1371 
   1372   MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
   1373       getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
   1374   MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
   1375       .setReg(MO.getReg());
   1376 
   1377   for (unsigned i = 0; i < 14; i++) {
   1378     MachineOperand &MO = MI->getOperand(
   1379         getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
   1380     assert (MO.isImm());
   1381     setImmOperand(*MIB, Operands[i], MO.getImm());
   1382   }
   1383   MIB->getOperand(20).setImm(0);
   1384   return MIB;
   1385 }
   1386 
   1387 MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
   1388                                          MachineBasicBlock::iterator I,
   1389                                          unsigned DstReg,
   1390                                          uint64_t Imm) const {
   1391   MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
   1392                                                   AMDGPU::ALU_LITERAL_X);
   1393   setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm);
   1394   return MovImm;
   1395 }
   1396 
   1397 MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
   1398                                        MachineBasicBlock::iterator I,
   1399                                        unsigned DstReg, unsigned SrcReg) const {
   1400   return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
   1401 }
   1402 
   1403 int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
   1404   return getOperandIdx(MI.getOpcode(), Op);
   1405 }
   1406 
   1407 int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
   1408   return AMDGPU::getNamedOperandIdx(Opcode, Op);
   1409 }
   1410 
   1411 void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op,
   1412                                   int64_t Imm) const {
   1413   int Idx = getOperandIdx(MI, Op);
   1414   assert(Idx != -1 && "Operand not supported for this instruction.");
   1415   assert(MI.getOperand(Idx).isImm());
   1416   MI.getOperand(Idx).setImm(Imm);
   1417 }
   1418 
   1419 //===----------------------------------------------------------------------===//
   1420 // Instruction flag getters/setters
   1421 //===----------------------------------------------------------------------===//
   1422 
   1423 bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
   1424   return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
   1425 }
   1426 
   1427 MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,
   1428                                          unsigned Flag) const {
   1429   unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
   1430   int FlagIndex = 0;
   1431   if (Flag != 0) {
   1432     // If we pass something other than the default value of Flag to this
   1433     // function, it means we are want to set a flag on an instruction
   1434     // that uses native encoding.
   1435     assert(HAS_NATIVE_OPERANDS(TargetFlags));
   1436     bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
   1437     switch (Flag) {
   1438     case MO_FLAG_CLAMP:
   1439       FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp);
   1440       break;
   1441     case MO_FLAG_MASK:
   1442       FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write);
   1443       break;
   1444     case MO_FLAG_NOT_LAST:
   1445     case MO_FLAG_LAST:
   1446       FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last);
   1447       break;
   1448     case MO_FLAG_NEG:
   1449       switch (SrcIdx) {
   1450       case 0:
   1451         FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg);
   1452         break;
   1453       case 1:
   1454         FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg);
   1455         break;
   1456       case 2:
   1457         FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg);
   1458         break;
   1459       }
   1460       break;
   1461 
   1462     case MO_FLAG_ABS:
   1463       assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
   1464                        "instructions.");
   1465       (void)IsOP3;
   1466       switch (SrcIdx) {
   1467       case 0:
   1468         FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs);
   1469         break;
   1470       case 1:
   1471         FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs);
   1472         break;
   1473       }
   1474       break;
   1475 
   1476     default:
   1477       FlagIndex = -1;
   1478       break;
   1479     }
   1480     assert(FlagIndex != -1 && "Flag not supported for this instruction");
   1481   } else {
   1482       FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
   1483       assert(FlagIndex != 0 &&
   1484          "Instruction flags not supported for this instruction");
   1485   }
   1486 
   1487   MachineOperand &FlagOp = MI.getOperand(FlagIndex);
   1488   assert(FlagOp.isImm());
   1489   return FlagOp;
   1490 }
   1491 
   1492 void R600InstrInfo::addFlag(MachineInstr &MI, unsigned Operand,
   1493                             unsigned Flag) const {
   1494   unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
   1495   if (Flag == 0) {
   1496     return;
   1497   }
   1498   if (HAS_NATIVE_OPERANDS(TargetFlags)) {
   1499     MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
   1500     if (Flag == MO_FLAG_NOT_LAST) {
   1501       clearFlag(MI, Operand, MO_FLAG_LAST);
   1502     } else if (Flag == MO_FLAG_MASK) {
   1503       clearFlag(MI, Operand, Flag);
   1504     } else {
   1505       FlagOp.setImm(1);
   1506     }
   1507   } else {
   1508       MachineOperand &FlagOp = getFlagOp(MI, Operand);
   1509       FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
   1510   }
   1511 }
   1512 
   1513 void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
   1514                               unsigned Flag) const {
   1515   unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
   1516   if (HAS_NATIVE_OPERANDS(TargetFlags)) {
   1517     MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
   1518     FlagOp.setImm(0);
   1519   } else {
   1520     MachineOperand &FlagOp = getFlagOp(MI);
   1521     unsigned InstFlags = FlagOp.getImm();
   1522     InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
   1523     FlagOp.setImm(InstFlags);
   1524   }
   1525 }
   1526 
   1527 bool R600InstrInfo::isRegisterStore(const MachineInstr &MI) const {
   1528   return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
   1529 }
   1530 
   1531 bool R600InstrInfo::isRegisterLoad(const MachineInstr &MI) const {
   1532   return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
   1533 }
   1534