//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"

R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
  : AMDGPUInstrInfo(st),
    RI(st)
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const {
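  // There is no vector copy instruction, so 128- and 64-bit copies are split
  // into one 32-bit MOV per channel; the extra implicit operand marks the
  // whole super-register as defined.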
  unsigned VectorComponents = 0;
  if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
      AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
      (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
       AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 4;
  } else if ((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
            AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
            (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
             AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
    VectorComponents = 2;
  }

  if (VectorComponents > 0) {
    for (unsigned I = 0; I < VectorComponents; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                              RI.getSubReg(DestReg, SubRegIndex),
                              RI.getSubReg(SrcReg, SubRegIndex))
                              .addReg(DestReg,
                                      RegState::Define | RegState::Implicit);
    }
  } else {
    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                  DestReg, SrcReg);
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
                                    .setIsKill(KillSrc);
  }
}

/// \returns true if \p MBBI can be moved into a new basic block.
bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI) const {
  for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
                                        E = MBBI->operands_end(); I != E; ++I) {
    if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
        I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
      return false;
  }
  return true;
}

unsigned R600InstrInfo::getIEQOpcode() const {
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as placeholders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
  return false;
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return (TargetFlags & R600_InstFlag::ALU_INST);
}

bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::OP1) |
          (TargetFlags & R600_InstFlag::OP2) |
          (TargetFlags & R600_InstFlag::OP3));
}

bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::LDS_1A) |
          (TargetFlags & R600_InstFlag::LDS_1A1D) |
          (TargetFlags & R600_InstFlag::LDS_1A2D));
}

bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
}

bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
}

bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
  if (isALUInstr(MI->getOpcode()))
    return true;
  if (isVector(*MI) || isCubeOp(MI->getOpcode()))
    return true;
  switch (MI->getOpcode()) {
  case AMDGPU::PRED_X:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::COPY:
  case AMDGPU::DOT_4:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
  if (ST.hasCaymanISA())
    return false;
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}

bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
  return isTransOnly(MI->getOpcode());
}

bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
}

bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
  return isVectorOnly(MI->getOpcode());
}

bool R600InstrInfo::isExport(unsigned Opcode) const {
  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
}

bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
}

bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return MFI->ShaderType != ShaderType::COMPUTE &&
         usesVertexCache(MI->getOpcode());
}

bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}

bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return (MFI->ShaderType == ShaderType::COMPUTE &&
          usesVertexCache(MI->getOpcode())) ||
         usesTextureCache(MI->getOpcode());
}

bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::KILLGT:
  case AMDGPU::GROUP_BARRIER:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
  return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
  return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
  if (!isALUInstr(MI->getOpcode())) {
    return false;
  }
  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
                                        E = MI->operands_end(); I != E; ++I) {
    if (!I->isReg() || !I->isUse() ||
        TargetRegisterInfo::isVirtualRegister(I->getReg()))
      continue;

    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
      return true;
  }
  return false;
}

int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
  static const unsigned OpTable[] = {
    AMDGPU::OpName::src0,
    AMDGPU::OpName::src1,
    AMDGPU::OpName::src2
  };

  assert(SrcNum < 3);
  return getOperandIdx(Opcode, OpTable[SrcNum]);
}

#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
  };

  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
    if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
      return getOperandIdx(Opcode, SrcSelTable[i][1]);
    }
  }
  return -1;
}
#undef SRC_SEL_ROWS

SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    static const unsigned OpTable[8][2] = {
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    };

    for (unsigned j = 0; j < 8; j++) {
      MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                        OpTable[j][0]));
      unsigned Reg = MO.getReg();
      if (Reg == AMDGPU::ALU_CONST) {
        unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                    OpTable[j][1])).getImm();
        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
        continue;
      }
    }
    return Result;
  }

  static const unsigned OpTable[3][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
  };

  for (unsigned j = 0; j < 3; j++) {
    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
    if (SrcIdx < 0)
      break;
    MachineOperand &MO = MI->getOperand(SrcIdx);
    unsigned Reg = MO.getReg();
    if (Reg == AMDGPU::ALU_CONST) {
      unsigned Sel = MI->getOperand(
          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
      continue;
    }
    if (Reg == AMDGPU::ALU_LITERAL_X) {
      unsigned Imm = MI->getOperand(
          getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
      continue;
    }
    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
  }
  return Result;
}

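/// Extract up to three (hw register index, channel) pairs for the sources of
/// \p MI. PS/PV reads are encoded as index 255, constant reads count toward
/// \p ConstCount, and the result is padded with (-1, 0) to three entries.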
std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
                           const DenseMap<unsigned, unsigned> &PV,
                           unsigned &ConstCount) const {
  ConstCount = 0;
  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned> > Result;
  unsigned i = 0;
  for (unsigned n = Srcs.size(); i < n; ++i) {
    unsigned Reg = Srcs[i].first->getReg();
    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
    if (Reg == AMDGPU::OQAP) {
      Result.push_back(std::pair<int, unsigned>(Index, 0));
    }
    if (PV.find(Reg) != PV.end()) {
      // 255 is used to tell it's a PS/PV reg
      Result.push_back(std::pair<int, unsigned>(255, 0));
      continue;
    }
    if (Index > 127) {
      ConstCount++;
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Chan = RI.getHWRegChan(Reg);
    Result.push_back(std::pair<int, unsigned>(Index, Chan));
  }
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}

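// Apply the bank swizzle \p Swz to the source list \p Src by permuting the
// three source slots. When src0 and src1 read the same operand, src1 is
// dropped (-1) so the duplicate read is not counted twice against the ports.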
static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
        R600InstrInfo::BankSwizzle Swz) {
  if (Src[0] == Src[1])
    Src[1].first = -1;
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210:
    break;
  case R600InstrInfo::ALU_VEC_021_SCL_122:
    std::swap(Src[1], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_102_SCL_221:
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_120_SCL_212:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_201:
    std::swap(Src[0], Src[2]);
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}

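// For an instruction placed in the Trans slot, return the cycle at which its
// operand \p Op is fetched under the swizzle \p Swz.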
static unsigned
getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
    unsigned Cycles[3] = { 2, 1, 0};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
    unsigned Cycles[3] = { 1, 2, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
    unsigned Cycles[3] = { 2, 1, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
    unsigned Cycles[3] = { 2, 2, 1};
    return Cycles[Op];
  }
  default:
    llvm_unreachable("Wrong Swizzle for Trans Slot");
    return 0;
  }
}

/// \returns how many MIs (whose inputs are represented by IGSrcs) can be
/// packed into the same instruction group while meeting read port
/// limitations under the \p Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
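  // Vector[Chan][Cycle] records which register index is read on each
  // bank/cycle read port; -1 marks a port that is still free.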
  int Vector[4][3];
  memset(Vector, -1, sizeof(Vector));
  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
    const std::vector<std::pair<int, unsigned> > &Srcs =
        Swizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      const std::pair<int, unsigned> &Src = Srcs[j];
      if (Src.first < 0 || Src.first == 255)
        continue;
      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
            // The value from output queue A (denoted by register OQAP) can
            // only be fetched during the first cycle.
            return 0;
        }
        // OQAP does not count towards the normal read port restrictions
        continue;
      }
      if (Vector[Src.second][j] < 0)
        Vector[Src.second][j] = Src.first;
      if (Vector[Src.second][j] != Src.first)
        return i;
    }
  }
  // Now check Trans Alu
  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransSrcs[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (Src.first == 255)
      continue;
    if (Vector[Src.second][Cycle] < 0)
      Vector[Src.second][Cycle] = Src.first;
    if (Vector[Src.second][Cycle] != Src.first)
      return IGSrcs.size() - 1;
  }
  return IGSrcs.size();
}

/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
/// (in lexicographic order) swizzle sequence, assuming that all swizzles
/// after Idx can be skipped.
static bool
NextPossibleSolution(
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Idx) {
  assert(Idx < SwzCandidate.size());
  int ResetIdx = Idx;
  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    ResetIdx--;
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
  }
  if (ResetIdx == -1)
    return false;
  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
  return true;
}

/// Enumerate all possible swizzle sequences to find one that meets all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  unsigned ValidUpTo = 0;
  do {
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    if (ValidUpTo == IGSrcs.size())
      return true;
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
  return false;
}

/// Instructions in the Trans slot can't read a gpr at cycle 0 if they also
/// read a const, and can't read a gpr at cycle 1 if they read two consts.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                  const std::vector<std::pair<int, unsigned> > &TransOps,
                  unsigned ConstCount) {
  // TransALU can't read 3 constants
  if (ConstCount > 2)
    return false;
  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransOps[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (ConstCount > 0 && Cycle == 0)
      return false;
    if (ConstCount > 1 && Cycle == 1)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle,
                                       bool isLastAluTrans)
    const {
  // TODO: support shared src0 - src1 operand

  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
  ValidSwizzle.clear();
  unsigned ConstCount;
  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
        AMDGPU::OpName::bank_swizzle);
    ValidSwizzle.push_back((R600InstrInfo::BankSwizzle)
        IG[i]->getOperand(Op).getImm());
  }
  std::vector<std::pair<int, unsigned> > TransOps;
  if (!isLastAluTrans)
    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);

  TransOps = IGSrcs.back();
  IGSrcs.pop_back();
  ValidSwizzle.pop_back();

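  // Only these four swizzles give the Trans-slot instruction a valid fetch
  // cycle for each operand (see getTransSwizzle); try them in order until
  // the vector slots can be assigned as well.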
  static const R600InstrInfo::BankSwizzle TransSwz[] = {
    ALU_VEC_012_SCL_210,
    ALU_VEC_021_SCL_122,
    ALU_VEC_120_SCL_212,
    ALU_VEC_102_SCL_221
  };
  for (unsigned i = 0; i < 4; i++) {
    TransBS = TransSwz[i];
    if (!isConstCompatible(TransBS, TransOps, ConstCount))
      continue;
    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
        TransBS);
    if (Result) {
      ValidSwizzle.push_back(TransBS);
      return true;
    }
  }

  return false;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
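  // Each entry of Consts encodes (index << 2) | chan. A constant read is
  // keyed by its index plus which 64-bit half it touches; the group fails
  // once a third distinct key appears.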
  assert(Consts.size() <= 12 && "Too many operands in instructions group");
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) {
      Pair1 = ReadHalfConst;
      continue;
    }
    if (Pair1 == ReadHalfConst)
      continue;
    if (!Pair2) {
      Pair2 = ReadHalfConst;
      continue;
    }
    if (Pair2 != ReadHalfConst)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
    const {
  std::vector<unsigned> Consts;
  SmallSet<int64_t, 4> Literals;
  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    MachineInstr *MI = MIs[i];
    if (!isALUInstr(MI->getOpcode()))
      continue;

    const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
        getSrcs(MI);

    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
      std::pair<MachineOperand *, int64_t> Src = Srcs[j];
      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
        Literals.insert(Src.second);
      if (Literals.size() > 4)
        return false;
      if (Src.first->getReg() == AMDGPU::ALU_CONST)
        Consts.push_back(Src.second);
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
        Consts.push_back((Index << 2) | Chan);
      }
    }
  }
  return fitsConstReadLimitations(Consts);
}

DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const {
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return nullptr;
}

static
bool isJump(unsigned Opcode) {
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}

static bool isBranch(unsigned Opcode) {
  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
      Opcode == AMDGPU::BRANCH_COND_f32;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch.

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  // AMDGPU::BRANCH* instructions are only available after isel and are not
  // handled.
  if (isBranch(I->getOpcode()))
    return true;
  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
    return false;
  }

  // Remove successive JUMP
  while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
    MachineBasicBlock::iterator PriorI = std::prev(I);
    if (AllowModify)
      I->removeFromParent();
    I = PriorI;
  }
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
          !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
    if (LastOpc == AMDGPU::JUMP) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (LastOpc == AMDGPU::JUMP_COND) {
      MachineInstr *predSet = I;
      while (!isPredicateSetter(predSet->getOpcode())) {
        predSet = --I;
      }
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(predSet->getOperand(1));
      Cond.push_back(predSet->getOperand(2));
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
      return false;
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

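// Return the last CF_ALU or CF_ALU_PUSH_BEFORE clause in \p MBB, or
// MBB.end() if there is none.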
static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
      It != E; ++It) {
    if (It->getOpcode() == AMDGPU::CF_ALU ||
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
      return std::prev(It.base());
  }
  return MBB.end();
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
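      // A conditional jump needs the last ALU clause to push the active
      // mask, so promote a trailing CF_ALU to CF_ALU_PUSH_BEFORE.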
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}

unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {

  // Note: we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated.  Until we have proper support for instruction clauses in the
  // backend, we will mark KILL* instructions as unpredicable.

  if (MI->getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
    // If the clause starts in the middle of the MBB then the MBB has more
    // than one clause; we are unable to predicate several clauses.
    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
      return false;
    // TODO: We don't support KC merging at the moment.
    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
      return false;
    return true;
  } else if (isVector(*MI)) {
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}


bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         const BranchProbability &Probability)
                                         const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}


bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI->getOpcode());
}


bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                       const SmallVectorImpl<MachineOperand> &Pred2) const {
  return false;
}


bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                      const SmallVectorImpl<MachineOperand> &Pred) const {
  int PIdx = MI->findFirstPredOperandIdx();

  if (MI->getOpcode() == AMDGPU::CF_ALU) {
    MI->getOperand(8).setImm(0);
    return true;
  }

  if (MI->getOpcode() == AMDGPU::DOT_4) {
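    // Predicate all four slots of the DOT_4 bundle with the same pred_sel
    // register so the whole bundle is guarded consistently.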
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
        .setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
  return 2;
}

unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr *MI,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
  case AMDGPU::R600_EXTRACT_ELT_V2:
  case AMDGPU::R600_EXTRACT_ELT_V4:
    buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(),
                      RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address
                      MI->getOperand(2).getReg(),
                      RI.getHWRegChan(MI->getOperand(1).getReg()));
    break;
  case AMDGPU::R600_INSERT_ELT_V2:
  case AMDGPU::R600_INSERT_ELT_V4:
    buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value
                       RI.getHWRegIndex(MI->getOperand(1).getReg()),  // Address
                       MI->getOperand(3).getReg(),                    // Offset
                       RI.getHWRegChan(MI->getOperand(1).getReg()));  // Channel
    break;
  }
  MI->eraseFromParent();
  return true;
}

void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                             const MachineFunction &MF) const {
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering*>(
          MF.getTarget().getFrameLowering());

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1)
    return;

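  // Reserve the 128-bit super-register for every indirectly addressed index,
  // plus each 32-bit channel covered by the stack width.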
  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Reserved.set(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Reserved.set(Reg);
    }
  }
}

unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                 unsigned Channel) const {
  // XXX: Remove when we support a stack width > 2
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::R600_TReg32_XRegClass;
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
    default: llvm_unreachable("Invalid Channel");
    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
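  // Load the offset into AR_X (with the MOVA result write disabled), then
  // emit a MOV whose destination is addressed relative to AR_X (dst_rel).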
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      AddrReg, ValueReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  switch (AddrChan) {
    default: llvm_unreachable("Invalid Channel");
    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
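  // Mirror of buildIndirectWrite: AR_X holds the offset, and the MOV reads
  // its source operand relative to it (src0_rel).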
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                                       AMDGPU::AR_X,
                                                       OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      ValueReg,
                                      AddrReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}

unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);           // $dst

  if (Src1Reg) {
    MIB.addImm(0)     // $update_exec_mask
       .addImm(0);    // $update_predicate
  }
  MIB.addImm(1)        // $write
     .addImm(0)        // $omod
     .addImm(0)        // $dst_rel
     .addImm(0)        // $dst_clamp
     .addReg(Src0Reg)  // $src0
     .addImm(0)        // $src0_neg
     .addImm(0)        // $src0_rel
     .addImm(0)        // $src0_abs
     .addImm(-1);      // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);     // $src1_sel
  }

  // XXX: The r600g finalizer expects this to be 1; once we've moved the
  // scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)        // $last
      .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
      .addImm(0)         // $literal
      .addImm(0);        // $bank_swizzle

  return MIB;
}

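// Expand a scalar ALU operand name into its per-slot (_X/_Y/_Z/_W) variant
// selected by Slot; used to address the operands of the DOT_4 pseudo.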
#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert(MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
      .setReg(MO.getReg());

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert(MO.isImm());
    setImmOperand(MIB, Operands[i], MO.getImm());
  }
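  // Operand 20 is $bank_swizzle, the last operand appended by
  // buildDefaultInstruction for a two-source instruction.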
  MIB->getOperand(20).setImm(0);
  return MIB;
}

MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                  AMDGPU::ALU_LITERAL_X);
  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned DstReg, unsigned SrcReg) const {
  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}

int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}

void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(*MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI->getOperand(Idx).isImm());
  MI->getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
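    // Non-native instructions pack all flags into a single immediate
    // operand, NUM_MO_FLAGS bits per machine operand.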
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}