//===-- SIInstrInfo.cpp - SI Instruction Information  ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//


#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
  : AMDGPUInstrInfo(st),
    RI(st) { }

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

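// Emit the cheapest physical register copy for the target: a single
// S_MOV_B32/S_MOV_B64 or V_MOV_B32 where one exists, otherwise a sequence of
// 32-bit moves over the sub-registers of the wider register class.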
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MI, DebugLoc DL,
                         unsigned DestReg, unsigned SrcReg,
                         bool KillSrc) const {
  // If we are trying to copy to or from SCC, there is a bug somewhere else in
  // the backend.  While it may be theoretically possible to do this, it should
  // never be necessary.
  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1, 0
  };

  unsigned Opcode;
  const int16_t *SubIndices;

  if (AMDGPU::M0 == DestReg) {
    // Check if M0 isn't already set to this value
    for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
      I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {

      if (!I->definesRegister(AMDGPU::M0))
        continue;

      unsigned Opc = I->getOpcode();
      if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
        break;

      if (!I->readsRegister(SrcReg))
        break;

      // The copy isn't necessary
      return;
    }
  }

  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_15;

  } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
           AMDGPU::SReg_64RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_1;

  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_2;

  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
           AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
           AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
           AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_15;

  } else {
    llvm_unreachable("Can't copy register!");
  }

  while (unsigned SubIdx = *SubIndices++) {
    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));

    if (*SubIndices)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
  }
}

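// Return the opcode with src0 and src1 swapped, using the tablegen-generated
// commute tables; opcodes with no commuted form map to themselves.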
unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  int NewOpc;

  // Try to map original to commuted opcode
  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
    return NewOpc;

  // Try to map commuted to original opcode
  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
    return NewOpc;

  return Opcode;
}

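// Spill a register to the stack slot at FrameIndex.  SGPRs are written into
// lanes of a reserved VGPR: directly via V_WRITELANE_B32 for 32-bit
// registers, and through SI_SPILL_S*_SAVE pseudos for wider SGPR classes.
// VGPR spilling is not implemented and is reported as an error.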
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned KillFlag = isKill ? RegState::Kill : 0;

  if (RI.hasVGPRs(RC)) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!");
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
            .addReg(SrcReg);
  } else if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
    unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MF);
    unsigned TgtReg = MFI->SpillTracker.LaneVGPR;

    BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), TgtReg)
            .addReg(SrcReg, KillFlag)
            .addImm(Lane);
    MFI->SpillTracker.addSpilledReg(FrameIndex, TgtReg, Lane);
  } else if (RI.isSGPRClass(RC)) {
    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for vector
    // registers.
    //
    // Reserve a spot in the spill tracker for each sub-register of
    // the vector register.
    unsigned NumSubRegs = RC->getSize() / 4;
    unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MF, NumSubRegs);
    MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
                                    FirstLane);

    unsigned Opcode;
    switch (RC->getSize() * 8) {
    case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE;  break;
    case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
    default: llvm_unreachable("Cannot spill register class");
    }

    BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
            .addReg(SrcReg)
            .addImm(FrameIndex);
  } else {
    llvm_unreachable("VGPR spilling not supported");
  }
}

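// Reload a register from the stack slot at FrameIndex.  SGPR reloads read the
// lanes recorded by the spill tracker back out of the lane VGPR through
// SI_SPILL_S*_RESTORE pseudos; VGPR reloads are not implemented.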
void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  DebugLoc DL = MBB.findDebugLoc(MI);

  if (RI.hasVGPRs(RC)) {
    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Can't retrieve spilled VGPR!");
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
            .addImm(0);
  } else if (RI.isSGPRClass(RC)) {
    unsigned Opcode;
    switch (RC->getSize() * 8) {
    case 32:  Opcode = AMDGPU::SI_SPILL_S32_RESTORE;  break;
    case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE;  break;
    case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
    default: llvm_unreachable("Cannot restore register class");
    }

    SIMachineFunctionInfo::SpilledReg Spill =
        MFI->SpillTracker.getSpilledReg(FrameIndex);

    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
            .addReg(Spill.VGPR)
            .addImm(FrameIndex);
  } else {
    llvm_unreachable("VGPR spilling not supported");
  }
}

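// Map an SGPR spill/restore pseudo to the number of 32-bit sub-registers it
// covers.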
static unsigned getNumSubRegsForSpillOp(unsigned Op) {
  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

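// Insert Count wait states before MI.  Each S_NOP covers up to eight wait
// states (its immediate encodes the count minus one), so larger requests are
// split across multiple S_NOPs.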
void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
                             int Count) const {
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
            .addImm(Arg);
  }
}

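// Lower the SI_SPILL_S*_SAVE/RESTORE pseudos into V_WRITELANE_B32 /
// V_READLANE_B32 sequences, one lane per 32-bit sub-register.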
bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  SIMachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    unsigned FrameIndex = MI->getOperand(2).getImm();

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      SIMachineFunctionInfo::SpilledReg Spill;
      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
                                            &AMDGPU::SGPR_32RegClass, i);
      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);

      BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
              MI->getOperand(0).getReg())
              .addReg(SubReg)
              .addImm(Spill.Lane + i);
    }
    MI->eraseFromParent();
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      SIMachineFunctionInfo::SpilledReg Spill;
      unsigned FrameIndex = MI->getOperand(2).getImm();
      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
                                            &AMDGPU::SGPR_32RegClass, i);
      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);

      BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
              .addReg(MI->getOperand(1).getReg())
              .addImm(Spill.Lane + i);
    }
    insertNOPs(MI, 3);
    MI->eraseFromParent();
    break;
  }
  }
  return true;
}

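// Swap the two source operands of MI.  Operand 1 must be a register; when
// operand 2 is an immediate it is exchanged manually, otherwise the generic
// TargetInstrInfo implementation is used.  VOP2 with an SGPR src0, FP
// immediates, and VOP3 with abs/neg modifiers set cannot be commuted.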
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                              bool NewMI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
    return nullptr;

  // Cannot commute VOP2 if src0 is SGPR.
  if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
      RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
    return nullptr;

  if (!MI->getOperand(2).isReg()) {
    // XXX: Commute instructions with FPImm operands
    if (NewMI || MI->getOperand(2).isFPImm() ||
       (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
      return nullptr;
    }

    // XXX: Commute VOP3 instructions with abs and neg set.
    if (isVOP3(MI->getOpcode()) &&
        (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::abs)).getImm() ||
         MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::neg)).getImm()))
      return nullptr;

    unsigned Reg = MI->getOperand(1).getReg();
    unsigned SubReg = MI->getOperand(1).getSubReg();
    MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
    MI->getOperand(2).ChangeToRegister(Reg, false);
    MI->getOperand(2).setSubReg(SubReg);
  } else {
    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
  }

  if (MI)
    MI->setDesc(get(commuteOpcode(MI->getOpcode())));

  return MI;
}

MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         unsigned SrcReg) const {
  return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
                 DstReg).addReg(SrcReg);
}

bool SIInstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
    return true;
  }
}

bool
SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  return RC != &AMDGPU::EXECRegRegClass;
}

bool
SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
                                         AliasAnalysis *AA) const {
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
    return MI->getOperand(1).isImm();
  }
}

namespace llvm {
namespace AMDGPU {
// Helper function generated by tablegen.  We are wrapping this with
// an SIInstrInfo function that returns bool rather than int.
int isDS(uint16_t Opcode);
}
}

bool SIInstrInfo::isDS(uint16_t Opcode) const {
  return ::AMDGPU::isDS(Opcode) != -1;
}

int SIInstrInfo::isMIMG(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}

int SIInstrInfo::isSMRD(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::SMRD;
}

bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP1;
}

bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP2;
}

bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}

bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOPC;
}

bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
}

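// Return true if Imm can be encoded as a VALU inline constant: any integer
// in [-16, 64], or a bit pattern equal to one of the hardware's
// floating-point inline immediates (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0).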
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  int32_t Val = Imm.getSExtValue();
  if (Val >= -16 && Val <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values.  For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  return (APInt::floatToBits(0.0f) == Imm) ||
         (APInt::floatToBits(1.0f) == Imm) ||
         (APInt::floatToBits(-1.0f) == Imm) ||
         (APInt::floatToBits(0.5f) == Imm) ||
         (APInt::floatToBits(-0.5f) == Imm) ||
         (APInt::floatToBits(2.0f) == Imm) ||
         (APInt::floatToBits(-2.0f) == Imm) ||
         (APInt::floatToBits(4.0f) == Imm) ||
         (APInt::floatToBits(-4.0f) == Imm);
}

bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
  if (MO.isImm())
    return isInlineConstant(APInt(32, MO.getImm(), true));

  if (MO.isFPImm()) {
    APFloat FpImm = MO.getFPImm()->getValueAPF();
    return isInlineConstant(FpImm.bitcastToAPInt());
  }

  return false;
}

bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
  return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
}

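// Return true if two operands of the same kind (register, immediate, or FP
// immediate) hold the same value.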
static bool compareMachineOp(const MachineOperand &Op0,
                             const MachineOperand &Op1) {
  if (Op0.getType() != Op1.getType())
    return false;

  switch (Op0.getType()) {
  case MachineOperand::MO_Register:
    return Op0.getReg() == Op1.getReg();
  case MachineOperand::MO_Immediate:
    return Op0.getImm() == Op1.getImm();
  case MachineOperand::MO_FPImmediate:
    return Op0.getFPImm() == Op1.getFPImm();
  default:
    llvm_unreachable("Didn't expect to be comparing these operand types");
  }
}

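// Check SI-specific encoding constraints: operand counts and register
// classes from the MCInstrDesc, the single-use limit on the scalar constant
// bus for VOP* instructions, the no-immediate rule for VOP2/VOPC src1, the
// no-literal rule for VOP3 sources, and the V_DIV_SCALE_* requirement that
// src0 match src1 or src2.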
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
                                    StringRef &ErrInfo) const {
  uint16_t Opcode = MI->getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  // Make sure the number of operands is correct.
  const MCInstrDesc &Desc = get(Opcode);
  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI->getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
    return false;
  }

  // Make sure the register classes are correct
  for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
    switch (Desc.OpInfo[i].OperandType) {
    case MCOI::OPERAND_REGISTER: {
      int RegClass = Desc.OpInfo[i].RegClass;
      if (!RI.regClassCanUseImmediate(RegClass) &&
          (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) {
        ErrInfo = "Expected register, but got immediate";
        return false;
      }
      break;
    }
    case MCOI::OPERAND_IMMEDIATE:
      if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
        ErrInfo = "Expected immediate, but got non-immediate";
        return false;
      }
      // Fall-through
    default:
      continue;
    }

    if (!MI->getOperand(i).isReg())
      continue;

    int RegClass = Desc.OpInfo[i].RegClass;
    if (RegClass != -1) {
      unsigned Reg = MI->getOperand(i).getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg))
        continue;

      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
      if (!RC->contains(Reg)) {
        ErrInfo = "Operand has incorrect register class.";
        return false;
      }
    }
  }

  // Verify VOP*
  if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
    unsigned ConstantBusCount = 0;
    unsigned SGPRUsed = AMDGPU::NoRegister;
    for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isUse() &&
          !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {

        // EXEC register uses the constant bus.
        if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
          ++ConstantBusCount;

        // SGPRs use the constant bus
        if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
            (!MO.isImplicit() &&
            (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
            AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
          if (SGPRUsed != MO.getReg()) {
            ++ConstantBusCount;
            SGPRUsed = MO.getReg();
          }
        }
      }
      // Literal constants use the constant bus.
      if (isLiteralConstant(MO))
        ++ConstantBusCount;
    }
    if (ConstantBusCount > 1) {
      ErrInfo = "VOP* instruction uses the constant bus more than once";
      return false;
    }
  }

  // Verify SRC1 for VOP2 and VOPC
  if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
    if (Src1.isImm() || Src1.isFPImm()) {
      ErrInfo = "VOP[2C] src1 cannot be an immediate.";
      return false;
    }
  }

  // Verify VOP3
  if (isVOP3(Opcode)) {
    if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
      ErrInfo = "VOP3 src0 cannot be a literal constant.";
      return false;
    }
    if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
      ErrInfo = "VOP3 src1 cannot be a literal constant.";
      return false;
    }
    if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
      ErrInfo = "VOP3 src2 cannot be a literal constant.";
      return false;
    }
  }

  // Verify misc. restrictions on specific instructions.
  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
    const MachineOperand &Src0 = MI->getOperand(2);
    const MachineOperand &Src1 = MI->getOperand(3);
    const MachineOperand &Src2 = MI->getOperand(4);
    if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
      if (!compareMachineOp(Src0, Src1) &&
          !compareMachineOp(Src0, Src2)) {
        ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
        return false;
      }
    }
  }

  return true;
}

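// Return the VALU opcode that can replace the given SALU opcode when an
// instruction has to move off the scalar unit, or INSTRUCTION_LIST_END if
// there is no VALU equivalent.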
unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::S_MOV_B32:
    return MI.getOperand(1).isReg() ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
  case AMDGPU::S_LOAD_DWORD_IMM:
  case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
  case AMDGPU::S_LOAD_DWORDX2_IMM:
  case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  }
}

bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
}

const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
                                                      unsigned OpNo) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const MCInstrDesc &Desc = get(MI.getOpcode());
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.OpInfo[OpNo].RegClass == -1)
    return MRI.getRegClass(MI.getOperand(OpNo).getReg());

  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return RI.getRegClass(RCID);
}

bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::PHI:
  case AMDGPU::INSERT_SUBREG:
    return RI.hasVGPRs(getOpRegClass(MI, 0));
  default:
    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
  }
}

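// Legalize an operand by copying it into a new virtual register of the
// equivalent VGPR class (via COPY, S_MOV_B32, or V_MOV_B32 as appropriate)
// and rewriting the operand to use that register.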
void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
  MachineBasicBlock::iterator I = MI;
  MachineOperand &MO = MI->getOperand(OpIdx);
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (MO.isReg()) {
    Opcode = AMDGPU::COPY;
  } else if (RI.isSGPRClass(RC)) {
    Opcode = AMDGPU::S_MOV_B32;
  }

  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
  unsigned Reg = MRI.createVirtualRegister(VRC);
  BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
          Reg).addOperand(MO);
  MO.ChangeToRegister(Reg, false);
}

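// Extract sub-register SubIdx of SuperReg into a fresh virtual register of
// class SubRC and return it.  An intermediate full copy avoids having to
// compose sub-register indices.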
unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC)
                                         const {
  assert(SuperReg.isReg());

  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
  unsigned SubReg = MRI.createVirtualRegister(SubRC);

  // Just in case the super register is itself a sub-register, copy it to a new
  // value so we don't need to worry about merging its subreg index with the
  // SubIdx passed to this function. The register coalescer should be able to
  // eliminate this extra copy.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          NewSuperReg)
          .addOperand(SuperReg);

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          SubReg)
          .addReg(NewSuperReg, 0, SubIdx);
  return SubReg;
}

MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
  MachineBasicBlock::iterator MII,
  MachineRegisterInfo &MRI,
  MachineOperand &Op,
  const TargetRegisterClass *SuperRC,
  unsigned SubIdx,
  const TargetRegisterClass *SubRC) const {
  if (Op.isImm()) {
    // XXX - Is there a better way to do this?
    if (SubIdx == AMDGPU::sub0)
      return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
    if (SubIdx == AMDGPU::sub1)
      return MachineOperand::CreateImm(Op.getImm() >> 32);

    llvm_unreachable("Unhandled register index for immediate");
  }

  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
                                       SubIdx, SubRC);
  return MachineOperand::CreateReg(SubReg, false);
}

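// Expand a 64-bit immediate move into two S_MOV_B32s plus a REG_SEQUENCE,
// pushing the two halves onto the worklist so they can be moved to the VALU
// in turn.  Returns the register holding the combined 64-bit result.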
unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
                                    MachineBasicBlock::iterator MI,
                                    MachineRegisterInfo &MRI,
                                    const TargetRegisterClass *RC,
                                    const MachineOperand &Op) const {
  MachineBasicBlock *MBB = MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned Dst = MRI.createVirtualRegister(RC);

  MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             LoDst)
    .addImm(Op.getImm() & 0xFFFFFFFF);
  MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             HiDst)
    .addImm(Op.getImm() >> 32);

  BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
    .addReg(LoDst)
    .addImm(AMDGPU::sub0)
    .addReg(HiDst)
    .addImm(AMDGPU::sub1);

  Worklist.push_back(Lo);
  Worklist.push_back(Hi);

  return Dst;
}

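// Rewrite the operands of MI until they satisfy the encoding rules for its
// opcode: VCC/SGPR restrictions for VOP2, the one-SGPR/no-literal limit for
// VOP3, matching register banks for REG_SEQUENCE/PHI/INSERT_SUBREG, and the
// addr64 resource-descriptor form for MUBUF instructions.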
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src2);

  // Legalize VOP2
  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
    MachineOperand &Src0 = MI->getOperand(Src0Idx);
    MachineOperand &Src1 = MI->getOperand(Src1Idx);

    // If the instruction implicitly reads VCC, we can't have any SGPR
    // operands, so move any SGPR operands to VGPRs.
    bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
    if (ReadsVCC && Src0.isReg() &&
        RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
      legalizeOpWithMove(MI, Src0Idx);
      return;
    }

    if (ReadsVCC && Src1.isReg() &&
        RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
      legalizeOpWithMove(MI, Src1Idx);
      return;
    }

    // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
    // be the first operand, and there can only be one.
    if (Src1.isImm() || Src1.isFPImm() ||
        (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
      if (MI->isCommutable()) {
        if (commuteInstruction(MI))
          return;
      }
      legalizeOpWithMove(MI, Src1Idx);
    }
  }

  // XXX - Do any VOP3 instructions read VCC?
  // Legalize VOP3
  if (isVOP3(MI->getOpcode())) {
    int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
    unsigned SGPRReg = AMDGPU::NoRegister;
    for (unsigned i = 0; i < 3; ++i) {
      int Idx = VOP3Idx[i];
      if (Idx == -1)
        continue;
      MachineOperand &MO = MI->getOperand(Idx);

      if (MO.isReg()) {
        if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
          continue; // VGPRs are legal

        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");

        if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
          SGPRReg = MO.getReg();
          // We can use one SGPR in each VOP3 instruction.
          continue;
        }
      } else if (!isLiteralConstant(MO)) {
        // If it is not a register and not a literal constant, then it must be
        // an inline constant which is always legal.
        continue;
      }
      // If we make it this far, then the operand is not legal and we must
      // legalize it.
      legalizeOpWithMove(MI, Idx);
    }
  }

  // Legalize REG_SEQUENCE and PHI
  // The register class of the operands must be the same type as the register
  // class of the output.
  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
      MI->getOpcode() == AMDGPU::PHI) {
    const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      const TargetRegisterClass *OpRC =
              MRI.getRegClass(MI->getOperand(i).getReg());
      if (RI.hasVGPRs(OpRC)) {
        VRC = OpRC;
      } else {
        SRC = OpRC;
      }
    }

    // If any of the operands are VGPR registers, then they all must be;
    // otherwise we will create illegal VGPR->SGPR copies when legalizing
    // them.
    if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
      if (!VRC) {
        assert(SRC);
        VRC = RI.getEquivalentVGPRClass(SRC);
      }
      RC = VRC;
    } else {
      RC = SRC;
    }

    // Update all the operands so they have the same type.
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      unsigned DstReg = MRI.createVirtualRegister(RC);
      MachineBasicBlock *InsertBB;
      MachineBasicBlock::iterator Insert;
      if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
        InsertBB = MI->getParent();
        Insert = MI;
      } else {
        // MI is a PHI instruction.
        InsertBB = MI->getOperand(i + 1).getMBB();
        Insert = InsertBB->getFirstTerminator();
      }
      BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
              get(AMDGPU::COPY), DstReg)
              .addOperand(MI->getOperand(i));
      MI->getOperand(i).setReg(DstReg);
    }
  }

  // Legalize INSERT_SUBREG
  // src0 must have the same register class as dst
  if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned Src0 = MI->getOperand(1).getReg();
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
    if (DstRC != Src0RC) {
      MachineBasicBlock &MBB = *MI->getParent();
      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
              .addReg(Src0);
      MI->getOperand(1).setReg(NewSrc0);
    }
    return;
  }

  // Legalize MUBUF* instructions
  // FIXME: If we start using the non-addr64 instructions for compute, we
  // may need to legalize them here.

  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::srsrc);
  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr);
  if (SRsrcIdx != -1 && VAddrIdx != -1) {
    const TargetRegisterClass *VAddrRC =
        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);

    if (VAddrRC->getSize() == 8 &&
        MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
      // We have a MUBUF instruction that uses a 64-bit vaddr register and
      // srsrc has the incorrect register class.  In order to fix this, we
      // need to extract the pointer from the resource descriptor (srsrc),
      // add it to the value of vaddr, then store the result in the vaddr
      // operand.  Then, we need to set the pointer field of the resource
      // descriptor to zero.

      MachineBasicBlock &MBB = *MI->getParent();
      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);

      // SRsrcPtrLo = srsrc:sub0
      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // SRsrcPtrHi = srsrc:sub1
      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // VAddrLo = vaddr:sub0
      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // VAddrHi = vaddr:sub1
      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // NewVaddrLo = SRsrcPtrLo + VAddrLo
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
              NewVAddrLo)
              .addReg(SRsrcPtrLo)
              .addReg(VAddrLo)
              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);

      // NewVaddrHi = SRsrcPtrHi + VAddrHi
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
              NewVAddrHi)
              .addReg(SRsrcPtrHi)
              .addReg(VAddrHi)
              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
              .addReg(AMDGPU::VCC, RegState::Implicit);

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewVAddr)
              .addReg(NewVAddrLo)
              .addImm(AMDGPU::sub0)
              .addReg(NewVAddrHi)
              .addImm(AMDGPU::sub1);

      // Zero64 = 0
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
              Zero64)
              .addImm(0);

      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatLo)
              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);

      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatHi)
              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);

      // NewSRsrc = {Zero64, SRsrcFormat}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewSRsrc)
              .addReg(Zero64)
              .addImm(AMDGPU::sub0_sub1)
              .addReg(SRsrcFormatLo)
              .addImm(AMDGPU::sub2)
              .addReg(SRsrcFormatHi)
              .addImm(AMDGPU::sub3);

      // Update the instruction to use NewVaddr
      MI->getOperand(VAddrIdx).setReg(NewVAddr);
      // Update the instruction to use NewSRsrc
      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
    }
  }
}

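// Rewrite an SMRD load as the equivalent MUBUF addr64 load: build a 128-bit
// resource descriptor from the offset and RSRC_DATA_FORMAT, convert the
// dword offset to a byte offset, and update the operands in place.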
void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
  MachineBasicBlock *MBB = MI->getParent();
  switch (MI->getOpcode()) {
    case AMDGPU::S_LOAD_DWORD_IMM:
    case AMDGPU::S_LOAD_DWORD_SGPR:
    case AMDGPU::S_LOAD_DWORDX2_IMM:
    case AMDGPU::S_LOAD_DWORDX2_SGPR:
    case AMDGPU::S_LOAD_DWORDX4_IMM:
    case AMDGPU::S_LOAD_DWORDX4_SGPR:
      unsigned NewOpcode = getVALUOp(*MI);
      unsigned RegOffset;
      unsigned ImmOffset;

      if (MI->getOperand(2).isReg()) {
        RegOffset = MI->getOperand(2).getReg();
        ImmOffset = 0;
      } else {
        assert(MI->getOperand(2).isImm());
        // SMRD instructions take a dword offset and MUBUF instructions
        // take a byte offset.
        ImmOffset = MI->getOperand(2).getImm() << 2;
        RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
        if (isUInt<12>(ImmOffset)) {
          BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                  RegOffset)
                  .addImm(0);
        } else {
          BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                  RegOffset)
                  .addImm(ImmOffset);
          ImmOffset = 0;
        }
      }

      unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
      unsigned DWord0 = RegOffset;
      unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
              .addImm(0);
      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
              .addReg(DWord0)
              .addImm(AMDGPU::sub0)
              .addReg(DWord1)
              .addImm(AMDGPU::sub1)
              .addReg(DWord2)
              .addImm(AMDGPU::sub2)
              .addReg(DWord3)
              .addImm(AMDGPU::sub3);
      MI->setDesc(get(NewOpcode));
      if (MI->getOperand(2).isReg()) {
        MI->getOperand(2).setReg(MI->getOperand(1).getReg());
      } else {
        MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
      }
      MI->getOperand(1).setReg(SRsrc);
      MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
  }
}

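// Move TopInst and, transitively, any users that cannot read VGPRs over to
// VALU instructions: 64-bit scalar ops are split into 32-bit halves, SMRD
// loads become MUBUF loads, SCC references are dropped in favor of VCC, and
// destination registers are switched to equivalent VGPR classes.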
   1173 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
   1174   SmallVector<MachineInstr *, 128> Worklist;
   1175   Worklist.push_back(&TopInst);
   1176 
   1177   while (!Worklist.empty()) {
   1178     MachineInstr *Inst = Worklist.pop_back_val();
   1179     MachineBasicBlock *MBB = Inst->getParent();
   1180     MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
   1181 
   1182     unsigned Opcode = Inst->getOpcode();
   1183     unsigned NewOpcode = getVALUOp(*Inst);
   1184 
   1185     // Handle some special cases
   1186     switch (Opcode) {
   1187     default:
   1188       if (isSMRD(Inst->getOpcode())) {
   1189         moveSMRDToVALU(Inst, MRI);
   1190       }
   1191       break;
   1192     case AMDGPU::S_MOV_B64: {
   1193       DebugLoc DL = Inst->getDebugLoc();
   1194 
   1195       // If the source operand is a register we can replace this with a
   1196       // copy.
   1197       if (Inst->getOperand(1).isReg()) {
   1198         MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
   1199           .addOperand(Inst->getOperand(0))
   1200           .addOperand(Inst->getOperand(1));
   1201         Worklist.push_back(Copy);
   1202       } else {
   1203         // Otherwise, we need to split this into two movs, because there is
   1204         // no 64-bit VALU move instruction.
   1205         unsigned Reg = Inst->getOperand(0).getReg();
   1206         unsigned Dst = split64BitImm(Worklist,
   1207                                      Inst,
   1208                                      MRI,
   1209                                      MRI.getRegClass(Reg),
   1210                                      Inst->getOperand(1));
   1211         MRI.replaceRegWith(Reg, Dst);
   1212       }
   1213       Inst->eraseFromParent();
   1214       continue;
   1215     }
   1216     case AMDGPU::S_AND_B64:
   1217       splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
   1218       Inst->eraseFromParent();
   1219       continue;
   1220 
   1221     case AMDGPU::S_OR_B64:
   1222       splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
   1223       Inst->eraseFromParent();
   1224       continue;
   1225 
   1226     case AMDGPU::S_XOR_B64:
   1227       splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
   1228       Inst->eraseFromParent();
   1229       continue;
   1230 
   1231     case AMDGPU::S_NOT_B64:
   1232       splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
   1233       Inst->eraseFromParent();
   1234       continue;
   1235 
   1236     case AMDGPU::S_BCNT1_I32_B64:
   1237       splitScalar64BitBCNT(Worklist, Inst);
   1238       Inst->eraseFromParent();
   1239       continue;
   1240 
   1241     case AMDGPU::S_BFE_U64:
   1242     case AMDGPU::S_BFE_I64:
   1243     case AMDGPU::S_BFM_B64:
   1244       llvm_unreachable("Moving this op to VALU not implemented");
   1245     }
   1246 
   1247     if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
   1248       // We cannot move this instruction to the VALU, so we should try to
   1249       // legalize its operands instead.
   1250       legalizeOperands(Inst);
   1251       continue;
   1252     }
   1253 
   1254     // Use the new VALU Opcode.
   1255     const MCInstrDesc &NewDesc = get(NewOpcode);
   1256     Inst->setDesc(NewDesc);
   1257 
   1258     // Remove any references to SCC. Vector instructions can't read from it, and
   1259     // We're just about to add the implicit use / defs of VCC, and we don't want
   1260     // both.
   1261     for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
   1262       MachineOperand &Op = Inst->getOperand(i);
   1263       if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
   1264         Inst->RemoveOperand(i);
   1265     }

    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
      // We are converting these to a BFE, so we need to add the missing
      // operands for the size and offset.
      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
      Inst->addOperand(Inst->getOperand(1));
      Inst->getOperand(1).ChangeToImmediate(0);
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Size));

      // XXX - Other pointless operands. There are 4, but it seems you only
      // need 3 to not hit an assertion later in MCInstLower.
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
      // The VALU version adds the second operand to the result, so insert an
      // extra 0 operand.
      Inst->addOperand(MachineOperand::CreateImm(0));
    }
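    // Net effect of the fix-ups above, with illustrative operand lists:
    // S_SEXT_I32_I8 (%dst, %src) becomes
    // (%dst, 0, %src, 0, /*offset*/ 0, 0, /*width*/ 8, 0, 0), i.e. a
    // bitfield extract of %src at offset 0 with width 8, the zeros filling
    // what appear to be modifier slots. S_BCNT1_I32_B32 (%dst, %src) gains a
    // trailing 0 because V_BCNT_U32_B32 computes popcount(src0) + src1.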

    addDescImplicitUseDef(NewDesc, Inst);

    if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
      // To move this to VGPRs, the packed second operand must be unpacked
      // back into the two separate operands for bit offset and width.
      const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
      assert(OffsetWidthOp.isImm() &&
             "Scalar BFE is only implemented for constant width and offset");
      uint32_t Imm = OffsetWidthOp.getImm();

      uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
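      // For example, Imm == 0x00080004 unpacks to Offset == 4 and
      // BitWidth == 8.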

      Inst->RemoveOperand(2); // Remove old immediate.
      Inst->addOperand(Inst->getOperand(1));
      Inst->getOperand(1).ChangeToImmediate(0);
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Offset));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(BitWidth));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
    }

    // Update the destination register class.
    const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);

    switch (Opcode) {
      // For target instructions, getOpRegClass just returns the virtual
      // register class associated with the operand, so we need to find an
      // equivalent VGPR register class in order to move the instruction to
      // the VALU.
    case AMDGPU::COPY:
    case AMDGPU::PHI:
    case AMDGPU::REG_SEQUENCE:
    case AMDGPU::INSERT_SUBREG:
      if (RI.hasVGPRs(NewDstRC))
        continue;
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
      if (!NewDstRC)
        continue;
      break;
    default:
      break;
    }
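    // E.g. (illustrative): for a COPY whose result class was SReg_64,
    // NewDstRC is now the equivalent VReg_64 class.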

    unsigned DstReg = Inst->getOperand(0).getReg();
    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);

    // Legalize the operands.
    legalizeOperands(Inst);

    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
           E = MRI.use_end(); I != E; ++I) {
      MachineInstr &UseMI = *I->getParent();
      if (!canReadVGPR(UseMI, I.getOperandNo())) {
        Worklist.push_back(&UseMI);
      }
    }
  }
}

//===----------------------------------------------------------------------===//
// Indirect addressing callbacks
//===----------------------------------------------------------------------===//

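// Unlike R600, SI indirect addressing has no notion of per-channel vector
// components, so the register index is used directly.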
unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                               unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::VReg_32RegClass;
}

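// Split a 64-bit scalar unary operation into two 32-bit halves joined back
// together with a REG_SEQUENCE. A sketch, with illustrative virtual
// registers:
//   %dst = S_NOT_B64 %src
// becomes
//   %lo  = S_NOT_B32 %src:sub0
//   %hi  = S_NOT_B32 %src:sub1
//   %dst = REG_SEQUENCE %lo, sub0, %hi, sub1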
void SIInstrInfo::splitScalar64BitUnaryOp(
  SmallVectorImpl<MachineInstr *> &Worklist,
  MachineInstr *Inst,
  unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);

  // Each half of the result must live in the 32-bit sub-register class, not
  // the full 64-bit class.
  unsigned DestSub0 = MRI.createVirtualRegister(DestSubRC);
  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Push the halves onto the worklist so their operands get legalized, e.g.
  // in case the operand order needs to be swapped to stay valid.
  Worklist.push_back(LoHalf);
  Worklist.push_back(HiHalf);
}

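// The two-source counterpart of splitScalar64BitUnaryOp. Sketch, with
// illustrative virtual registers:
//   %dst = S_AND_B64 %a, %b
// becomes
//   %lo  = S_AND_B32 %a:sub0, %b:sub0
//   %hi  = S_AND_B32 %a:sub1, %b:sub1
//   %dst = REG_SEQUENCE %lo, sub0, %hi, sub1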
void SIInstrInfo::splitScalar64BitBinaryOp(
  SmallVectorImpl<MachineInstr *> &Worklist,
  MachineInstr *Inst,
  unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  MachineOperand &Src1 = Inst->getOperand(2);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
    MRI.getRegClass(Src0.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1RC = Src1.isReg() ?
    MRI.getRegClass(Src1.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);
  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub0, Src1SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);

  // As in splitScalar64BitUnaryOp, each half of the result must live in the
  // 32-bit sub-register class.
  unsigned DestSub0 = MRI.createVirtualRegister(DestSubRC);
  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
    .addOperand(SrcReg0Sub0)
    .addOperand(SrcReg1Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);
  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub1, Src1SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
    .addOperand(SrcReg0Sub1)
    .addOperand(SrcReg1Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
    .addReg(DestSub0)
    .addImm(AMDGPU::sub0)
    .addReg(DestSub1)
    .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Push the halves onto the worklist so their operands get legalized, e.g.
  // in case the operand order needs to be swapped to stay valid.
  Worklist.push_back(LoHalf);
  Worklist.push_back(HiHalf);
}

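// Split S_BCNT1_I32_B64 by accumulating the two 32-bit popcounts, since
// popcount(src) == popcount(src.lo) + popcount(src.hi). Sketch, with
// illustrative virtual registers:
//   %mid = V_BCNT_U32_B32_e32 %src:sub0, 0
//   %dst = V_BCNT_U32_B32_e32 %src:sub1, %mid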
void SIInstrInfo::splitScalar64BitBCNT(
  SmallVectorImpl<MachineInstr *> &Worklist,
  MachineInstr *Inst) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineBasicBlock::iterator MII = Inst;
  DebugLoc DL = Inst->getDebugLoc();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src = Inst->getOperand(1);

  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32);
  const TargetRegisterClass *SrcRC = Src.isReg() ?
    MRI.getRegClass(Src.getReg()) :
    &AMDGPU::SGPR_32RegClass;

  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);

  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub0, SrcSubRC);
  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
                                                      AMDGPU::sub1, SrcSubRC);

  MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
    .addOperand(SrcRegSub0)
    .addImm(0);

  MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
    .addOperand(SrcRegSub1)
    .addReg(MidReg);

  MRI.replaceRegWith(Dest.getReg(), ResultReg);

  Worklist.push_back(First);
  Worklist.push_back(Second);
}

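// Copy the implicit register uses and defs listed in the MCInstrDesc onto
// the MachineInstr itself as implicit operands.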
void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
                                        MachineInstr *Inst) const {
  // Add the implicit register uses and definitions.
  if (NewDesc.ImplicitUses) {
    for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
      unsigned Reg = NewDesc.ImplicitUses[i];
      Inst->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false,
                                                 /*isImp=*/true));
    }
  }

  if (NewDesc.ImplicitDefs) {
    for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
      unsigned Reg = NewDesc.ImplicitDefs[i];
      Inst->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/true,
                                                 /*isImp=*/true));
    }
  }
}

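// Build the SI_INDIRECT_DST_V1 pseudo for an indirect register write; later
// passes are expected to expand it (into a v_movreld-style sequence).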
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
          .addReg(IndirectBaseReg, RegState::Define)
          .addOperand(I->getOperand(0))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0)
          .addReg(ValueReg);
}

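// Build the SI_INDIRECT_SRC pseudo for an indirect register read; later
// passes are expected to expand it (into a v_movrels-style sequence).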
MachineInstrBuilder SIInstrInfo::buildIndirectRead(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0);
}

void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                           const MachineFunction &MF) const {
  int End = getIndirectIndexEnd(MF);
  int Begin = getIndirectIndexBegin(MF);

  if (End == -1)
    return;

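  // Wider register classes alias the underlying 32-bit registers, so any
  // wide register that merely overlaps [Begin, End] must be reserved too:
  // extend each range downward by the class width in dwords minus one
  // (e.g. a VReg_64 starting at Begin - 1 still covers the VReg_32 at
  // Begin).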
  for (int Index = Begin; Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}
   1603