      1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AArch64InstrInfo.h"
     15 #include "AArch64MachineCombinerPattern.h"
     16 #include "AArch64Subtarget.h"
     17 #include "MCTargetDesc/AArch64AddressingModes.h"
     18 #include "llvm/CodeGen/MachineFrameInfo.h"
     19 #include "llvm/CodeGen/MachineInstrBuilder.h"
     20 #include "llvm/CodeGen/MachineMemOperand.h"
     21 #include "llvm/CodeGen/MachineRegisterInfo.h"
     22 #include "llvm/CodeGen/PseudoSourceValue.h"
     23 #include "llvm/MC/MCInst.h"
     24 #include "llvm/Support/ErrorHandling.h"
     25 #include "llvm/Support/TargetRegistry.h"
     26 
     27 using namespace llvm;
     28 
     29 #define GET_INSTRINFO_CTOR_DTOR
     30 #include "AArch64GenInstrInfo.inc"
     31 
     32 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
     33     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
     34       RI(STI.getTargetTriple()), Subtarget(STI) {}
     35 
      36 /// GetInstSizeInBytes - Return the number of bytes of code the specified
      37 /// instruction may occupy. This returns the maximum number of bytes.
     38 unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
     39   const MachineBasicBlock &MBB = *MI->getParent();
     40   const MachineFunction *MF = MBB.getParent();
     41   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
     42 
     43   if (MI->getOpcode() == AArch64::INLINEASM)
     44     return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
     45 
     46   const MCInstrDesc &Desc = MI->getDesc();
     47   switch (Desc.getOpcode()) {
     48   default:
      49     // Anything not explicitly designated otherwise is a normal 4-byte insn.
     50     return 4;
     51   case TargetOpcode::DBG_VALUE:
     52   case TargetOpcode::EH_LABEL:
     53   case TargetOpcode::IMPLICIT_DEF:
     54   case TargetOpcode::KILL:
     55     return 0;
     56   }
     57 
      58   llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
     59 }
     60 
     61 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
     62                             SmallVectorImpl<MachineOperand> &Cond) {
     63   // Block ends with fall-through condbranch.
     64   switch (LastInst->getOpcode()) {
     65   default:
     66     llvm_unreachable("Unknown branch instruction?");
     67   case AArch64::Bcc:
     68     Target = LastInst->getOperand(1).getMBB();
     69     Cond.push_back(LastInst->getOperand(0));
     70     break;
     71   case AArch64::CBZW:
     72   case AArch64::CBZX:
     73   case AArch64::CBNZW:
     74   case AArch64::CBNZX:
     75     Target = LastInst->getOperand(1).getMBB();
     76     Cond.push_back(MachineOperand::CreateImm(-1));
     77     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     78     Cond.push_back(LastInst->getOperand(0));
     79     break;
     80   case AArch64::TBZW:
     81   case AArch64::TBZX:
     82   case AArch64::TBNZW:
     83   case AArch64::TBNZX:
     84     Target = LastInst->getOperand(2).getMBB();
     85     Cond.push_back(MachineOperand::CreateImm(-1));
     86     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     87     Cond.push_back(LastInst->getOperand(0));
     88     Cond.push_back(LastInst->getOperand(1));
     89   }
     90 }
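         // For reference, the Cond encodings produced above, shown next to the kind of
         // branch they describe (an illustrative sketch only; the register operands are
         // placeholders, not values taken from this file):
         //   b.ge  <target>          ->  Cond = { <AArch64CC::GE> }
         //   cbnz  w0, <target>      ->  Cond = { -1, CBNZW, <w0> }
         //   tbz   x1, #3, <target>  ->  Cond = { -1, TBZX, <x1>, 3 }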
     91 
     92 // Branch analysis.
     93 bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
     94                                    MachineBasicBlock *&TBB,
     95                                    MachineBasicBlock *&FBB,
     96                                    SmallVectorImpl<MachineOperand> &Cond,
     97                                    bool AllowModify) const {
     98   // If the block has no terminators, it just falls into the block after it.
     99   MachineBasicBlock::iterator I = MBB.end();
    100   if (I == MBB.begin())
    101     return false;
    102   --I;
    103   while (I->isDebugValue()) {
    104     if (I == MBB.begin())
    105       return false;
    106     --I;
    107   }
    108   if (!isUnpredicatedTerminator(I))
    109     return false;
    110 
    111   // Get the last instruction in the block.
    112   MachineInstr *LastInst = I;
    113 
    114   // If there is only one terminator instruction, process it.
    115   unsigned LastOpc = LastInst->getOpcode();
    116   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    117     if (isUncondBranchOpcode(LastOpc)) {
    118       TBB = LastInst->getOperand(0).getMBB();
    119       return false;
    120     }
    121     if (isCondBranchOpcode(LastOpc)) {
    122       // Block ends with fall-through condbranch.
    123       parseCondBranch(LastInst, TBB, Cond);
    124       return false;
    125     }
    126     return true; // Can't handle indirect branch.
    127   }
    128 
    129   // Get the instruction before it if it is a terminator.
    130   MachineInstr *SecondLastInst = I;
    131   unsigned SecondLastOpc = SecondLastInst->getOpcode();
    132 
    133   // If AllowModify is true and the block ends with two or more unconditional
    134   // branches, delete all but the first unconditional branch.
    135   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    136     while (isUncondBranchOpcode(SecondLastOpc)) {
    137       LastInst->eraseFromParent();
    138       LastInst = SecondLastInst;
    139       LastOpc = LastInst->getOpcode();
    140       if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
     141         // Return now; the only terminator is an unconditional branch.
    142         TBB = LastInst->getOperand(0).getMBB();
    143         return false;
    144       } else {
    145         SecondLastInst = I;
    146         SecondLastOpc = SecondLastInst->getOpcode();
    147       }
    148     }
    149   }
    150 
    151   // If there are three terminators, we don't know what sort of block this is.
    152   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    153     return true;
    154 
    155   // If the block ends with a B and a Bcc, handle it.
    156   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    157     parseCondBranch(SecondLastInst, TBB, Cond);
    158     FBB = LastInst->getOperand(0).getMBB();
    159     return false;
    160   }
    161 
    162   // If the block ends with two unconditional branches, handle it.  The second
    163   // one is not executed, so remove it.
    164   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    165     TBB = SecondLastInst->getOperand(0).getMBB();
    166     I = LastInst;
    167     if (AllowModify)
    168       I->eraseFromParent();
    169     return false;
    170   }
    171 
    172   // ...likewise if it ends with an indirect branch followed by an unconditional
    173   // branch.
    174   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    175     I = LastInst;
    176     if (AllowModify)
    177       I->eraseFromParent();
    178     return true;
    179   }
    180 
    181   // Otherwise, can't handle this.
    182   return true;
    183 }
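         // A small worked example of the two-terminator case handled above (a sketch;
         // the block names are hypothetical): for a block ending in
         //   b.lt %bb.1
         //   b    %bb.2
         // AnalyzeBranch returns false with TBB = %bb.1, FBB = %bb.2 and
         // Cond = { LT }, which is exactly the form InsertBranch below re-emits.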
    184 
    185 bool AArch64InstrInfo::ReverseBranchCondition(
    186     SmallVectorImpl<MachineOperand> &Cond) const {
    187   if (Cond[0].getImm() != -1) {
    188     // Regular Bcc
    189     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    190     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
    191   } else {
    192     // Folded compare-and-branch
    193     switch (Cond[1].getImm()) {
    194     default:
    195       llvm_unreachable("Unknown conditional branch!");
    196     case AArch64::CBZW:
    197       Cond[1].setImm(AArch64::CBNZW);
    198       break;
    199     case AArch64::CBNZW:
    200       Cond[1].setImm(AArch64::CBZW);
    201       break;
    202     case AArch64::CBZX:
    203       Cond[1].setImm(AArch64::CBNZX);
    204       break;
    205     case AArch64::CBNZX:
    206       Cond[1].setImm(AArch64::CBZX);
    207       break;
    208     case AArch64::TBZW:
    209       Cond[1].setImm(AArch64::TBNZW);
    210       break;
    211     case AArch64::TBNZW:
    212       Cond[1].setImm(AArch64::TBZW);
    213       break;
    214     case AArch64::TBZX:
    215       Cond[1].setImm(AArch64::TBNZX);
    216       break;
    217     case AArch64::TBNZX:
    218       Cond[1].setImm(AArch64::TBZX);
    219       break;
    220     }
    221   }
    222 
    223   return false;
    224 }
    225 
    226 unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
    227   MachineBasicBlock::iterator I = MBB.end();
    228   if (I == MBB.begin())
    229     return 0;
    230   --I;
    231   while (I->isDebugValue()) {
    232     if (I == MBB.begin())
    233       return 0;
    234     --I;
    235   }
    236   if (!isUncondBranchOpcode(I->getOpcode()) &&
    237       !isCondBranchOpcode(I->getOpcode()))
    238     return 0;
    239 
    240   // Remove the branch.
    241   I->eraseFromParent();
    242 
    243   I = MBB.end();
    244 
    245   if (I == MBB.begin())
    246     return 1;
    247   --I;
    248   if (!isCondBranchOpcode(I->getOpcode()))
    249     return 1;
    250 
    251   // Remove the branch.
    252   I->eraseFromParent();
    253   return 2;
    254 }
    255 
    256 void AArch64InstrInfo::instantiateCondBranch(
    257     MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
    258     const SmallVectorImpl<MachineOperand> &Cond) const {
    259   if (Cond[0].getImm() != -1) {
    260     // Regular Bcc
    261     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
    262   } else {
    263     // Folded compare-and-branch
    264     // Note that we use addOperand instead of addReg to keep the flags.
    265     const MachineInstrBuilder MIB =
    266         BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
    267     if (Cond.size() > 3)
    268       MIB.addImm(Cond[3].getImm());
    269     MIB.addMBB(TBB);
    270   }
    271 }
    272 
    273 unsigned AArch64InstrInfo::InsertBranch(
    274     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    275     const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
    276   // Shouldn't be a fall through.
    277   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
    278 
    279   if (!FBB) {
    280     if (Cond.empty()) // Unconditional branch?
    281       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    282     else
    283       instantiateCondBranch(MBB, DL, TBB, Cond);
    284     return 1;
    285   }
    286 
    287   // Two-way conditional branch.
    288   instantiateCondBranch(MBB, DL, TBB, Cond);
    289   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
    290   return 2;
    291 }
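         // A minimal usage sketch (hypothetical caller, not part of this file): swap the
         // two successors of an analyzable block by reversing the condition and
         // re-emitting the branches. TII, MBB and DL are assumed to be in scope.
         //
         //   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
         //   SmallVector<MachineOperand, 4> Cond;
         //   if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false) &&
         //       FBB && !Cond.empty() && !TII->ReverseBranchCondition(Cond)) {
         //     TII->RemoveBranch(MBB);
         //     TII->InsertBranch(MBB, FBB, TBB, Cond, DL); // conditional now goes to FBB
         //   }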
    292 
    293 // Find the original register that VReg is copied from.
    294 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
    295   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    296     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    297     if (!DefMI->isFullCopy())
    298       return VReg;
    299     VReg = DefMI->getOperand(1).getReg();
    300   }
    301   return VReg;
    302 }
    303 
    304 // Determine if VReg is defined by an instruction that can be folded into a
    305 // csel instruction. If so, return the folded opcode, and the replacement
    306 // register.
    307 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
    308                                 unsigned *NewVReg = nullptr) {
    309   VReg = removeCopies(MRI, VReg);
    310   if (!TargetRegisterInfo::isVirtualRegister(VReg))
    311     return 0;
    312 
    313   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
    314   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    315   unsigned Opc = 0;
    316   unsigned SrcOpNum = 0;
    317   switch (DefMI->getOpcode()) {
    318   case AArch64::ADDSXri:
    319   case AArch64::ADDSWri:
    320     // if NZCV is used, do not fold.
    321     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
    322       return 0;
    323   // fall-through to ADDXri and ADDWri.
    324   case AArch64::ADDXri:
    325   case AArch64::ADDWri:
    326     // add x, 1 -> csinc.
    327     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
    328         DefMI->getOperand(3).getImm() != 0)
    329       return 0;
    330     SrcOpNum = 1;
    331     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    332     break;
    333 
    334   case AArch64::ORNXrr:
    335   case AArch64::ORNWrr: {
    336     // not x -> csinv, represented as orn dst, xzr, src.
    337     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    338     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
    339       return 0;
    340     SrcOpNum = 2;
    341     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    342     break;
    343   }
    344 
    345   case AArch64::SUBSXrr:
    346   case AArch64::SUBSWrr:
    347     // if NZCV is used, do not fold.
    348     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
    349       return 0;
    350   // fall-through to SUBXrr and SUBWrr.
    351   case AArch64::SUBXrr:
    352   case AArch64::SUBWrr: {
    353     // neg x -> csneg, represented as sub dst, xzr, src.
    354     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    355     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
    356       return 0;
    357     SrcOpNum = 2;
    358     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    359     break;
    360   }
    361   default:
    362     return 0;
    363   }
    364   assert(Opc && SrcOpNum && "Missing parameters");
    365 
    366   if (NewVReg)
    367     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
    368   return Opc;
    369 }
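         // For illustration (a sketch; virtual registers are placeholders), the folds
         // recognized above turn a select of a simple computation into one instruction:
         //   %t = ADDWri %a, 1, 0   ; %t = %a + 1  -> select folds to CSINCWr .., %a, ..
         //   %t = ORNWrr wzr, %a    ; %t = ~%a     -> select folds to CSINVWr .., %a, ..
         //   %t = SUBWrr wzr, %a    ; %t = -%a     -> select folds to CSNEGWr .., %a, ..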
    370 
    371 bool AArch64InstrInfo::canInsertSelect(
    372     const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond,
    373     unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    374     int &FalseCycles) const {
    375   // Check register classes.
    376   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    377   const TargetRegisterClass *RC =
    378       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
    379   if (!RC)
    380     return false;
    381 
    382   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
    383   unsigned ExtraCondLat = Cond.size() != 1;
    384 
    385   // GPRs are handled by csel.
    386   // FIXME: Fold in x+1, -x, and ~x when applicable.
    387   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
    388       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    389     // Single-cycle csel, csinc, csinv, and csneg.
    390     CondCycles = 1 + ExtraCondLat;
    391     TrueCycles = FalseCycles = 1;
    392     if (canFoldIntoCSel(MRI, TrueReg))
    393       TrueCycles = 0;
    394     else if (canFoldIntoCSel(MRI, FalseReg))
    395       FalseCycles = 0;
    396     return true;
    397   }
    398 
    399   // Scalar floating point is handled by fcsel.
    400   // FIXME: Form fabs, fmin, and fmax when applicable.
    401   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
    402       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    403     CondCycles = 5 + ExtraCondLat;
    404     TrueCycles = FalseCycles = 2;
    405     return true;
    406   }
    407 
    408   // Can't do vectors.
    409   return false;
    410 }
    411 
    412 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
    413                                     MachineBasicBlock::iterator I, DebugLoc DL,
    414                                     unsigned DstReg,
    415                                     const SmallVectorImpl<MachineOperand> &Cond,
    416                                     unsigned TrueReg, unsigned FalseReg) const {
    417   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    418 
    419   // Parse the condition code, see parseCondBranch() above.
    420   AArch64CC::CondCode CC;
    421   switch (Cond.size()) {
    422   default:
    423     llvm_unreachable("Unknown condition opcode in Cond");
    424   case 1: // b.cc
    425     CC = AArch64CC::CondCode(Cond[0].getImm());
    426     break;
    427   case 3: { // cbz/cbnz
    428     // We must insert a compare against 0.
    429     bool Is64Bit;
    430     switch (Cond[1].getImm()) {
    431     default:
    432       llvm_unreachable("Unknown branch opcode in Cond");
    433     case AArch64::CBZW:
     434       Is64Bit = false;
    435       CC = AArch64CC::EQ;
    436       break;
    437     case AArch64::CBZX:
     438       Is64Bit = true;
    439       CC = AArch64CC::EQ;
    440       break;
    441     case AArch64::CBNZW:
     442       Is64Bit = false;
    443       CC = AArch64CC::NE;
    444       break;
    445     case AArch64::CBNZX:
     446       Is64Bit = true;
    447       CC = AArch64CC::NE;
    448       break;
    449     }
    450     unsigned SrcReg = Cond[2].getReg();
    451     if (Is64Bit) {
    452       // cmp reg, #0 is actually subs xzr, reg, #0.
    453       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
    454       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
    455           .addReg(SrcReg)
    456           .addImm(0)
    457           .addImm(0);
    458     } else {
    459       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
    460       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
    461           .addReg(SrcReg)
    462           .addImm(0)
    463           .addImm(0);
    464     }
    465     break;
    466   }
    467   case 4: { // tbz/tbnz
    468     // We must insert a tst instruction.
    469     switch (Cond[1].getImm()) {
    470     default:
    471       llvm_unreachable("Unknown branch opcode in Cond");
    472     case AArch64::TBZW:
    473     case AArch64::TBZX:
    474       CC = AArch64CC::EQ;
    475       break;
    476     case AArch64::TBNZW:
    477     case AArch64::TBNZX:
    478       CC = AArch64CC::NE;
    479       break;
    480     }
    481     // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    482     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
    483       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
    484           .addReg(Cond[2].getReg())
    485           .addImm(
    486               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    487     else
    488       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
    489           .addReg(Cond[2].getReg())
    490           .addImm(
    491               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    492     break;
    493   }
    494   }
    495 
    496   unsigned Opc = 0;
    497   const TargetRegisterClass *RC = nullptr;
    498   bool TryFold = false;
    499   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    500     RC = &AArch64::GPR64RegClass;
    501     Opc = AArch64::CSELXr;
    502     TryFold = true;
    503   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    504     RC = &AArch64::GPR32RegClass;
    505     Opc = AArch64::CSELWr;
    506     TryFold = true;
    507   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    508     RC = &AArch64::FPR64RegClass;
    509     Opc = AArch64::FCSELDrrr;
    510   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    511     RC = &AArch64::FPR32RegClass;
    512     Opc = AArch64::FCSELSrrr;
    513   }
    514   assert(RC && "Unsupported regclass");
    515 
    516   // Try folding simple instructions into the csel.
    517   if (TryFold) {
    518     unsigned NewVReg = 0;
    519     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    520     if (FoldedOpc) {
     521       // The folded opcodes csinc, csinv and csneg apply the operation to
    522       // FalseReg, so we need to invert the condition.
    523       CC = AArch64CC::getInvertedCondCode(CC);
    524       TrueReg = FalseReg;
    525     } else
    526       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
    527 
    528     // Fold the operation. Leave any dead instructions for DCE to clean up.
    529     if (FoldedOpc) {
    530       FalseReg = NewVReg;
    531       Opc = FoldedOpc;
     532       // This extends the live range of NewVReg.
    533       MRI.clearKillFlags(NewVReg);
    534     }
    535   }
    536 
     537   // Pull all virtual registers into the appropriate class.
    538   MRI.constrainRegClass(TrueReg, RC);
    539   MRI.constrainRegClass(FalseReg, RC);
    540 
    541   // Insert the csel.
    542   BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
    543       CC);
    544 }
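         // End-to-end sketch of what the code above emits for a folded compare-and-branch
         // condition (register names are placeholders): with Cond = { -1, CBZW, %w0 } and
         // GPR32 operands, the select becomes
         //   SUBSWri wzr, %w0, 0, 0          ; cmp w0, #0, materializing NZCV
         //   CSELWr  %dst, %true, %false, eq
         // and a TBZ/TBNZ condition instead emits ANDSWri/ANDSXri with the single-bit
         // logical immediate before the csel.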
    545 
     546 // FIXME: this implementation should be micro-architecture dependent, so a
     547 // micro-architecture target hook should be introduced here in the future.
    548 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
    549   if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53())
    550     return MI->isAsCheapAsAMove();
    551 
    552   switch (MI->getOpcode()) {
    553   default:
    554     return false;
    555 
    556   // add/sub on register without shift
    557   case AArch64::ADDWri:
    558   case AArch64::ADDXri:
    559   case AArch64::SUBWri:
    560   case AArch64::SUBXri:
    561     return (MI->getOperand(3).getImm() == 0);
    562 
    563   // logical ops on immediate
    564   case AArch64::ANDWri:
    565   case AArch64::ANDXri:
    566   case AArch64::EORWri:
    567   case AArch64::EORXri:
    568   case AArch64::ORRWri:
    569   case AArch64::ORRXri:
    570     return true;
    571 
    572   // logical ops on register without shift
    573   case AArch64::ANDWrr:
    574   case AArch64::ANDXrr:
    575   case AArch64::BICWrr:
    576   case AArch64::BICXrr:
    577   case AArch64::EONWrr:
    578   case AArch64::EONXrr:
    579   case AArch64::EORWrr:
    580   case AArch64::EORXrr:
    581   case AArch64::ORNWrr:
    582   case AArch64::ORNXrr:
    583   case AArch64::ORRWrr:
    584   case AArch64::ORRXrr:
    585     return true;
    586   }
    587 
    588   llvm_unreachable("Unknown opcode to check as cheap as a move!");
    589 }
    590 
    591 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
    592                                              unsigned &SrcReg, unsigned &DstReg,
    593                                              unsigned &SubIdx) const {
    594   switch (MI.getOpcode()) {
    595   default:
    596     return false;
    597   case AArch64::SBFMXri: // aka sxtw
    598   case AArch64::UBFMXri: // aka uxtw
    599     // Check for the 32 -> 64 bit extension case, these instructions can do
    600     // much more.
    601     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
    602       return false;
    603     // This is a signed or unsigned 32 -> 64 bit extension.
    604     SrcReg = MI.getOperand(1).getReg();
    605     DstReg = MI.getOperand(0).getReg();
    606     SubIdx = AArch64::sub_32;
    607     return true;
    608   }
    609 }
    610 
    611 bool
    612 AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
    613                                                   MachineInstr *MIb,
    614                                                   AliasAnalysis *AA) const {
    615   const TargetRegisterInfo *TRI = &getRegisterInfo();
    616   unsigned BaseRegA = 0, BaseRegB = 0;
    617   int OffsetA = 0, OffsetB = 0;
    618   int WidthA = 0, WidthB = 0;
    619 
    620   assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
    621          "MIa must be a store or a load");
    622   assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
    623          "MIb must be a store or a load");
    624 
    625   if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
    626       MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    627     return false;
    628 
    629   // Retrieve the base register, offset from the base register and width. Width
    630   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
    631   // base registers are identical, and the offset of a lower memory access +
    632   // the width doesn't overlap the offset of a higher memory access,
     633   // then the memory accesses are disjoint and cannot alias.
    634   if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
    635       getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    636     if (BaseRegA == BaseRegB) {
    637       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
    638       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
    639       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
    640       if (LowOffset + LowWidth <= HighOffset)
    641         return true;
    642     }
    643   }
    644   return false;
    645 }
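         // Example of the disjointness test above (a sketch): for
         //   ldr x0, [x20, #8]     ; BaseReg = x20, Offset = 8,  Width = 8
         //   str x1, [x20, #16]    ; BaseReg = x20, Offset = 16, Width = 8
         // the bases match and 8 + 8 <= 16, so the two accesses cannot overlap and the
         // function returns true.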
    646 
    647 /// analyzeCompare - For a comparison instruction, return the source registers
    648 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
    649 /// Return true if the comparison instruction can be analyzed.
    650 bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
    651                                       unsigned &SrcReg2, int &CmpMask,
    652                                       int &CmpValue) const {
    653   switch (MI->getOpcode()) {
    654   default:
    655     break;
    656   case AArch64::SUBSWrr:
    657   case AArch64::SUBSWrs:
    658   case AArch64::SUBSWrx:
    659   case AArch64::SUBSXrr:
    660   case AArch64::SUBSXrs:
    661   case AArch64::SUBSXrx:
    662   case AArch64::ADDSWrr:
    663   case AArch64::ADDSWrs:
    664   case AArch64::ADDSWrx:
    665   case AArch64::ADDSXrr:
    666   case AArch64::ADDSXrs:
    667   case AArch64::ADDSXrx:
    668     // Replace SUBSWrr with SUBWrr if NZCV is not used.
    669     SrcReg = MI->getOperand(1).getReg();
    670     SrcReg2 = MI->getOperand(2).getReg();
    671     CmpMask = ~0;
    672     CmpValue = 0;
    673     return true;
    674   case AArch64::SUBSWri:
    675   case AArch64::ADDSWri:
    676   case AArch64::SUBSXri:
    677   case AArch64::ADDSXri:
    678     SrcReg = MI->getOperand(1).getReg();
    679     SrcReg2 = 0;
    680     CmpMask = ~0;
     681     // FIXME: CmpValue is only compared against zero; collapse the immediate to 0 or 1.
    682     CmpValue = (MI->getOperand(2).getImm() != 0);
    683     return true;
    684   case AArch64::ANDSWri:
    685   case AArch64::ANDSXri:
     686     // ANDS does not use the same encoding scheme as the other xxxS
     687     // instructions.
    688     SrcReg = MI->getOperand(1).getReg();
    689     SrcReg2 = 0;
    690     CmpMask = ~0;
     691     // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
     692     // while the type of CmpValue is int. Converting uint64_t to int would lose
     693     // the high 32 bits (this caused a miscompile in spec2006-483.xalancbmk).
     694     // CmpValue is only compared against zero in optimizeCompareInstr, so
     695     // collapse it to 0 or 1 here.
    696     CmpValue = (AArch64_AM::decodeLogicalImmediate(
    697                     MI->getOperand(2).getImm(),
    698                     MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0);
    699     return true;
    700   }
    701 
    702   return false;
    703 }
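         // For example (a sketch with placeholder registers), "subs w8, w1, #7"
         // (SUBSWri) is reported with SrcReg = w1, SrcReg2 = 0, CmpMask = ~0 and
         // CmpValue = 1: any non-zero immediate collapses to 1, since
         // optimizeCompareInstr below only ever tests CmpValue against zero.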
    704 
    705 static bool UpdateOperandRegClass(MachineInstr *Instr) {
    706   MachineBasicBlock *MBB = Instr->getParent();
    707   assert(MBB && "Can't get MachineBasicBlock here");
    708   MachineFunction *MF = MBB->getParent();
    709   assert(MF && "Can't get MachineFunction here");
    710   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
    711   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
    712   MachineRegisterInfo *MRI = &MF->getRegInfo();
    713 
    714   for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
    715        ++OpIdx) {
    716     MachineOperand &MO = Instr->getOperand(OpIdx);
    717     const TargetRegisterClass *OpRegCstraints =
    718         Instr->getRegClassConstraint(OpIdx, TII, TRI);
    719 
    720     // If there's no constraint, there's nothing to do.
    721     if (!OpRegCstraints)
    722       continue;
    723     // If the operand is a frame index, there's nothing to do here.
    724     // A frame index operand will resolve correctly during PEI.
    725     if (MO.isFI())
    726       continue;
    727 
    728     assert(MO.isReg() &&
    729            "Operand has register constraints without being a register!");
    730 
    731     unsigned Reg = MO.getReg();
    732     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
    733       if (!OpRegCstraints->contains(Reg))
    734         return false;
    735     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
    736                !MRI->constrainRegClass(Reg, OpRegCstraints))
    737       return false;
    738   }
    739 
    740   return true;
    741 }
    742 
    743 /// \brief Return the opcode that does not set flags when possible - otherwise
     744 /// return the original opcode. The caller is responsible for doing the actual
     745 /// substitution and legality checking.
    746 static unsigned convertFlagSettingOpcode(const MachineInstr *MI) {
    747   // Don't convert all compare instructions, because for some the zero register
    748   // encoding becomes the sp register.
    749   bool MIDefinesZeroReg = false;
    750   if (MI->definesRegister(AArch64::WZR) || MI->definesRegister(AArch64::XZR))
    751     MIDefinesZeroReg = true;
    752 
    753   switch (MI->getOpcode()) {
    754   default:
    755     return MI->getOpcode();
    756   case AArch64::ADDSWrr:
    757     return AArch64::ADDWrr;
    758   case AArch64::ADDSWri:
    759     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
    760   case AArch64::ADDSWrs:
    761     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
    762   case AArch64::ADDSWrx:
    763     return AArch64::ADDWrx;
    764   case AArch64::ADDSXrr:
    765     return AArch64::ADDXrr;
    766   case AArch64::ADDSXri:
    767     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
    768   case AArch64::ADDSXrs:
    769     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
    770   case AArch64::ADDSXrx:
    771     return AArch64::ADDXrx;
    772   case AArch64::SUBSWrr:
    773     return AArch64::SUBWrr;
    774   case AArch64::SUBSWri:
    775     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
    776   case AArch64::SUBSWrs:
    777     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
    778   case AArch64::SUBSWrx:
    779     return AArch64::SUBWrx;
    780   case AArch64::SUBSXrr:
    781     return AArch64::SUBXrr;
    782   case AArch64::SUBSXri:
    783     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
    784   case AArch64::SUBSXrs:
    785     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
    786   case AArch64::SUBSXrx:
    787     return AArch64::SUBXrx;
    788   }
    789 }
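         // The MIDefinesZeroReg special case above exists because register 31 encodes
         // the zero register in the flag-setting forms but the stack pointer in some of
         // the non-flag-setting forms. For example (illustrative assembly only):
         //   subs wzr, w1, #4     ; a plain "cmp w1, #4"
         // must not be rewritten to the non-flag-setting immediate form, since register
         // 31 would then encode as
         //   sub  wsp, w1, #4     ; writes the stack pointer instead of discarding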
    790 
     791 /// True when the condition code could be modified on the instruction
     792 /// trace starting at \p From and ending at \p To.
    793 static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To,
    794                                   const bool CheckOnlyCCWrites,
    795                                   const TargetRegisterInfo *TRI) {
     796   // We iterate backward starting at \p To until we hit \p From.
    797   MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin();
    798 
    799   // Early exit if To is at the beginning of the BB.
    800   if (I == B)
    801     return true;
    802 
    803   // Check whether the definition of SrcReg is in the same basic block as
    804   // Compare. If not, assume the condition code gets modified on some path.
    805   if (To->getParent() != From->getParent())
    806     return true;
    807 
    808   // Check that NZCV isn't set on the trace.
    809   for (--I; I != E; --I) {
    810     const MachineInstr &Instr = *I;
    811 
    812     if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
    813         (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI)))
    814       // This instruction modifies or uses NZCV after the one we want to
    815       // change.
    816       return true;
    817     if (I == B)
    818       // We currently don't allow the instruction trace to cross basic
    819       // block boundaries
    820       return true;
    821   }
    822   return false;
    823 }
    824 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
    825 /// comparison into one that sets the zero bit in the flags register.
    826 bool AArch64InstrInfo::optimizeCompareInstr(
    827     MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    828     int CmpValue, const MachineRegisterInfo *MRI) const {
    829 
    830   // Replace SUBSWrr with SUBWrr if NZCV is not used.
    831   int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
    832   if (Cmp_NZCV != -1) {
    833     if (CmpInstr->definesRegister(AArch64::WZR) ||
    834         CmpInstr->definesRegister(AArch64::XZR)) {
    835       CmpInstr->eraseFromParent();
    836       return true;
    837     }
    838     unsigned Opc = CmpInstr->getOpcode();
    839     unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    840     if (NewOpc == Opc)
    841       return false;
    842     const MCInstrDesc &MCID = get(NewOpc);
    843     CmpInstr->setDesc(MCID);
    844     CmpInstr->RemoveOperand(Cmp_NZCV);
    845     bool succeeded = UpdateOperandRegClass(CmpInstr);
    846     (void)succeeded;
    847     assert(succeeded && "Some operands reg class are incompatible!");
    848     return true;
    849   }
    850 
     851   // Continue only if we have a "ri" compare where the immediate is zero.
     852   // FIXME: CmpValue has already been converted to 0 or 1 in the analyzeCompare
    853   // function.
    854   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
    855   if (CmpValue != 0 || SrcReg2 != 0)
    856     return false;
    857 
     858   // CmpInstr is a compare instruction if the destination register is not used.
    859   if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
    860     return false;
    861 
    862   // Get the unique definition of SrcReg.
    863   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
    864   if (!MI)
    865     return false;
    866 
    867   bool CheckOnlyCCWrites = false;
    868   const TargetRegisterInfo *TRI = &getRegisterInfo();
    869   if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI))
    870     return false;
    871 
    872   unsigned NewOpc = MI->getOpcode();
    873   switch (MI->getOpcode()) {
    874   default:
    875     return false;
    876   case AArch64::ADDSWrr:
    877   case AArch64::ADDSWri:
    878   case AArch64::ADDSXrr:
    879   case AArch64::ADDSXri:
    880   case AArch64::SUBSWrr:
    881   case AArch64::SUBSWri:
    882   case AArch64::SUBSXrr:
    883   case AArch64::SUBSXri:
    884     break;
    885   case AArch64::ADDWrr:    NewOpc = AArch64::ADDSWrr; break;
    886   case AArch64::ADDWri:    NewOpc = AArch64::ADDSWri; break;
    887   case AArch64::ADDXrr:    NewOpc = AArch64::ADDSXrr; break;
    888   case AArch64::ADDXri:    NewOpc = AArch64::ADDSXri; break;
    889   case AArch64::ADCWr:     NewOpc = AArch64::ADCSWr; break;
    890   case AArch64::ADCXr:     NewOpc = AArch64::ADCSXr; break;
    891   case AArch64::SUBWrr:    NewOpc = AArch64::SUBSWrr; break;
    892   case AArch64::SUBWri:    NewOpc = AArch64::SUBSWri; break;
    893   case AArch64::SUBXrr:    NewOpc = AArch64::SUBSXrr; break;
    894   case AArch64::SUBXri:    NewOpc = AArch64::SUBSXri; break;
    895   case AArch64::SBCWr:     NewOpc = AArch64::SBCSWr; break;
    896   case AArch64::SBCXr:     NewOpc = AArch64::SBCSXr; break;
    897   case AArch64::ANDWri:    NewOpc = AArch64::ANDSWri; break;
    898   case AArch64::ANDXri:    NewOpc = AArch64::ANDSXri; break;
    899   }
    900 
     901   // Scan forward for uses of NZCV.
     902   // If any user's condition code requires checking the V bit, this
     903   // transformation is not safe to do.
    904   // It is safe to remove CmpInstr if NZCV is redefined or killed.
    905   // If we are done with the basic block, we need to check whether NZCV is
    906   // live-out.
    907   bool IsSafe = false;
    908   for (MachineBasicBlock::iterator I = CmpInstr,
    909                                    E = CmpInstr->getParent()->end();
    910        !IsSafe && ++I != E;) {
    911     const MachineInstr &Instr = *I;
    912     for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
    913          ++IO) {
    914       const MachineOperand &MO = Instr.getOperand(IO);
    915       if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
    916         IsSafe = true;
    917         break;
    918       }
    919       if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
    920         continue;
    921       if (MO.isDef()) {
    922         IsSafe = true;
    923         break;
    924       }
    925 
    926       // Decode the condition code.
    927       unsigned Opc = Instr.getOpcode();
    928       AArch64CC::CondCode CC;
    929       switch (Opc) {
    930       default:
    931         return false;
    932       case AArch64::Bcc:
    933         CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
    934         break;
    935       case AArch64::CSINVWr:
    936       case AArch64::CSINVXr:
    937       case AArch64::CSINCWr:
    938       case AArch64::CSINCXr:
    939       case AArch64::CSELWr:
    940       case AArch64::CSELXr:
    941       case AArch64::CSNEGWr:
    942       case AArch64::CSNEGXr:
    943       case AArch64::FCSELSrrr:
    944       case AArch64::FCSELDrrr:
    945         CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
    946         break;
    947       }
    948 
     949       // It is not safe to remove the compare if the overflow (V) flag is used.
    950       switch (CC) {
    951       default:
    952         // NZCV can be used multiple times, we should continue.
    953         break;
    954       case AArch64CC::VS:
    955       case AArch64CC::VC:
    956       case AArch64CC::GE:
    957       case AArch64CC::LT:
    958       case AArch64CC::GT:
    959       case AArch64CC::LE:
    960         return false;
    961       }
    962     }
    963   }
    964 
    965   // If NZCV is not killed nor re-defined, we should check whether it is
    966   // live-out. If it is live-out, do not optimize.
    967   if (!IsSafe) {
    968     MachineBasicBlock *ParentBlock = CmpInstr->getParent();
    969     for (auto *MBB : ParentBlock->successors())
    970       if (MBB->isLiveIn(AArch64::NZCV))
    971         return false;
    972   }
    973 
    974   // Update the instruction to set NZCV.
    975   MI->setDesc(get(NewOpc));
    976   CmpInstr->eraseFromParent();
    977   bool succeeded = UpdateOperandRegClass(MI);
    978   (void)succeeded;
    979   assert(succeeded && "Some operands reg class are incompatible!");
    980   MI->addRegisterDefined(AArch64::NZCV, TRI);
    981   return true;
    982 }
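         // A sketch of the second transformation above (virtual registers are
         // placeholders): when the value feeding a compare-against-zero has a unique
         // definition with a flag-setting twin, the compare can be removed entirely:
         //   %v = SUBWrr %a, %b
         //   %u = SUBSWri %v, 0, 0     ; cmp %v, #0 (the def %u is otherwise unused)
         //   Bcc ne, %bb.1
         // becomes
         //   %v = SUBSWrr %a, %b       ; now defines NZCV
         //   Bcc ne, %bb.1
         // provided NZCV is not touched between the two and no user needs the V bit.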
    983 
    984 bool
    985 AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
    986   if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    987     return false;
    988 
    989   MachineBasicBlock &MBB = *MI->getParent();
    990   DebugLoc DL = MI->getDebugLoc();
    991   unsigned Reg = MI->getOperand(0).getReg();
    992   const GlobalValue *GV =
    993       cast<GlobalValue>((*MI->memoperands_begin())->getValue());
    994   const TargetMachine &TM = MBB.getParent()->getTarget();
    995   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
    996   const unsigned char MO_NC = AArch64II::MO_NC;
    997 
    998   if ((OpFlags & AArch64II::MO_GOT) != 0) {
    999     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
   1000         .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
   1001     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   1002         .addReg(Reg, RegState::Kill).addImm(0)
   1003         .addMemOperand(*MI->memoperands_begin());
   1004   } else if (TM.getCodeModel() == CodeModel::Large) {
   1005     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
   1006         .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
   1007     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   1008         .addReg(Reg, RegState::Kill)
   1009         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
   1010     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   1011         .addReg(Reg, RegState::Kill)
   1012         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
   1013     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   1014         .addReg(Reg, RegState::Kill)
   1015         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
   1016     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   1017         .addReg(Reg, RegState::Kill).addImm(0)
   1018         .addMemOperand(*MI->memoperands_begin());
   1019   } else {
   1020     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
   1021         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
   1022     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
   1023     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   1024         .addReg(Reg, RegState::Kill)
   1025         .addGlobalAddress(GV, 0, LoFlags)
   1026         .addMemOperand(*MI->memoperands_begin());
   1027   }
   1028 
   1029   MBB.erase(MI);
   1030 
   1031   return true;
   1032 }
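         // For the common small-code-model, non-GOT case, the expansion above produces a
         // sequence of this shape (illustrative only; the guard symbol name depends on
         // the target environment):
         //   adrp x0, __stack_chk_guard
         //   ldr  x0, [x0, :lo12:__stack_chk_guard]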
   1033 
    1034 /// Return true if this instruction has a shifted register operand with a non-zero shift.
   1035 bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
   1036   switch (MI->getOpcode()) {
   1037   default:
   1038     break;
   1039   case AArch64::ADDSWrs:
   1040   case AArch64::ADDSXrs:
   1041   case AArch64::ADDWrs:
   1042   case AArch64::ADDXrs:
   1043   case AArch64::ANDSWrs:
   1044   case AArch64::ANDSXrs:
   1045   case AArch64::ANDWrs:
   1046   case AArch64::ANDXrs:
   1047   case AArch64::BICSWrs:
   1048   case AArch64::BICSXrs:
   1049   case AArch64::BICWrs:
   1050   case AArch64::BICXrs:
   1051   case AArch64::CRC32Brr:
   1052   case AArch64::CRC32CBrr:
   1053   case AArch64::CRC32CHrr:
   1054   case AArch64::CRC32CWrr:
   1055   case AArch64::CRC32CXrr:
   1056   case AArch64::CRC32Hrr:
   1057   case AArch64::CRC32Wrr:
   1058   case AArch64::CRC32Xrr:
   1059   case AArch64::EONWrs:
   1060   case AArch64::EONXrs:
   1061   case AArch64::EORWrs:
   1062   case AArch64::EORXrs:
   1063   case AArch64::ORNWrs:
   1064   case AArch64::ORNXrs:
   1065   case AArch64::ORRWrs:
   1066   case AArch64::ORRXrs:
   1067   case AArch64::SUBSWrs:
   1068   case AArch64::SUBSXrs:
   1069   case AArch64::SUBWrs:
   1070   case AArch64::SUBXrs:
   1071     if (MI->getOperand(3).isImm()) {
   1072       unsigned val = MI->getOperand(3).getImm();
   1073       return (val != 0);
   1074     }
   1075     break;
   1076   }
   1077   return false;
   1078 }
   1079 
    1080 /// Return true if this instruction has an extended register operand with a non-zero immediate.
   1081 bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
   1082   switch (MI->getOpcode()) {
   1083   default:
   1084     break;
   1085   case AArch64::ADDSWrx:
   1086   case AArch64::ADDSXrx:
   1087   case AArch64::ADDSXrx64:
   1088   case AArch64::ADDWrx:
   1089   case AArch64::ADDXrx:
   1090   case AArch64::ADDXrx64:
   1091   case AArch64::SUBSWrx:
   1092   case AArch64::SUBSXrx:
   1093   case AArch64::SUBSXrx64:
   1094   case AArch64::SUBWrx:
   1095   case AArch64::SUBXrx:
   1096   case AArch64::SUBXrx64:
   1097     if (MI->getOperand(3).isImm()) {
   1098       unsigned val = MI->getOperand(3).getImm();
   1099       return (val != 0);
   1100     }
   1101     break;
   1102   }
   1103 
   1104   return false;
   1105 }
   1106 
   1107 // Return true if this instruction simply sets its single destination register
   1108 // to zero. This is equivalent to a register rename of the zero-register.
   1109 bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
   1110   switch (MI->getOpcode()) {
   1111   default:
   1112     break;
   1113   case AArch64::MOVZWi:
   1114   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
   1115     if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
   1116       assert(MI->getDesc().getNumOperands() == 3 &&
   1117              MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
   1118       return true;
   1119     }
   1120     break;
   1121   case AArch64::ANDWri: // and Rd, Rzr, #imm
   1122     return MI->getOperand(1).getReg() == AArch64::WZR;
   1123   case AArch64::ANDXri:
   1124     return MI->getOperand(1).getReg() == AArch64::XZR;
   1125   case TargetOpcode::COPY:
   1126     return MI->getOperand(1).getReg() == AArch64::WZR;
   1127   }
   1128   return false;
   1129 }
   1130 
   1131 // Return true if this instruction simply renames a general register without
   1132 // modifying bits.
   1133 bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
   1134   switch (MI->getOpcode()) {
   1135   default:
   1136     break;
   1137   case TargetOpcode::COPY: {
    1138     // GPR32/GPR64 copies will be lowered to ORRWrs/ORRXrs.
   1139     unsigned DstReg = MI->getOperand(0).getReg();
   1140     return (AArch64::GPR32RegClass.contains(DstReg) ||
   1141             AArch64::GPR64RegClass.contains(DstReg));
   1142   }
   1143   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
   1144     if (MI->getOperand(1).getReg() == AArch64::XZR) {
   1145       assert(MI->getDesc().getNumOperands() == 4 &&
   1146              MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
   1147       return true;
   1148     }
   1149     break;
   1150   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
   1151     if (MI->getOperand(2).getImm() == 0) {
   1152       assert(MI->getDesc().getNumOperands() == 4 &&
   1153              MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
   1154       return true;
   1155     }
   1156     break;
   1157   }
   1158   return false;
   1159 }
   1160 
    1161 // Return true if this instruction simply renames an FP register without
   1162 // modifying bits.
   1163 bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
   1164   switch (MI->getOpcode()) {
   1165   default:
   1166     break;
   1167   case TargetOpcode::COPY: {
    1168     // FPR64 copies will be lowered to ORR.16b
   1169     unsigned DstReg = MI->getOperand(0).getReg();
   1170     return (AArch64::FPR64RegClass.contains(DstReg) ||
   1171             AArch64::FPR128RegClass.contains(DstReg));
   1172   }
   1173   case AArch64::ORRv16i8:
   1174     if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
   1175       assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
   1176              "invalid ORRv16i8 operands");
   1177       return true;
   1178     }
   1179     break;
   1180   }
   1181   return false;
   1182 }
   1183 
   1184 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
   1185                                                int &FrameIndex) const {
   1186   switch (MI->getOpcode()) {
   1187   default:
   1188     break;
   1189   case AArch64::LDRWui:
   1190   case AArch64::LDRXui:
   1191   case AArch64::LDRBui:
   1192   case AArch64::LDRHui:
   1193   case AArch64::LDRSui:
   1194   case AArch64::LDRDui:
   1195   case AArch64::LDRQui:
   1196     if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
   1197         MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
   1198       FrameIndex = MI->getOperand(1).getIndex();
   1199       return MI->getOperand(0).getReg();
   1200     }
   1201     break;
   1202   }
   1203 
   1204   return 0;
   1205 }
   1206 
   1207 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
   1208                                               int &FrameIndex) const {
   1209   switch (MI->getOpcode()) {
   1210   default:
   1211     break;
   1212   case AArch64::STRWui:
   1213   case AArch64::STRXui:
   1214   case AArch64::STRBui:
   1215   case AArch64::STRHui:
   1216   case AArch64::STRSui:
   1217   case AArch64::STRDui:
   1218   case AArch64::STRQui:
   1219     if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
   1220         MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
   1221       FrameIndex = MI->getOperand(1).getIndex();
   1222       return MI->getOperand(0).getReg();
   1223     }
   1224     break;
   1225   }
   1226   return 0;
   1227 }
   1228 
    1229 /// Return true if this load/store scales or extends its register offset.
   1230 /// This refers to scaling a dynamic index as opposed to scaled immediates.
   1231 /// MI should be a memory op that allows scaled addressing.
   1232 bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
   1233   switch (MI->getOpcode()) {
   1234   default:
   1235     break;
   1236   case AArch64::LDRBBroW:
   1237   case AArch64::LDRBroW:
   1238   case AArch64::LDRDroW:
   1239   case AArch64::LDRHHroW:
   1240   case AArch64::LDRHroW:
   1241   case AArch64::LDRQroW:
   1242   case AArch64::LDRSBWroW:
   1243   case AArch64::LDRSBXroW:
   1244   case AArch64::LDRSHWroW:
   1245   case AArch64::LDRSHXroW:
   1246   case AArch64::LDRSWroW:
   1247   case AArch64::LDRSroW:
   1248   case AArch64::LDRWroW:
   1249   case AArch64::LDRXroW:
   1250   case AArch64::STRBBroW:
   1251   case AArch64::STRBroW:
   1252   case AArch64::STRDroW:
   1253   case AArch64::STRHHroW:
   1254   case AArch64::STRHroW:
   1255   case AArch64::STRQroW:
   1256   case AArch64::STRSroW:
   1257   case AArch64::STRWroW:
   1258   case AArch64::STRXroW:
   1259   case AArch64::LDRBBroX:
   1260   case AArch64::LDRBroX:
   1261   case AArch64::LDRDroX:
   1262   case AArch64::LDRHHroX:
   1263   case AArch64::LDRHroX:
   1264   case AArch64::LDRQroX:
   1265   case AArch64::LDRSBWroX:
   1266   case AArch64::LDRSBXroX:
   1267   case AArch64::LDRSHWroX:
   1268   case AArch64::LDRSHXroX:
   1269   case AArch64::LDRSWroX:
   1270   case AArch64::LDRSroX:
   1271   case AArch64::LDRWroX:
   1272   case AArch64::LDRXroX:
   1273   case AArch64::STRBBroX:
   1274   case AArch64::STRBroX:
   1275   case AArch64::STRDroX:
   1276   case AArch64::STRHHroX:
   1277   case AArch64::STRHroX:
   1278   case AArch64::STRQroX:
   1279   case AArch64::STRSroX:
   1280   case AArch64::STRWroX:
   1281   case AArch64::STRXroX:
   1282 
   1283     unsigned Val = MI->getOperand(3).getImm();
   1284     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
   1285     return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
   1286   }
   1287   return false;
   1288 }
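         // Examples of what counts as a scaled/extended register offset here (a sketch):
         //   ldr x0, [x1, x2]               ; UXTX, no shift  -> returns false
         //   ldr x0, [x1, x2, lsl #3]       ; shifted index   -> returns true
         //   ldr w0, [x1, w2, sxtw]         ; extended index  -> returns true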
   1289 
   1290 /// Check all MachineMemOperands for a hint to suppress pairing.
   1291 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
   1292   assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
   1293          "Too many target MO flags");
   1294   for (auto *MM : MI->memoperands()) {
   1295     if (MM->getFlags() &
   1296         (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
   1297       return true;
   1298     }
   1299   }
   1300   return false;
   1301 }
   1302 
   1303 /// Set a flag on the first MachineMemOperand to suppress pairing.
   1304 void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
   1305   if (MI->memoperands_empty())
   1306     return;
   1307 
   1308   assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
   1309          "Too many target MO flags");
   1310   (*MI->memoperands_begin())
   1311       ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
   1312 }
   1313 
   1314 bool
   1315 AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
   1316                                        unsigned &Offset,
   1317                                        const TargetRegisterInfo *TRI) const {
   1318   switch (LdSt->getOpcode()) {
   1319   default:
   1320     return false;
   1321   case AArch64::STRSui:
   1322   case AArch64::STRDui:
   1323   case AArch64::STRQui:
   1324   case AArch64::STRXui:
   1325   case AArch64::STRWui:
   1326   case AArch64::LDRSui:
   1327   case AArch64::LDRDui:
   1328   case AArch64::LDRQui:
   1329   case AArch64::LDRXui:
   1330   case AArch64::LDRWui:
   1331     if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
   1332       return false;
   1333     BaseReg = LdSt->getOperand(1).getReg();
   1334     MachineFunction &MF = *LdSt->getParent()->getParent();
   1335     unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize();
   1336     Offset = LdSt->getOperand(2).getImm() * Width;
   1337     return true;
    1338   }
   1339 }
   1340 
   1341 bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
   1342     MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
   1343     const TargetRegisterInfo *TRI) const {
   1344   // Handle only loads/stores with base register followed by immediate offset.
   1345   if (LdSt->getNumOperands() != 3)
   1346     return false;
   1347   if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
   1348     return false;
   1349 
   1350   // Offset is calculated as the immediate operand multiplied by the scaling factor.
   1351   // Unscaled instructions have scaling factor set to 1.
   1352   int Scale = 0;
   1353   switch (LdSt->getOpcode()) {
   1354   default:
   1355     return false;
   1356   case AArch64::LDURQi:
   1357   case AArch64::STURQi:
   1358     Width = 16;
   1359     Scale = 1;
   1360     break;
   1361   case AArch64::LDURXi:
   1362   case AArch64::LDURDi:
   1363   case AArch64::STURXi:
   1364   case AArch64::STURDi:
   1365     Width = 8;
   1366     Scale = 1;
   1367     break;
   1368   case AArch64::LDURWi:
   1369   case AArch64::LDURSi:
   1370   case AArch64::LDURSWi:
   1371   case AArch64::STURWi:
   1372   case AArch64::STURSi:
   1373     Width = 4;
   1374     Scale = 1;
   1375     break;
   1376   case AArch64::LDURHi:
   1377   case AArch64::LDURHHi:
   1378   case AArch64::LDURSHXi:
   1379   case AArch64::LDURSHWi:
   1380   case AArch64::STURHi:
   1381   case AArch64::STURHHi:
   1382     Width = 2;
   1383     Scale = 1;
   1384     break;
   1385   case AArch64::LDURBi:
   1386   case AArch64::LDURBBi:
   1387   case AArch64::LDURSBXi:
   1388   case AArch64::LDURSBWi:
   1389   case AArch64::STURBi:
   1390   case AArch64::STURBBi:
   1391     Width = 1;
   1392     Scale = 1;
   1393     break;
   1394   case AArch64::LDRXui:
   1395   case AArch64::STRXui:
   1396     Scale = Width = 8;
   1397     break;
   1398   case AArch64::LDRWui:
   1399   case AArch64::STRWui:
   1400     Scale = Width = 4;
   1401     break;
   1402   case AArch64::LDRBui:
   1403   case AArch64::STRBui:
   1404     Scale = Width = 1;
   1405     break;
   1406   case AArch64::LDRHui:
   1407   case AArch64::STRHui:
   1408     Scale = Width = 2;
   1409     break;
   1410   case AArch64::LDRSui:
   1411   case AArch64::STRSui:
   1412     Scale = Width = 4;
   1413     break;
   1414   case AArch64::LDRDui:
   1415   case AArch64::STRDui:
   1416     Scale = Width = 8;
   1417     break;
   1418   case AArch64::LDRQui:
   1419   case AArch64::STRQui:
   1420     Scale = Width = 16;
   1421     break;
   1422   case AArch64::LDRBBui:
   1423   case AArch64::STRBBui:
   1424     Scale = Width = 1;
   1425     break;
   1426   case AArch64::LDRHHui:
   1427   case AArch64::STRHHui:
   1428     Scale = Width = 2;
   1429     break;
   1430   }
   1431 
   1432   BaseReg = LdSt->getOperand(1).getReg();
   1433   Offset = LdSt->getOperand(2).getImm() * Scale;
   1434   return true;
   1435 }
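        // Example (illustrative operands): "LDRXui %x0, %x1, 3" yields
        // Width = 8, Scale = 8, Offset = 24, while the unscaled form
        // "LDURXi %x0, %x1, -8" yields Width = 8, Scale = 1, Offset = -8.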
   1436 
   1437 /// Detect opportunities for ldp/stp formation.
   1438 ///
   1439 /// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
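        /// For example (illustrative operands), two "LDRXui %xN, %x1" loads
        /// whose scaled immediates are 2 and 3 (byte offsets 16 and 24) are
        /// clustered, so the load/store optimizer can later rewrite them as a
        /// single "ldp" from [x1, #16].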
   1440 bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
   1441                                           MachineInstr *SecondLdSt,
   1442                                           unsigned NumLoads) const {
   1443   // Only cluster up to a single pair.
   1444   if (NumLoads > 1)
   1445     return false;
   1446   if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
   1447     return false;
   1448   // getLdStBaseRegImmOfs guarantees that operand 2 is an immediate.
   1449   unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
   1450   // Allow only a small positive range of scaled offsets (at most 64).
   1451   if (Ofs1 > 64)
   1452     return false;
   1453   // The caller should already have ordered First/SecondLdSt by offset.
   1454   unsigned Ofs2 = SecondLdSt->getOperand(2).getImm();
   1455   return Ofs1 + 1 == Ofs2;
   1456 }
   1457 
   1458 bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
   1459                                               MachineInstr *Second) const {
   1460   // Cyclone can fuse CMN, CMP followed by Bcc.
   1461 
   1462   // FIXME: B0 can also fuse:
   1463   // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ.
   1464   if (Second->getOpcode() != AArch64::Bcc)
   1465     return false;
   1466   switch (First->getOpcode()) {
   1467   default:
   1468     return false;
   1469   case AArch64::SUBSWri:
   1470   case AArch64::ADDSWri:
   1471   case AArch64::ANDSWri:
   1472   case AArch64::SUBSXri:
   1473   case AArch64::ADDSXri:
   1474   case AArch64::ANDSXri:
   1475     return true;
   1476   }
   1477 }
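        // Example (illustrative operands): "SUBSWri %wzr, %w0, 4" (a CMP)
        // immediately followed by "Bcc eq, %bb" returns true, so the scheduler
        // keeps the pair adjacent and Cyclone can macro-fuse them.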
   1478 
   1479 MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
   1480     MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
   1481     const MDNode *Expr, DebugLoc DL) const {
   1482   MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
   1483                                 .addFrameIndex(FrameIx)
   1484                                 .addImm(0)
   1485                                 .addImm(Offset)
   1486                                 .addMetadata(Var)
   1487                                 .addMetadata(Expr);
   1488   return &*MIB;
   1489 }
   1490 
   1491 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
   1492                                             unsigned Reg, unsigned SubIdx,
   1493                                             unsigned State,
   1494                                             const TargetRegisterInfo *TRI) {
   1495   if (!SubIdx)
   1496     return MIB.addReg(Reg, State);
   1497 
   1498   if (TargetRegisterInfo::isPhysicalRegister(Reg))
   1499     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
   1500   return MIB.addReg(Reg, State, SubIdx);
   1501 }
   1502 
   1503 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
   1504                                         unsigned NumRegs) {
   1505   // We really want the positive remainder mod 32 here; that happens to be
   1506   // easily obtainable with a mask.
   1507   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
   1508 }
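        // Worked example: with DestReg encoding 4, SrcReg encoding 3 and
        // NumRegs 2, (4 - 3) & 0x1f == 1 < 2, so a forward copy would clobber
        // part of the source tuple; with encodings 1 and 3, (1 - 3) & 0x1f ==
        // 30, and copying forward is safe.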
   1509 
   1510 void AArch64InstrInfo::copyPhysRegTuple(
   1511     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
   1512     unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
   1513     llvm::ArrayRef<unsigned> Indices) const {
   1514   assert(Subtarget.hasNEON() &&
   1515          "Unexpected register copy without NEON");
   1516   const TargetRegisterInfo *TRI = &getRegisterInfo();
   1517   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
   1518   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
   1519   unsigned NumRegs = Indices.size();
   1520 
   1521   int SubReg = 0, End = NumRegs, Incr = 1;
   1522   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
   1523     SubReg = NumRegs - 1;
   1524     End = -1;
   1525     Incr = -1;
   1526   }
   1527 
   1528   for (; SubReg != End; SubReg += Incr) {
   1529     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
   1530     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
   1531     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
   1532     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
   1533   }
   1534 }
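        // Example (illustrative registers): copying the D-pair D1_D2 into
        // D2_D3 overlaps, so the loop above runs in reverse and emits
        // "ORRv8i8 D3, D2, D2" before "ORRv8i8 D2, D1, D1"; a non-overlapping
        // copy is emitted front to back.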
   1535 
   1536 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   1537                                    MachineBasicBlock::iterator I, DebugLoc DL,
   1538                                    unsigned DestReg, unsigned SrcReg,
   1539                                    bool KillSrc) const {
   1540   if (AArch64::GPR32spRegClass.contains(DestReg) &&
   1541       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
   1542     const TargetRegisterInfo *TRI = &getRegisterInfo();
   1543 
   1544     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
   1545       // If either operand is WSP, expand to ADD #0.
   1546       if (Subtarget.hasZeroCycleRegMove()) {
   1547         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
   1548         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
   1549                                                      &AArch64::GPR64spRegClass);
   1550         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
   1551                                                     &AArch64::GPR64spRegClass);
   1552         // This instruction is reading and writing X registers.  This may upset
   1553         // the register scavenger and machine verifier, so we need to indicate
   1554         // that we are reading an undefined value from SrcRegX, but a proper
   1555         // value from SrcReg.
   1556         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
   1557             .addReg(SrcRegX, RegState::Undef)
   1558             .addImm(0)
   1559             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
   1560             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
   1561       } else {
   1562         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
   1563             .addReg(SrcReg, getKillRegState(KillSrc))
   1564             .addImm(0)
   1565             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   1566       }
   1567     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
   1568       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
   1569           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   1570     } else {
   1571       if (Subtarget.hasZeroCycleRegMove()) {
   1572         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
   1573         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
   1574                                                      &AArch64::GPR64spRegClass);
   1575         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
   1576                                                     &AArch64::GPR64spRegClass);
   1577         // This instruction is reading and writing X registers.  This may upset
   1578         // the register scavenger and machine verifier, so we need to indicate
   1579         // that we are reading an undefined value from SrcRegX, but a proper
   1580         // value from SrcReg.
   1581         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
   1582             .addReg(AArch64::XZR)
   1583             .addReg(SrcRegX, RegState::Undef)
   1584             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
   1585       } else {
   1586         // Otherwise, expand to ORR WZR.
   1587         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
   1588             .addReg(AArch64::WZR)
   1589             .addReg(SrcReg, getKillRegState(KillSrc));
   1590       }
   1591     }
   1592     return;
   1593   }
   1594 
   1595   if (AArch64::GPR64spRegClass.contains(DestReg) &&
   1596       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
   1597     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
   1598       // If either operand is SP, expand to ADD #0.
   1599       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
   1600           .addReg(SrcReg, getKillRegState(KillSrc))
   1601           .addImm(0)
   1602           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   1603     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
   1604       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
   1605           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   1606     } else {
   1607       // Otherwise, expand to ORR XZR.
   1608       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
   1609           .addReg(AArch64::XZR)
   1610           .addReg(SrcReg, getKillRegState(KillSrc));
   1611     }
   1612     return;
   1613   }
   1614 
   1615   // Copy a DDDD register quad by copying the individual sub-registers.
   1616   if (AArch64::DDDDRegClass.contains(DestReg) &&
   1617       AArch64::DDDDRegClass.contains(SrcReg)) {
   1618     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
   1619                                         AArch64::dsub2, AArch64::dsub3 };
   1620     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   1621                      Indices);
   1622     return;
   1623   }
   1624 
   1625   // Copy a DDD register triple by copying the individual sub-registers.
   1626   if (AArch64::DDDRegClass.contains(DestReg) &&
   1627       AArch64::DDDRegClass.contains(SrcReg)) {
   1628     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
   1629                                         AArch64::dsub2 };
   1630     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   1631                      Indices);
   1632     return;
   1633   }
   1634 
   1635   // Copy a DD register pair by copying the individual sub-registers.
   1636   if (AArch64::DDRegClass.contains(DestReg) &&
   1637       AArch64::DDRegClass.contains(SrcReg)) {
   1638     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
   1639     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   1640                      Indices);
   1641     return;
   1642   }
   1643 
   1644   // Copy a QQQQ register quad by copying the individual sub-registers.
   1645   if (AArch64::QQQQRegClass.contains(DestReg) &&
   1646       AArch64::QQQQRegClass.contains(SrcReg)) {
   1647     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
   1648                                         AArch64::qsub2, AArch64::qsub3 };
   1649     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   1650                      Indices);
   1651     return;
   1652   }
   1653 
   1654   // Copy a QQQ register triple by copying the individual sub-registers.
   1655   if (AArch64::QQQRegClass.contains(DestReg) &&
   1656       AArch64::QQQRegClass.contains(SrcReg)) {
   1657     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
   1658                                         AArch64::qsub2 };
   1659     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   1660                      Indices);
   1661     return;
   1662   }
   1663 
   1664   // Copy a QQ register pair by copying the individual sub-registers.
   1665   if (AArch64::QQRegClass.contains(DestReg) &&
   1666       AArch64::QQRegClass.contains(SrcReg)) {
   1667     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
   1668     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   1669                      Indices);
   1670     return;
   1671   }
   1672 
   1673   if (AArch64::FPR128RegClass.contains(DestReg) &&
   1674       AArch64::FPR128RegClass.contains(SrcReg)) {
   1675     if (Subtarget.hasNEON()) {
   1676       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   1677           .addReg(SrcReg)
   1678           .addReg(SrcReg, getKillRegState(KillSrc));
   1679     } else {
   1680       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
   1681         .addReg(AArch64::SP, RegState::Define)
   1682         .addReg(SrcReg, getKillRegState(KillSrc))
   1683         .addReg(AArch64::SP)
   1684         .addImm(-16);
   1685       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
   1686         .addReg(AArch64::SP, RegState::Define)
   1687         .addReg(DestReg, RegState::Define)
   1688         .addReg(AArch64::SP)
   1689         .addImm(16);
   1690     }
   1691     return;
   1692   }
   1693 
   1694   if (AArch64::FPR64RegClass.contains(DestReg) &&
   1695       AArch64::FPR64RegClass.contains(SrcReg)) {
   1696     if (Subtarget.hasNEON()) {
   1697       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
   1698                                        &AArch64::FPR128RegClass);
   1699       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
   1700                                       &AArch64::FPR128RegClass);
   1701       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   1702           .addReg(SrcReg)
   1703           .addReg(SrcReg, getKillRegState(KillSrc));
   1704     } else {
   1705       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
   1706           .addReg(SrcReg, getKillRegState(KillSrc));
   1707     }
   1708     return;
   1709   }
   1710 
   1711   if (AArch64::FPR32RegClass.contains(DestReg) &&
   1712       AArch64::FPR32RegClass.contains(SrcReg)) {
   1713     if (Subtarget.hasNEON()) {
   1714       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
   1715                                        &AArch64::FPR128RegClass);
   1716       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
   1717                                       &AArch64::FPR128RegClass);
   1718       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   1719           .addReg(SrcReg)
   1720           .addReg(SrcReg, getKillRegState(KillSrc));
   1721     } else {
   1722       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   1723           .addReg(SrcReg, getKillRegState(KillSrc));
   1724     }
   1725     return;
   1726   }
   1727 
   1728   if (AArch64::FPR16RegClass.contains(DestReg) &&
   1729       AArch64::FPR16RegClass.contains(SrcReg)) {
   1730     if (Subtarget.hasNEON()) {
   1731       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
   1732                                        &AArch64::FPR128RegClass);
   1733       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
   1734                                       &AArch64::FPR128RegClass);
   1735       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   1736           .addReg(SrcReg)
   1737           .addReg(SrcReg, getKillRegState(KillSrc));
   1738     } else {
   1739       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
   1740                                        &AArch64::FPR32RegClass);
   1741       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
   1742                                       &AArch64::FPR32RegClass);
   1743       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   1744           .addReg(SrcReg, getKillRegState(KillSrc));
   1745     }
   1746     return;
   1747   }
   1748 
   1749   if (AArch64::FPR8RegClass.contains(DestReg) &&
   1750       AArch64::FPR8RegClass.contains(SrcReg)) {
   1751     if (Subtarget.hasNEON()) {
   1752       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
   1753                                        &AArch64::FPR128RegClass);
   1754       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
   1755                                       &AArch64::FPR128RegClass);
   1756       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   1757           .addReg(SrcReg)
   1758           .addReg(SrcReg, getKillRegState(KillSrc));
   1759     } else {
   1760       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
   1761                                        &AArch64::FPR32RegClass);
   1762       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
   1763                                       &AArch64::FPR32RegClass);
   1764       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   1765           .addReg(SrcReg, getKillRegState(KillSrc));
   1766     }
   1767     return;
   1768   }
   1769 
   1770   // Copies between GPR64 and FPR64.
   1771   if (AArch64::FPR64RegClass.contains(DestReg) &&
   1772       AArch64::GPR64RegClass.contains(SrcReg)) {
   1773     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
   1774         .addReg(SrcReg, getKillRegState(KillSrc));
   1775     return;
   1776   }
   1777   if (AArch64::GPR64RegClass.contains(DestReg) &&
   1778       AArch64::FPR64RegClass.contains(SrcReg)) {
   1779     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
   1780         .addReg(SrcReg, getKillRegState(KillSrc));
   1781     return;
   1782   }
   1783   // Copies between GPR32 and FPR32.
   1784   if (AArch64::FPR32RegClass.contains(DestReg) &&
   1785       AArch64::GPR32RegClass.contains(SrcReg)) {
   1786     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
   1787         .addReg(SrcReg, getKillRegState(KillSrc));
   1788     return;
   1789   }
   1790   if (AArch64::GPR32RegClass.contains(DestReg) &&
   1791       AArch64::FPR32RegClass.contains(SrcReg)) {
   1792     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
   1793         .addReg(SrcReg, getKillRegState(KillSrc));
   1794     return;
   1795   }
   1796 
   1797   if (DestReg == AArch64::NZCV) {
   1798     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
   1799     BuildMI(MBB, I, DL, get(AArch64::MSR))
   1800       .addImm(AArch64SysReg::NZCV)
   1801       .addReg(SrcReg, getKillRegState(KillSrc))
   1802       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
   1803     return;
   1804   }
   1805 
   1806   if (SrcReg == AArch64::NZCV) {
   1807     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
   1808     BuildMI(MBB, I, DL, get(AArch64::MRS))
   1809       .addReg(DestReg)
   1810       .addImm(AArch64SysReg::NZCV)
   1811       .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
   1812     return;
   1813   }
   1814 
   1815   llvm_unreachable("unimplemented reg-to-reg copy");
   1816 }
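        // Example (illustrative registers): a plain GPR32 copy %w0 <- %w1
        // becomes "ORRWrr %w0, %wzr, %w1"; on subtargets with
        // hasZeroCycleRegMove() the copy is widened to "ORRXrr %x0, %xzr, %x1"
        // with an implicit use of %w1 so the verifier still sees the 32-bit
        // source as read.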
   1817 
   1818 void AArch64InstrInfo::storeRegToStackSlot(
   1819     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
   1820     bool isKill, int FI, const TargetRegisterClass *RC,
   1821     const TargetRegisterInfo *TRI) const {
   1822   DebugLoc DL;
   1823   if (MBBI != MBB.end())
   1824     DL = MBBI->getDebugLoc();
   1825   MachineFunction &MF = *MBB.getParent();
   1826   MachineFrameInfo &MFI = *MF.getFrameInfo();
   1827   unsigned Align = MFI.getObjectAlignment(FI);
   1828 
   1829   MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
   1830   MachineMemOperand *MMO = MF.getMachineMemOperand(
   1831       PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
   1832   unsigned Opc = 0;
   1833   bool Offset = true;
   1834   switch (RC->getSize()) {
   1835   case 1:
   1836     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
   1837       Opc = AArch64::STRBui;
   1838     break;
   1839   case 2:
   1840     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
   1841       Opc = AArch64::STRHui;
   1842     break;
   1843   case 4:
   1844     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
   1845       Opc = AArch64::STRWui;
   1846       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
   1847         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
   1848       else
   1849         assert(SrcReg != AArch64::WSP);
   1850     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
   1851       Opc = AArch64::STRSui;
   1852     break;
   1853   case 8:
   1854     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
   1855       Opc = AArch64::STRXui;
   1856       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
   1857         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
   1858       else
   1859         assert(SrcReg != AArch64::SP);
   1860     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
   1861       Opc = AArch64::STRDui;
   1862     break;
   1863   case 16:
   1864     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
   1865       Opc = AArch64::STRQui;
   1866     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
   1867       assert(Subtarget.hasNEON() &&
   1868              "Unexpected register store without NEON");
   1869       Opc = AArch64::ST1Twov1d, Offset = false;
   1870     }
   1871     break;
   1872   case 24:
   1873     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
   1874       assert(Subtarget.hasNEON() &&
   1875              "Unexpected register store without NEON");
   1876       Opc = AArch64::ST1Threev1d, Offset = false;
   1877     }
   1878     break;
   1879   case 32:
   1880     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
   1881       assert(Subtarget.hasNEON() &&
   1882              "Unexpected register store without NEON");
   1883       Opc = AArch64::ST1Fourv1d, Offset = false;
   1884     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
   1885       assert(Subtarget.hasNEON() &&
   1886              "Unexpected register store without NEON");
   1887       Opc = AArch64::ST1Twov2d, Offset = false;
   1888     }
   1889     break;
   1890   case 48:
   1891     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
   1892       assert(Subtarget.hasNEON() &&
   1893              "Unexpected register store without NEON");
   1894       Opc = AArch64::ST1Threev2d, Offset = false;
   1895     }
   1896     break;
   1897   case 64:
   1898     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
   1899       assert(Subtarget.hasNEON() &&
   1900              "Unexpected register store without NEON");
   1901       Opc = AArch64::ST1Fourv2d, Offset = false;
   1902     }
   1903     break;
   1904   }
   1905   assert(Opc && "Unknown register class");
   1906 
   1907   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
   1908                                       .addReg(SrcReg, getKillRegState(isKill))
   1909                                       .addFrameIndex(FI);
   1910 
   1911   if (Offset)
   1912     MI.addImm(0);
   1913   MI.addMemOperand(MMO);
   1914 }
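        // Example (illustrative operands): spilling a GPR64 value to frame
        // index 1 emits "STRXui %xN, <fi#1>, 0" plus a store memory operand,
        // while a 32-byte QQ tuple uses "ST1Twov2d" with Offset == false, so
        // no trailing immediate is added.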
   1915 
   1916 void AArch64InstrInfo::loadRegFromStackSlot(
   1917     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
   1918     int FI, const TargetRegisterClass *RC,
   1919     const TargetRegisterInfo *TRI) const {
   1920   DebugLoc DL;
   1921   if (MBBI != MBB.end())
   1922     DL = MBBI->getDebugLoc();
   1923   MachineFunction &MF = *MBB.getParent();
   1924   MachineFrameInfo &MFI = *MF.getFrameInfo();
   1925   unsigned Align = MFI.getObjectAlignment(FI);
   1926   MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
   1927   MachineMemOperand *MMO = MF.getMachineMemOperand(
   1928       PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
   1929 
   1930   unsigned Opc = 0;
   1931   bool Offset = true;
   1932   switch (RC->getSize()) {
   1933   case 1:
   1934     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
   1935       Opc = AArch64::LDRBui;
   1936     break;
   1937   case 2:
   1938     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
   1939       Opc = AArch64::LDRHui;
   1940     break;
   1941   case 4:
   1942     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
   1943       Opc = AArch64::LDRWui;
   1944       if (TargetRegisterInfo::isVirtualRegister(DestReg))
   1945         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
   1946       else
   1947         assert(DestReg != AArch64::WSP);
   1948     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
   1949       Opc = AArch64::LDRSui;
   1950     break;
   1951   case 8:
   1952     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
   1953       Opc = AArch64::LDRXui;
   1954       if (TargetRegisterInfo::isVirtualRegister(DestReg))
   1955         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
   1956       else
   1957         assert(DestReg != AArch64::SP);
   1958     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
   1959       Opc = AArch64::LDRDui;
   1960     break;
   1961   case 16:
   1962     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
   1963       Opc = AArch64::LDRQui;
   1964     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
   1965       assert(Subtarget.hasNEON() &&
   1966              "Unexpected register load without NEON");
   1967       Opc = AArch64::LD1Twov1d, Offset = false;
   1968     }
   1969     break;
   1970   case 24:
   1971     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
   1972       assert(Subtarget.hasNEON() &&
   1973              "Unexpected register load without NEON");
   1974       Opc = AArch64::LD1Threev1d, Offset = false;
   1975     }
   1976     break;
   1977   case 32:
   1978     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
   1979       assert(Subtarget.hasNEON() &&
   1980              "Unexpected register load without NEON");
   1981       Opc = AArch64::LD1Fourv1d, Offset = false;
   1982     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
   1983       assert(Subtarget.hasNEON() &&
   1984              "Unexpected register load without NEON");
   1985       Opc = AArch64::LD1Twov2d, Offset = false;
   1986     }
   1987     break;
   1988   case 48:
   1989     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
   1990       assert(Subtarget.hasNEON() &&
   1991              "Unexpected register load without NEON");
   1992       Opc = AArch64::LD1Threev2d, Offset = false;
   1993     }
   1994     break;
   1995   case 64:
   1996     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
   1997       assert(Subtarget.hasNEON() &&
   1998              "Unexpected register load without NEON");
   1999       Opc = AArch64::LD1Fourv2d, Offset = false;
   2000     }
   2001     break;
   2002   }
   2003   assert(Opc && "Unknown register class");
   2004 
   2005   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
   2006                                       .addReg(DestReg, getDefRegState(true))
   2007                                       .addFrameIndex(FI);
   2008   if (Offset)
   2009     MI.addImm(0);
   2010   MI.addMemOperand(MMO);
   2011 }
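        // Example (illustrative operands): the matching reload of a GPR64
        // spill is "LDRXui %xN, <fi#1>, 0" with a load memory operand; tuple
        // reloads such as "LD1Twov2d" again omit the immediate operand.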
   2012 
   2013 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
   2014                            MachineBasicBlock::iterator MBBI, DebugLoc DL,
   2015                            unsigned DestReg, unsigned SrcReg, int Offset,
   2016                            const TargetInstrInfo *TII,
   2017                            MachineInstr::MIFlag Flag, bool SetNZCV) {
   2018   if (DestReg == SrcReg && Offset == 0)
   2019     return;
   2020 
   2021   bool isSub = Offset < 0;
   2022   if (isSub)
   2023     Offset = -Offset;
   2024 
   2025   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
   2026   // scratch register.  If DestReg is a virtual register, use it as the
   2027   // scratch register; otherwise, create a new virtual register (to be
   2028   // replaced by the scavenger at the end of PEI).  That case can be optimized
   2029   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
   2030   // register can be loaded with offset%8 and the add/sub can use an extending
   2031   // instruction with LSL#3.
   2032   // Currently the function handles any offsets but generates a poor sequence
   2033   // of code.
   2034   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
   2035 
   2036   unsigned Opc;
   2037   if (SetNZCV)
   2038     Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
   2039   else
   2040     Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
   2041   const unsigned MaxEncoding = 0xfff;
   2042   const unsigned ShiftSize = 12;
   2043   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
   2044   while (((unsigned)Offset) >= (1 << ShiftSize)) {
   2045     unsigned ThisVal;
   2046     if (((unsigned)Offset) > MaxEncodableValue) {
   2047       ThisVal = MaxEncodableValue;
   2048     } else {
   2049       ThisVal = Offset & MaxEncodableValue;
   2050     }
   2051     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
   2052            "Encoding cannot handle value that big");
   2053     BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
   2054         .addReg(SrcReg)
   2055         .addImm(ThisVal >> ShiftSize)
   2056         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
   2057         .setMIFlag(Flag);
   2058 
   2059     SrcReg = DestReg;
   2060     Offset -= ThisVal;
   2061     if (Offset == 0)
   2062       return;
   2063   }
   2064   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
   2065       .addReg(SrcReg)
   2066       .addImm(Offset)
   2067       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
   2068       .setMIFlag(Flag);
   2069 }
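        // Worked example: for Offset = 0x100008 the loop above first emits
        // "ADD Dest, Src, #0x100, lsl #12" (0x100000 bytes), leaving 0x8,
        // which the final BuildMI encodes as "ADD Dest, Dest, #8".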
   2070 
   2071 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   2072                                                       MachineInstr *MI,
   2073                                                       ArrayRef<unsigned> Ops,
   2074                                                       int FrameIndex) const {
   2075   // This is a bit of a hack. Consider this instruction:
   2076   //
   2077   //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
   2078   //
   2079   // We explicitly chose GPR64all for the virtual register so such a copy might
   2080   // be eliminated by RegisterCoalescer. However, that may not be possible, and
   2081   // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
   2082   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
   2083   //
   2084   // To prevent that, we are going to constrain the %vreg0 register class here.
   2085   //
   2086   // <rdar://problem/11522048>
   2087   //
   2088   if (MI->isCopy()) {
   2089     unsigned DstReg = MI->getOperand(0).getReg();
   2090     unsigned SrcReg = MI->getOperand(1).getReg();
   2091     if (SrcReg == AArch64::SP &&
   2092         TargetRegisterInfo::isVirtualRegister(DstReg)) {
   2093       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
   2094       return nullptr;
   2095     }
   2096     if (DstReg == AArch64::SP &&
   2097         TargetRegisterInfo::isVirtualRegister(SrcReg)) {
   2098       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
   2099       return nullptr;
   2100     }
   2101   }
   2102 
   2103   // Cannot fold.
   2104   return nullptr;
   2105 }
   2106 
   2107 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
   2108                                     bool *OutUseUnscaledOp,
   2109                                     unsigned *OutUnscaledOp,
   2110                                     int *EmittableOffset) {
   2111   int Scale = 1;
   2112   bool IsSigned = false;
   2113   // ImmIdx must be adjusted case by case for opcodes where it is not 2.
   2114   unsigned ImmIdx = 2;
   2115   unsigned UnscaledOp = 0;
   2116   // Set output values in case of early exit.
   2117   if (EmittableOffset)
   2118     *EmittableOffset = 0;
   2119   if (OutUseUnscaledOp)
   2120     *OutUseUnscaledOp = false;
   2121   if (OutUnscaledOp)
   2122     *OutUnscaledOp = 0;
   2123   switch (MI.getOpcode()) {
   2124   default:
   2125     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
   2126   // Vector spills/fills can't take an immediate offset.
   2127   case AArch64::LD1Twov2d:
   2128   case AArch64::LD1Threev2d:
   2129   case AArch64::LD1Fourv2d:
   2130   case AArch64::LD1Twov1d:
   2131   case AArch64::LD1Threev1d:
   2132   case AArch64::LD1Fourv1d:
   2133   case AArch64::ST1Twov2d:
   2134   case AArch64::ST1Threev2d:
   2135   case AArch64::ST1Fourv2d:
   2136   case AArch64::ST1Twov1d:
   2137   case AArch64::ST1Threev1d:
   2138   case AArch64::ST1Fourv1d:
   2139     return AArch64FrameOffsetCannotUpdate;
   2140   case AArch64::PRFMui:
   2141     Scale = 8;
   2142     UnscaledOp = AArch64::PRFUMi;
   2143     break;
   2144   case AArch64::LDRXui:
   2145     Scale = 8;
   2146     UnscaledOp = AArch64::LDURXi;
   2147     break;
   2148   case AArch64::LDRWui:
   2149     Scale = 4;
   2150     UnscaledOp = AArch64::LDURWi;
   2151     break;
   2152   case AArch64::LDRBui:
   2153     Scale = 1;
   2154     UnscaledOp = AArch64::LDURBi;
   2155     break;
   2156   case AArch64::LDRHui:
   2157     Scale = 2;
   2158     UnscaledOp = AArch64::LDURHi;
   2159     break;
   2160   case AArch64::LDRSui:
   2161     Scale = 4;
   2162     UnscaledOp = AArch64::LDURSi;
   2163     break;
   2164   case AArch64::LDRDui:
   2165     Scale = 8;
   2166     UnscaledOp = AArch64::LDURDi;
   2167     break;
   2168   case AArch64::LDRQui:
   2169     Scale = 16;
   2170     UnscaledOp = AArch64::LDURQi;
   2171     break;
   2172   case AArch64::LDRBBui:
   2173     Scale = 1;
   2174     UnscaledOp = AArch64::LDURBBi;
   2175     break;
   2176   case AArch64::LDRHHui:
   2177     Scale = 2;
   2178     UnscaledOp = AArch64::LDURHHi;
   2179     break;
   2180   case AArch64::LDRSBXui:
   2181     Scale = 1;
   2182     UnscaledOp = AArch64::LDURSBXi;
   2183     break;
   2184   case AArch64::LDRSBWui:
   2185     Scale = 1;
   2186     UnscaledOp = AArch64::LDURSBWi;
   2187     break;
   2188   case AArch64::LDRSHXui:
   2189     Scale = 2;
   2190     UnscaledOp = AArch64::LDURSHXi;
   2191     break;
   2192   case AArch64::LDRSHWui:
   2193     Scale = 2;
   2194     UnscaledOp = AArch64::LDURSHWi;
   2195     break;
   2196   case AArch64::LDRSWui:
   2197     Scale = 4;
   2198     UnscaledOp = AArch64::LDURSWi;
   2199     break;
   2200 
   2201   case AArch64::STRXui:
   2202     Scale = 8;
   2203     UnscaledOp = AArch64::STURXi;
   2204     break;
   2205   case AArch64::STRWui:
   2206     Scale = 4;
   2207     UnscaledOp = AArch64::STURWi;
   2208     break;
   2209   case AArch64::STRBui:
   2210     Scale = 1;
   2211     UnscaledOp = AArch64::STURBi;
   2212     break;
   2213   case AArch64::STRHui:
   2214     Scale = 2;
   2215     UnscaledOp = AArch64::STURHi;
   2216     break;
   2217   case AArch64::STRSui:
   2218     Scale = 4;
   2219     UnscaledOp = AArch64::STURSi;
   2220     break;
   2221   case AArch64::STRDui:
   2222     Scale = 8;
   2223     UnscaledOp = AArch64::STURDi;
   2224     break;
   2225   case AArch64::STRQui:
   2226     Scale = 16;
   2227     UnscaledOp = AArch64::STURQi;
   2228     break;
   2229   case AArch64::STRBBui:
   2230     Scale = 1;
   2231     UnscaledOp = AArch64::STURBBi;
   2232     break;
   2233   case AArch64::STRHHui:
   2234     Scale = 2;
   2235     UnscaledOp = AArch64::STURHHi;
   2236     break;
   2237 
   2238   case AArch64::LDPXi:
   2239   case AArch64::LDPDi:
   2240   case AArch64::STPXi:
   2241   case AArch64::STPDi:
   2242     IsSigned = true;
   2243     Scale = 8;
   2244     break;
   2245   case AArch64::LDPQi:
   2246   case AArch64::STPQi:
   2247     IsSigned = true;
   2248     Scale = 16;
   2249     break;
   2250   case AArch64::LDPWi:
   2251   case AArch64::LDPSi:
   2252   case AArch64::STPWi:
   2253   case AArch64::STPSi:
   2254     IsSigned = true;
   2255     Scale = 4;
   2256     break;
   2257 
   2258   case AArch64::LDURXi:
   2259   case AArch64::LDURWi:
   2260   case AArch64::LDURBi:
   2261   case AArch64::LDURHi:
   2262   case AArch64::LDURSi:
   2263   case AArch64::LDURDi:
   2264   case AArch64::LDURQi:
   2265   case AArch64::LDURHHi:
   2266   case AArch64::LDURBBi:
   2267   case AArch64::LDURSBXi:
   2268   case AArch64::LDURSBWi:
   2269   case AArch64::LDURSHXi:
   2270   case AArch64::LDURSHWi:
   2271   case AArch64::LDURSWi:
   2272   case AArch64::STURXi:
   2273   case AArch64::STURWi:
   2274   case AArch64::STURBi:
   2275   case AArch64::STURHi:
   2276   case AArch64::STURSi:
   2277   case AArch64::STURDi:
   2278   case AArch64::STURQi:
   2279   case AArch64::STURBBi:
   2280   case AArch64::STURHHi:
   2281     Scale = 1;
   2282     break;
   2283   }
   2284 
   2285   Offset += MI.getOperand(ImmIdx).getImm() * Scale;
   2286 
   2287   bool useUnscaledOp = false;
   2288   // If the offset doesn't match the scale, we rewrite the instruction to
   2289   // use the unscaled instruction instead. Likewise, if we have a negative
   2290   // offset (and have an unscaled op to use).
   2291   if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
   2292     useUnscaledOp = true;
   2293 
   2294   // Use an unscaled addressing mode if the instruction has a negative offset
   2295   // (or if the instruction is already using an unscaled addressing mode).
   2296   unsigned MaskBits;
   2297   if (IsSigned) {
   2298     // ldp/stp instructions.
   2299     MaskBits = 7;
   2300     Offset /= Scale;
   2301   } else if (UnscaledOp == 0 || useUnscaledOp) {
   2302     MaskBits = 9;
   2303     IsSigned = true;
   2304     Scale = 1;
   2305   } else {
   2306     MaskBits = 12;
   2307     IsSigned = false;
   2308     Offset /= Scale;
   2309   }
   2310 
   2311   // Attempt to fold address computation.
   2312   int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
   2313   int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
   2314   if (Offset >= MinOff && Offset <= MaxOff) {
   2315     if (EmittableOffset)
   2316       *EmittableOffset = Offset;
   2317     Offset = 0;
   2318   } else {
   2319     int NewOff = Offset < 0 ? MinOff : MaxOff;
   2320     if (EmittableOffset)
   2321       *EmittableOffset = NewOff;
   2322     Offset = (Offset - NewOff) * Scale;
   2323   }
   2324   if (OutUseUnscaledOp)
   2325     *OutUseUnscaledOp = useUnscaledOp;
   2326   if (OutUnscaledOp)
   2327     *OutUnscaledOp = UnscaledOp;
   2328   return AArch64FrameOffsetCanUpdate |
   2329          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
   2330 }
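        // Worked example: for "LDRXui %x0, <fi>, 2" with an incoming Offset of
        // 24, the byte offset is 24 + 2 * 8 = 40; it is 8-byte aligned and
        // fits the 12-bit unsigned field, so *EmittableOffset is set to 5 and
        // the function returns CanUpdate | IsLegal.  A misaligned byte offset
        // such as 3 instead selects the unscaled LDURXi form.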
   2331 
   2332 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   2333                                     unsigned FrameReg, int &Offset,
   2334                                     const AArch64InstrInfo *TII) {
   2335   unsigned Opcode = MI.getOpcode();
   2336   unsigned ImmIdx = FrameRegIdx + 1;
   2337 
   2338   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
   2339     Offset += MI.getOperand(ImmIdx).getImm();
   2340     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
   2341                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
   2342                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
   2343     MI.eraseFromParent();
   2344     Offset = 0;
   2345     return true;
   2346   }
   2347 
   2348   int NewOffset;
   2349   unsigned UnscaledOp;
   2350   bool UseUnscaledOp;
   2351   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
   2352                                          &UnscaledOp, &NewOffset);
   2353   if (Status & AArch64FrameOffsetCanUpdate) {
   2354     if (Status & AArch64FrameOffsetIsLegal)
   2355       // Replace the FrameIndex with FrameReg.
   2356       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
   2357     if (UseUnscaledOp)
   2358       MI.setDesc(TII->get(UnscaledOp));
   2359 
   2360     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
   2361     return Offset == 0;
   2362   }
   2363 
   2364   return false;
   2365 }
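        // Example (illustrative operands): rewriting "%x0 = LDRXui <fi#0>, 2"
        // with FrameReg = %sp and Offset = 16 folds to byte offset 32, so the
        // frame index operand becomes %sp, the immediate becomes 4
        // (i.e. "ldr x0, [sp, #32]"), and the function returns true.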
   2366 
   2367 void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
   2368   NopInst.setOpcode(AArch64::HINT);
   2369   NopInst.addOperand(MCOperand::CreateImm(0));
   2370 }
   2371 /// useMachineCombiner - return true when a target supports MachineCombiner
   2372 bool AArch64InstrInfo::useMachineCombiner() const {
   2373   // AArch64 supports the combiner
   2374   return true;
   2375 }
   2376 //
   2377 // True when Opc sets flag
   2378 static bool isCombineInstrSettingFlag(unsigned Opc) {
   2379   switch (Opc) {
   2380   case AArch64::ADDSWrr:
   2381   case AArch64::ADDSWri:
   2382   case AArch64::ADDSXrr:
   2383   case AArch64::ADDSXri:
   2384   case AArch64::SUBSWrr:
   2385   case AArch64::SUBSXrr:
   2386   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   2387   case AArch64::SUBSWri:
   2388   case AArch64::SUBSXri:
   2389     return true;
   2390   default:
   2391     break;
   2392   }
   2393   return false;
   2394 }
   2395 //
   2396 // 32b Opcodes that can be combined with a MUL
   2397 static bool isCombineInstrCandidate32(unsigned Opc) {
   2398   switch (Opc) {
   2399   case AArch64::ADDWrr:
   2400   case AArch64::ADDWri:
   2401   case AArch64::SUBWrr:
   2402   case AArch64::ADDSWrr:
   2403   case AArch64::ADDSWri:
   2404   case AArch64::SUBSWrr:
   2405   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   2406   case AArch64::SUBWri:
   2407   case AArch64::SUBSWri:
   2408     return true;
   2409   default:
   2410     break;
   2411   }
   2412   return false;
   2413 }
   2414 //
   2415 // 64b Opcodes that can be combined with a MUL
   2416 static bool isCombineInstrCandidate64(unsigned Opc) {
   2417   switch (Opc) {
   2418   case AArch64::ADDXrr:
   2419   case AArch64::ADDXri:
   2420   case AArch64::SUBXrr:
   2421   case AArch64::ADDSXrr:
   2422   case AArch64::ADDSXri:
   2423   case AArch64::SUBSXrr:
   2424   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   2425   case AArch64::SUBXri:
   2426   case AArch64::SUBSXri:
   2427     return true;
   2428   default:
   2429     break;
   2430   }
   2431   return false;
   2432 }
   2433 //
   2434 // Opcodes that can be combined with a MUL
   2435 static bool isCombineInstrCandidate(unsigned Opc) {
   2436   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
   2437 }
   2438 
   2439 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
   2440                               unsigned MulOpc, unsigned ZeroReg) {
   2441   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   2442   MachineInstr *MI = nullptr;
   2443   // We need a virtual register definition.
   2444   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
   2445     MI = MRI.getUniqueVRegDef(MO.getReg());
   2446   // And it needs to be in the trace (otherwise, it won't have a depth).
   2447   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc)
   2448     return false;
   2449 
   2450   assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
   2451          MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
   2452          MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
   2453 
   2454   // The third input reg must be zero.
   2455   if (MI->getOperand(3).getReg() != ZeroReg)
   2456     return false;
   2457 
   2458   // Must only be used by the user we combine with.
   2459   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
   2460     return false;
   2461 
   2462   return true;
   2463 }
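        // Example (illustrative operands): a multiply selected as
        // "%3 = MADDWrrr %1, %2, %wzr" with a single non-debug use of %3 in
        // this block satisfies the checks above when MulOpc is MADDWrrr and
        // ZeroReg is WZR.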
   2464 
   2465 /// hasPattern - return true when there is potentially a faster code sequence
   2466 /// for an instruction chain ending in \p Root. All potential patterns are
   2467 /// listed in the \p Pattern vector. Pattern should be sorted in priority order
   2468 /// since the pattern evaluator stops checking as soon as it finds a faster
   2469 /// sequence.
   2470 
   2471 bool AArch64InstrInfo::hasPattern(
   2472     MachineInstr &Root,
   2473     SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
   2474   unsigned Opc = Root.getOpcode();
   2475   MachineBasicBlock &MBB = *Root.getParent();
   2476   bool Found = false;
   2477 
   2478   if (!isCombineInstrCandidate(Opc))
   2479     return false;
   2480   if (isCombineInstrSettingFlag(Opc)) {
   2481     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
   2482     // When NZCV is live bail out.
   2483     if (Cmp_NZCV == -1)
   2484       return false;
   2485     unsigned NewOpc = convertFlagSettingOpcode(&Root);
   2486     // When opcode can't change bail out.
   2487     // CHECKME: do we miss any cases for opcode conversion?
   2488     if (NewOpc == Opc)
   2489       return false;
   2490     Opc = NewOpc;
   2491   }
   2492 
   2493   switch (Opc) {
   2494   default:
   2495     break;
   2496   case AArch64::ADDWrr:
   2497     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
   2498            "ADDWrr does not have register operands");
   2499     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   2500                           AArch64::WZR)) {
   2501       Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
   2502       Found = true;
   2503     }
   2504     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
   2505                           AArch64::WZR)) {
   2506       Pattern.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
   2507       Found = true;
   2508     }
   2509     break;
   2510   case AArch64::ADDXrr:
   2511     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   2512                           AArch64::XZR)) {
   2513       Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
   2514       Found = true;
   2515     }
   2516     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
   2517                           AArch64::XZR)) {
   2518       Pattern.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
   2519       Found = true;
   2520     }
   2521     break;
   2522   case AArch64::SUBWrr:
   2523     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   2524                           AArch64::WZR)) {
   2525       Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
   2526       Found = true;
   2527     }
   2528     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
   2529                           AArch64::WZR)) {
   2530       Pattern.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
   2531       Found = true;
   2532     }
   2533     break;
   2534   case AArch64::SUBXrr:
   2535     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   2536                           AArch64::XZR)) {
   2537       Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
   2538       Found = true;
   2539     }
   2540     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
   2541                           AArch64::XZR)) {
   2542       Pattern.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
   2543       Found = true;
   2544     }
   2545     break;
   2546   case AArch64::ADDWri:
   2547     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   2548                           AArch64::WZR)) {
   2549       Pattern.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
   2550       Found = true;
   2551     }
   2552     break;
   2553   case AArch64::ADDXri:
   2554     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   2555                           AArch64::XZR)) {
   2556       Pattern.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
   2557       Found = true;
   2558     }
   2559     break;
   2560   case AArch64::SUBWri:
   2561     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   2562                           AArch64::WZR)) {
   2563       Pattern.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
   2564       Found = true;
   2565     }
   2566     break;
   2567   case AArch64::SUBXri:
   2568     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   2569                           AArch64::XZR)) {
   2570       Pattern.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
   2571       Found = true;
   2572     }
   2573     break;
   2574   }
   2575   return Found;
   2576 }
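        // Example (illustrative operands): with "%4 = MADDWrrr %0, %1, %wzr"
        // feeding "%5 = ADDWrr %4, %2", the ADDWrr case above records
        // MC_MULADDW_OP1; if the multiply fed operand 2 of the add instead,
        // MC_MULADDW_OP2 would be recorded.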
   2577 
   2578 /// genMadd - Generate madd instruction and combine mul and add.
   2579 /// Example:
   2580 ///  MUL I=A,B,0
   2581 ///  ADD R,I,C
   2582 ///  ==> MADD R,A,B,C
   2583 /// \param Root is the ADD instruction
   2584 /// \param [out] InsInstrs is a vector of machine instructions and will
   2585 /// contain the generated madd instruction
   2586 /// \param IdxMulOpd is index of operand in Root that is the result of
   2587 /// the MUL. In the example above IdxMulOpd is 1.
   2588 /// \param MaddOpc the opcode of the madd instruction
   2589 static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
   2590                              const TargetInstrInfo *TII, MachineInstr &Root,
   2591                              SmallVectorImpl<MachineInstr *> &InsInstrs,
   2592                              unsigned IdxMulOpd, unsigned MaddOpc,
   2593                              const TargetRegisterClass *RC) {
   2594   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
   2595 
   2596   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
   2597   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
   2598   unsigned ResultReg = Root.getOperand(0).getReg();
   2599   unsigned SrcReg0 = MUL->getOperand(1).getReg();
   2600   bool Src0IsKill = MUL->getOperand(1).isKill();
   2601   unsigned SrcReg1 = MUL->getOperand(2).getReg();
   2602   bool Src1IsKill = MUL->getOperand(2).isKill();
   2603   unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
   2604   bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
   2605 
   2606   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
   2607     MRI.constrainRegClass(ResultReg, RC);
   2608   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
   2609     MRI.constrainRegClass(SrcReg0, RC);
   2610   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
   2611     MRI.constrainRegClass(SrcReg1, RC);
   2612   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
   2613     MRI.constrainRegClass(SrcReg2, RC);
   2614 
   2615   MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
   2616                                     ResultReg)
   2617                                 .addReg(SrcReg0, getKillRegState(Src0IsKill))
   2618                                 .addReg(SrcReg1, getKillRegState(Src1IsKill))
   2619                                 .addReg(SrcReg2, getKillRegState(Src2IsKill));
   2620   // Insert the MADD
   2621   InsInstrs.push_back(MIB);
   2622   return MUL;
   2623 }
   2624 
   2625 /// genMaddR - Generate madd instruction and combine mul and add using
   2626 /// an extra virtual register
   2627 /// Example - an ADD intermediate needs to be stored in a register:
   2628 ///   MUL I=A,B,0
   2629 ///   ADD R,I,Imm
   2630 ///   ==> ORR  V, ZR, Imm
   2631 ///   ==> MADD R,A,B,V
   2632 /// \param Root is the ADD instruction
   2633 /// \param [out] InsInstrs is a vector of machine instructions and will
   2634 /// contain the generated madd instruction
   2635 /// \param IdxMulOpd is index of operand in Root that is the result of
   2636 /// the MUL. In the example above IdxMulOpd is 1.
   2637 /// \param MaddOpc the opcode of the madd instruction
   2638 /// \param VR is a virtual register that holds the value of an ADD operand
   2639 /// (V in the example above).
   2640 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
   2641                               const TargetInstrInfo *TII, MachineInstr &Root,
   2642                               SmallVectorImpl<MachineInstr *> &InsInstrs,
   2643                               unsigned IdxMulOpd, unsigned MaddOpc,
   2644                               unsigned VR, const TargetRegisterClass *RC) {
   2645   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
   2646 
   2647   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
   2648   unsigned ResultReg = Root.getOperand(0).getReg();
   2649   unsigned SrcReg0 = MUL->getOperand(1).getReg();
   2650   bool Src0IsKill = MUL->getOperand(1).isKill();
   2651   unsigned SrcReg1 = MUL->getOperand(2).getReg();
   2652   bool Src1IsKill = MUL->getOperand(2).isKill();
   2653 
   2654   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
   2655     MRI.constrainRegClass(ResultReg, RC);
   2656   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
   2657     MRI.constrainRegClass(SrcReg0, RC);
   2658   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
   2659     MRI.constrainRegClass(SrcReg1, RC);
   2660   if (TargetRegisterInfo::isVirtualRegister(VR))
   2661     MRI.constrainRegClass(VR, RC);
   2662 
   2663   MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
   2664                                     ResultReg)
   2665                                 .addReg(SrcReg0, getKillRegState(Src0IsKill))
   2666                                 .addReg(SrcReg1, getKillRegState(Src1IsKill))
   2667                                 .addReg(VR);
   2668   // Insert the MADD
   2669   InsInstrs.push_back(MIB);
   2670   return MUL;
   2671 }
   2672 
   2673 /// genAlternativeCodeSequence - when hasPattern() finds a pattern, this
   2674 /// function generates the instructions that could replace the original
   2675 /// code sequence.
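        /// For the MC_MULADDW_OP1 pattern, for instance, the switch below calls
        /// genMadd(MF, MRI, TII, Root, InsInstrs, 1, AArch64::MADDWrrr,
        /// &AArch64::GPR32RegClass) so that InsInstrs receives the replacement
        /// MADD for the original MUL/ADD pair.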
   2676 void AArch64InstrInfo::genAlternativeCodeSequence(
   2677     MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
   2678     SmallVectorImpl<MachineInstr *> &InsInstrs,
   2679     SmallVectorImpl<MachineInstr *> &DelInstrs,
   2680     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
   2681   MachineBasicBlock &MBB = *Root.getParent();
   2682   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   2683   MachineFunction &MF = *MBB.getParent();
   2684   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   2685 
   2686   MachineInstr *MUL;
   2687   const TargetRegisterClass *RC;
   2688   unsigned Opc;
   2689   switch (Pattern) {
   2690   default:
   2691     // signal error.
   2692     break;
   2693   case MachineCombinerPattern::MC_MULADDW_OP1:
   2694   case MachineCombinerPattern::MC_MULADDX_OP1:
   2695     // MUL I=A,B,0
   2696     // ADD R,I,C
   2697     // ==> MADD R,A,B,C
   2698     // --- Create(MADD);
   2699     if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) {
   2700       Opc = AArch64::MADDWrrr;
   2701       RC = &AArch64::GPR32RegClass;
   2702     } else {
   2703       Opc = AArch64::MADDXrrr;
   2704       RC = &AArch64::GPR64RegClass;
   2705     }
   2706     MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   2707     break;
   2708   case MachineCombinerPattern::MC_MULADDW_OP2:
   2709   case MachineCombinerPattern::MC_MULADDX_OP2:
   2710     // MUL I=A,B,0
   2711     // ADD R,C,I
   2712     // ==> MADD R,A,B,C
   2713     // --- Create(MADD);
   2714     if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) {
   2715       Opc = AArch64::MADDWrrr;
   2716       RC = &AArch64::GPR32RegClass;
   2717     } else {
   2718       Opc = AArch64::MADDXrrr;
   2719       RC = &AArch64::GPR64RegClass;
   2720     }
   2721     MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   2722     break;
   2723   case MachineCombinerPattern::MC_MULADDWI_OP1:
   2724   case MachineCombinerPattern::MC_MULADDXI_OP1: {
   2725     // MUL I=A,B,0
   2726     // ADD R,I,Imm
   2727     // ==> ORR  V, ZR, Imm
   2728     // ==> MADD R,A,B,V
   2729     // --- Create(MADD);
   2730     const TargetRegisterClass *OrrRC;
   2731     unsigned BitSize, OrrOpc, ZeroReg;
   2732     if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) {
   2733       OrrOpc = AArch64::ORRWri;
   2734       OrrRC = &AArch64::GPR32spRegClass;
   2735       BitSize = 32;
   2736       ZeroReg = AArch64::WZR;
   2737       Opc = AArch64::MADDWrrr;
   2738       RC = &AArch64::GPR32RegClass;
   2739     } else {
   2740       OrrOpc = AArch64::ORRXri;
   2741       OrrRC = &AArch64::GPR64spRegClass;
   2742       BitSize = 64;
   2743       ZeroReg = AArch64::XZR;
   2744       Opc = AArch64::MADDXrrr;
   2745       RC = &AArch64::GPR64RegClass;
   2746     }
   2747     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
   2748     uint64_t Imm = Root.getOperand(2).getImm();
   2749 
   2750     if (Root.getOperand(3).isImm()) {
   2751       unsigned Val = Root.getOperand(3).getImm();
   2752       Imm = Imm << Val;
   2753     }
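    // Zero-extend the (possibly shifted) immediate to BitSize bits; only if
    // it can be encoded as a logical immediate can it be materialized with a
    // single ORR against the zero register.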
   2754     uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
   2755     uint64_t Encoding;
   2756     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
   2757       MachineInstrBuilder MIB1 =
   2758           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
   2759               .addReg(ZeroReg)
   2760               .addImm(Encoding);
   2761       InsInstrs.push_back(MIB1);
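      // Record that NewVR is defined by the instruction at index 0 of
      // InsInstrs (the ORR) so the machine combiner can compute its depth.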
   2762       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   2763       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   2764     }
   2765     break;
   2766   }
   2767   case MachineCombinerPattern::MC_MULSUBW_OP1:
   2768   case MachineCombinerPattern::MC_MULSUBX_OP1: {
   2769     // MUL I=A,B,0
   2770     // SUB R,I, C
   2771     // ==> SUB  V, 0, C
   2772     // ==> MADD R,A,B,V // = -C + A*B
   2773     // --- Create(MADD);
   2774     const TargetRegisterClass *SubRC;
   2775     unsigned SubOpc, ZeroReg;
   2776     if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) {
   2777       SubOpc = AArch64::SUBWrr;
   2778       SubRC = &AArch64::GPR32spRegClass;
   2779       ZeroReg = AArch64::WZR;
   2780       Opc = AArch64::MADDWrrr;
   2781       RC = &AArch64::GPR32RegClass;
   2782     } else {
   2783       SubOpc = AArch64::SUBXrr;
   2784       SubRC = &AArch64::GPR64spRegClass;
   2785       ZeroReg = AArch64::XZR;
   2786       Opc = AArch64::MADDXrrr;
   2787       RC = &AArch64::GPR64RegClass;
   2788     }
   2789     unsigned NewVR = MRI.createVirtualRegister(SubRC);
   2790     // SUB NewVR, 0, C
   2791     MachineInstrBuilder MIB1 =
   2792         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
   2793             .addReg(ZeroReg)
   2794             .addOperand(Root.getOperand(2));
   2795     InsInstrs.push_back(MIB1);
   2796     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   2797     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   2798     break;
   2799   }
   2800   case MachineCombinerPattern::MC_MULSUBW_OP2:
   2801   case MachineCombinerPattern::MC_MULSUBX_OP2:
   2802     // MUL I=A,B,0
   2803     // SUB R,C,I
   2804     // ==> MSUB R,A,B,C (computes C - A*B)
   2805     // --- Create(MSUB);
   2806     if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) {
   2807       Opc = AArch64::MSUBWrrr;
   2808       RC = &AArch64::GPR32RegClass;
   2809     } else {
   2810       Opc = AArch64::MSUBXrrr;
   2811       RC = &AArch64::GPR64RegClass;
   2812     }
   2813     MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   2814     break;
   2815   case MachineCombinerPattern::MC_MULSUBWI_OP1:
   2816   case MachineCombinerPattern::MC_MULSUBXI_OP1: {
   2817     // MUL I=A,B,0
   2818     // SUB R,I, Imm
   2819     // ==> ORR  V, ZR, -Imm
   2820     // ==> MADD R,A,B,V // = -Imm + A*B
   2821     // --- Create(MADD);
   2822     const TargetRegisterClass *OrrRC;
   2823     unsigned BitSize, OrrOpc, ZeroReg;
   2824     if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
   2825       OrrOpc = AArch64::ORRWri;
   2826       OrrRC = &AArch64::GPR32spRegClass;
   2827       BitSize = 32;
   2828       ZeroReg = AArch64::WZR;
   2829       Opc = AArch64::MADDWrrr;
   2830       RC = &AArch64::GPR32RegClass;
   2831     } else {
   2832       OrrOpc = AArch64::ORRXri;
   2833       OrrRC = &AArch64::GPR64spRegClass;
   2834       BitSize = 64;
   2835       ZeroReg = AArch64::XZR;
   2836       Opc = AArch64::MADDXrrr;
   2837       RC = &AArch64::GPR64RegClass;
   2838     }
   2839     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    // Use an unsigned 64-bit immediate so that the negation and shifts below
    // are well defined even for the 32-bit variant.
    uint64_t Imm = Root.getOperand(2).getImm();
   2841     if (Root.getOperand(3).isImm()) {
   2842       unsigned Val = Root.getOperand(3).getImm();
   2843       Imm = Imm << Val;
   2844     }
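    // Negate the immediate and zero-extend it to BitSize bits; as above, it
    // is only materialized when it is encodable as a logical immediate.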
   2845     uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
   2846     uint64_t Encoding;
   2847     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
   2848       MachineInstrBuilder MIB1 =
   2849           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
   2850               .addReg(ZeroReg)
   2851               .addImm(Encoding);
   2852       InsInstrs.push_back(MIB1);
   2853       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   2854       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   2855     }
   2856     break;
   2857   }
  } // end switch (Pattern)
  // If no replacement sequence was generated (e.g. the immediate could not be
  // encoded), leave the original code untouched.
  if (!MUL)
    return;
  // Record MUL and ADD/SUB for deletion.
  DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);
   2862 
   2863   return;
   2864 }
   2865 
/// \brief Replace a csinc-branch sequence by a simple conditional branch
   2867 ///
   2868 /// Examples:
   2869 /// 1.
   2870 ///   csinc  w9, wzr, wzr, <condition code>
   2871 ///   tbnz   w9, #0, 0x44
   2872 /// to
   2873 ///   b.<inverted condition code>
   2874 ///
   2875 /// 2.
   2876 ///   csinc w9, wzr, wzr, <condition code>
   2877 ///   tbz   w9, #0, 0x44
   2878 /// to
   2879 ///   b.<condition code>
   2880 ///
   2881 /// \param  MI Conditional Branch
   2882 /// \return True when the simple conditional branch is generated
   2883 ///
   2884 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
   2885   bool IsNegativeBranch = false;
   2886   bool IsTestAndBranch = false;
   2887   unsigned TargetBBInMI = 0;
   2888   switch (MI->getOpcode()) {
   2889   default:
   2890     llvm_unreachable("Unknown branch instruction?");
   2891   case AArch64::Bcc:
   2892     return false;
   2893   case AArch64::CBZW:
   2894   case AArch64::CBZX:
   2895     TargetBBInMI = 1;
   2896     break;
   2897   case AArch64::CBNZW:
   2898   case AArch64::CBNZX:
   2899     TargetBBInMI = 1;
   2900     IsNegativeBranch = true;
   2901     break;
   2902   case AArch64::TBZW:
   2903   case AArch64::TBZX:
   2904     TargetBBInMI = 2;
   2905     IsTestAndBranch = true;
   2906     break;
   2907   case AArch64::TBNZW:
   2908   case AArch64::TBNZX:
   2909     TargetBBInMI = 2;
   2910     IsNegativeBranch = true;
   2911     IsTestAndBranch = true;
   2912     break;
   2913   }
   2914   // So we increment a zero register and test for bits other
   2915   // than bit 0? Conservatively bail out in case the verifier
   2916   // missed this case.
   2917   if (IsTestAndBranch && MI->getOperand(1).getImm())
   2918     return false;
   2919 
   2920   // Find Definition.
  assert(MI->getParent() && "Incomplete machine instruction\n");
   2922   MachineBasicBlock *MBB = MI->getParent();
   2923   MachineFunction *MF = MBB->getParent();
   2924   MachineRegisterInfo *MRI = &MF->getRegInfo();
   2925   unsigned VReg = MI->getOperand(0).getReg();
   2926   if (!TargetRegisterInfo::isVirtualRegister(VReg))
   2927     return false;
   2928 
   2929   MachineInstr *DefMI = MRI->getVRegDef(VReg);
   2930 
  // Look for the canonical CSINC form that materializes the condition as
  // 0/1: csinc wN, wzr, wzr, <cc> (or the 64-bit xzr equivalent).
   2932   if (!(DefMI->getOpcode() == AArch64::CSINCWr &&
   2933         DefMI->getOperand(1).getReg() == AArch64::WZR &&
   2934         DefMI->getOperand(2).getReg() == AArch64::WZR) &&
   2935       !(DefMI->getOpcode() == AArch64::CSINCXr &&
   2936         DefMI->getOperand(1).getReg() == AArch64::XZR &&
   2937         DefMI->getOperand(2).getReg() == AArch64::XZR))
   2938     return false;
   2939 
   2940   if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
   2941     return false;
   2942 
   2943   AArch64CC::CondCode CC =
   2944       (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
   2945   bool CheckOnlyCCWrites = true;
   2946   // Convert only when the condition code is not modified between
   2947   // the CSINC and the branch. The CC may be used by other
   2948   // instructions in between.
   2949   if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo()))
   2950     return false;
   2951   MachineBasicBlock &RefToMBB = *MBB;
   2952   MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB();
   2953   DebugLoc DL = MI->getDebugLoc();
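  // CSINC Wd, WZR, WZR, CC sets Wd to 1 exactly when CC is false, so a branch
  // taken on a non-zero result corresponds to the inverted condition code.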
   2954   if (IsNegativeBranch)
   2955     CC = AArch64CC::getInvertedCondCode(CC);
   2956   BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
   2957   MI->eraseFromParent();
   2958   return true;
   2959 }
   2960