      1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AArch64InstrInfo.h"
     15 #include "AArch64Subtarget.h"
     16 #include "MCTargetDesc/AArch64AddressingModes.h"
     17 #include "llvm/CodeGen/MachineFrameInfo.h"
     18 #include "llvm/CodeGen/MachineInstrBuilder.h"
     19 #include "llvm/CodeGen/MachineMemOperand.h"
     20 #include "llvm/CodeGen/MachineRegisterInfo.h"
     21 #include "llvm/CodeGen/PseudoSourceValue.h"
     22 #include "llvm/MC/MCInst.h"
     23 #include "llvm/Support/ErrorHandling.h"
     24 #include "llvm/Support/TargetRegistry.h"
     25 
     26 using namespace llvm;
     27 
     28 #define GET_INSTRINFO_CTOR_DTOR
     29 #include "AArch64GenInstrInfo.inc"
     30 
     31 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
     32     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
     33       RI(STI.getTargetTriple()), Subtarget(STI) {}
     34 
     35 /// GetInstSize - Return the number of bytes of code the specified
     36 /// instruction may be.  This returns the maximum number of bytes.
     37 unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
     38   const MachineBasicBlock &MBB = *MI->getParent();
     39   const MachineFunction *MF = MBB.getParent();
     40   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
     41 
     42   if (MI->getOpcode() == AArch64::INLINEASM)
     43     return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
     44 
     45   const MCInstrDesc &Desc = MI->getDesc();
     46   switch (Desc.getOpcode()) {
     47   default:
      48     // Anything not explicitly designated otherwise is a normal 4-byte insn.
     49     return 4;
     50   case TargetOpcode::DBG_VALUE:
     51   case TargetOpcode::EH_LABEL:
     52   case TargetOpcode::IMPLICIT_DEF:
     53   case TargetOpcode::KILL:
     54     return 0;
     55   }
     56 
      57   llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
     58 }
     59 
     60 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
     61                             SmallVectorImpl<MachineOperand> &Cond) {
     62   // Block ends with fall-through condbranch.
     63   switch (LastInst->getOpcode()) {
     64   default:
     65     llvm_unreachable("Unknown branch instruction?");
     66   case AArch64::Bcc:
     67     Target = LastInst->getOperand(1).getMBB();
     68     Cond.push_back(LastInst->getOperand(0));
     69     break;
     70   case AArch64::CBZW:
     71   case AArch64::CBZX:
     72   case AArch64::CBNZW:
     73   case AArch64::CBNZX:
     74     Target = LastInst->getOperand(1).getMBB();
     75     Cond.push_back(MachineOperand::CreateImm(-1));
     76     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     77     Cond.push_back(LastInst->getOperand(0));
     78     break;
     79   case AArch64::TBZW:
     80   case AArch64::TBZX:
     81   case AArch64::TBNZW:
     82   case AArch64::TBNZX:
     83     Target = LastInst->getOperand(2).getMBB();
     84     Cond.push_back(MachineOperand::CreateImm(-1));
     85     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
     86     Cond.push_back(LastInst->getOperand(0));
     87     Cond.push_back(LastInst->getOperand(1));
     88   }
     89 }
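         // For illustration, the Cond vectors built above have these shapes
         // (registers and bit numbers are arbitrary examples):
         //   b.ne  <bb>          ->  Cond = { NE }
         //   cbz   w0, <bb>      ->  Cond = { -1, CBZW, w0 }
         //   tbnz  x1, #3, <bb>  ->  Cond = { -1, TBNZX, x1, 3 }
         // The leading -1 marks a folded compare-and-branch; ReverseBranchCondition()
         // and instantiateCondBranch() below consume this encoding.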
     90 
     91 // Branch analysis.
     92 bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
     93                                    MachineBasicBlock *&TBB,
     94                                    MachineBasicBlock *&FBB,
     95                                    SmallVectorImpl<MachineOperand> &Cond,
     96                                    bool AllowModify) const {
     97   // If the block has no terminators, it just falls into the block after it.
     98   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
     99   if (I == MBB.end())
    100     return false;
    101 
    102   if (!isUnpredicatedTerminator(I))
    103     return false;
    104 
    105   // Get the last instruction in the block.
    106   MachineInstr *LastInst = I;
    107 
    108   // If there is only one terminator instruction, process it.
    109   unsigned LastOpc = LastInst->getOpcode();
    110   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    111     if (isUncondBranchOpcode(LastOpc)) {
    112       TBB = LastInst->getOperand(0).getMBB();
    113       return false;
    114     }
    115     if (isCondBranchOpcode(LastOpc)) {
    116       // Block ends with fall-through condbranch.
    117       parseCondBranch(LastInst, TBB, Cond);
    118       return false;
    119     }
    120     return true; // Can't handle indirect branch.
    121   }
    122 
    123   // Get the instruction before it if it is a terminator.
    124   MachineInstr *SecondLastInst = I;
    125   unsigned SecondLastOpc = SecondLastInst->getOpcode();
    126 
    127   // If AllowModify is true and the block ends with two or more unconditional
    128   // branches, delete all but the first unconditional branch.
    129   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    130     while (isUncondBranchOpcode(SecondLastOpc)) {
    131       LastInst->eraseFromParent();
    132       LastInst = SecondLastInst;
    133       LastOpc = LastInst->getOpcode();
    134       if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
     135         // Return now; the only remaining terminator is an unconditional branch.
    136         TBB = LastInst->getOperand(0).getMBB();
    137         return false;
    138       } else {
    139         SecondLastInst = I;
    140         SecondLastOpc = SecondLastInst->getOpcode();
    141       }
    142     }
    143   }
    144 
    145   // If there are three terminators, we don't know what sort of block this is.
    146   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    147     return true;
    148 
    149   // If the block ends with a B and a Bcc, handle it.
    150   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    151     parseCondBranch(SecondLastInst, TBB, Cond);
    152     FBB = LastInst->getOperand(0).getMBB();
    153     return false;
    154   }
    155 
    156   // If the block ends with two unconditional branches, handle it.  The second
    157   // one is not executed, so remove it.
    158   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    159     TBB = SecondLastInst->getOperand(0).getMBB();
    160     I = LastInst;
    161     if (AllowModify)
    162       I->eraseFromParent();
    163     return false;
    164   }
    165 
    166   // ...likewise if it ends with an indirect branch followed by an unconditional
    167   // branch.
    168   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    169     I = LastInst;
    170     if (AllowModify)
    171       I->eraseFromParent();
    172     return true;
    173   }
    174 
    175   // Otherwise, can't handle this.
    176   return true;
    177 }
    178 
    179 bool AArch64InstrInfo::ReverseBranchCondition(
    180     SmallVectorImpl<MachineOperand> &Cond) const {
    181   if (Cond[0].getImm() != -1) {
    182     // Regular Bcc
    183     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    184     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
    185   } else {
    186     // Folded compare-and-branch
    187     switch (Cond[1].getImm()) {
    188     default:
    189       llvm_unreachable("Unknown conditional branch!");
    190     case AArch64::CBZW:
    191       Cond[1].setImm(AArch64::CBNZW);
    192       break;
    193     case AArch64::CBNZW:
    194       Cond[1].setImm(AArch64::CBZW);
    195       break;
    196     case AArch64::CBZX:
    197       Cond[1].setImm(AArch64::CBNZX);
    198       break;
    199     case AArch64::CBNZX:
    200       Cond[1].setImm(AArch64::CBZX);
    201       break;
    202     case AArch64::TBZW:
    203       Cond[1].setImm(AArch64::TBNZW);
    204       break;
    205     case AArch64::TBNZW:
    206       Cond[1].setImm(AArch64::TBZW);
    207       break;
    208     case AArch64::TBZX:
    209       Cond[1].setImm(AArch64::TBNZX);
    210       break;
    211     case AArch64::TBNZX:
    212       Cond[1].setImm(AArch64::TBZX);
    213       break;
    214     }
    215   }
    216 
    217   return false;
    218 }
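         // E.g. a Cond of { NE } becomes { EQ }, and a folded { -1, CBZW, w0 }
         // becomes { -1, CBNZW, w0 }; register and bit operands are left untouched.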
    219 
    220 unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
    221   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
    222   if (I == MBB.end())
    223     return 0;
    224 
    225   if (!isUncondBranchOpcode(I->getOpcode()) &&
    226       !isCondBranchOpcode(I->getOpcode()))
    227     return 0;
    228 
    229   // Remove the branch.
    230   I->eraseFromParent();
    231 
    232   I = MBB.end();
    233 
    234   if (I == MBB.begin())
    235     return 1;
    236   --I;
    237   if (!isCondBranchOpcode(I->getOpcode()))
    238     return 1;
    239 
    240   // Remove the branch.
    241   I->eraseFromParent();
    242   return 2;
    243 }
    244 
    245 void AArch64InstrInfo::instantiateCondBranch(
    246     MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
    247     ArrayRef<MachineOperand> Cond) const {
    248   if (Cond[0].getImm() != -1) {
    249     // Regular Bcc
    250     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
    251   } else {
    252     // Folded compare-and-branch
    253     // Note that we use addOperand instead of addReg to keep the flags.
    254     const MachineInstrBuilder MIB =
    255         BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
    256     if (Cond.size() > 3)
    257       MIB.addImm(Cond[3].getImm());
    258     MIB.addMBB(TBB);
    259   }
    260 }
    261 
    262 unsigned AArch64InstrInfo::InsertBranch(
    263     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    264     ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
    265   // Shouldn't be a fall through.
    266   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
    267 
    268   if (!FBB) {
    269     if (Cond.empty()) // Unconditional branch?
    270       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    271     else
    272       instantiateCondBranch(MBB, DL, TBB, Cond);
    273     return 1;
    274   }
    275 
    276   // Two-way conditional branch.
    277   instantiateCondBranch(MBB, DL, TBB, Cond);
    278   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
    279   return 2;
    280 }
    281 
    282 // Find the original register that VReg is copied from.
    283 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
    284   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    285     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    286     if (!DefMI->isFullCopy())
    287       return VReg;
    288     VReg = DefMI->getOperand(1).getReg();
    289   }
    290   return VReg;
    291 }
    292 
    293 // Determine if VReg is defined by an instruction that can be folded into a
    294 // csel instruction. If so, return the folded opcode, and the replacement
    295 // register.
    296 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
    297                                 unsigned *NewVReg = nullptr) {
    298   VReg = removeCopies(MRI, VReg);
    299   if (!TargetRegisterInfo::isVirtualRegister(VReg))
    300     return 0;
    301 
    302   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
    303   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    304   unsigned Opc = 0;
    305   unsigned SrcOpNum = 0;
    306   switch (DefMI->getOpcode()) {
    307   case AArch64::ADDSXri:
    308   case AArch64::ADDSWri:
    309     // if NZCV is used, do not fold.
    310     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
    311       return 0;
    312   // fall-through to ADDXri and ADDWri.
    313   case AArch64::ADDXri:
    314   case AArch64::ADDWri:
    315     // add x, 1 -> csinc.
    316     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
    317         DefMI->getOperand(3).getImm() != 0)
    318       return 0;
    319     SrcOpNum = 1;
    320     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    321     break;
    322 
    323   case AArch64::ORNXrr:
    324   case AArch64::ORNWrr: {
    325     // not x -> csinv, represented as orn dst, xzr, src.
    326     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    327     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
    328       return 0;
    329     SrcOpNum = 2;
    330     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    331     break;
    332   }
    333 
    334   case AArch64::SUBSXrr:
    335   case AArch64::SUBSWrr:
    336     // if NZCV is used, do not fold.
    337     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
    338       return 0;
    339   // fall-through to SUBXrr and SUBWrr.
    340   case AArch64::SUBXrr:
    341   case AArch64::SUBWrr: {
    342     // neg x -> csneg, represented as sub dst, xzr, src.
    343     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    344     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
    345       return 0;
    346     SrcOpNum = 2;
    347     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    348     break;
    349   }
    350   default:
    351     return 0;
    352   }
    353   assert(Opc && SrcOpNum && "Missing parameters");
    354 
    355   if (NewVReg)
    356     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
    357   return Opc;
    358 }
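         // For example (virtual registers are illustrative), a definition such as
         //   %v1 = ADDWri %v0, 1, 0        ; v1 = v0 + 1
         // feeding a select returns CSINCWr with *NewVReg = %v0, so the select can be
         // emitted as a single csinc instead of an add followed by a csel. Likewise,
         // ORN from the zero register folds to csinv and SUB from the zero register
         // folds to csneg.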
    359 
    360 bool AArch64InstrInfo::canInsertSelect(
    361     const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
    362     unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    363     int &FalseCycles) const {
    364   // Check register classes.
    365   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    366   const TargetRegisterClass *RC =
    367       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
    368   if (!RC)
    369     return false;
    370 
    371   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
    372   unsigned ExtraCondLat = Cond.size() != 1;
    373 
    374   // GPRs are handled by csel.
    375   // FIXME: Fold in x+1, -x, and ~x when applicable.
    376   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
    377       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    378     // Single-cycle csel, csinc, csinv, and csneg.
    379     CondCycles = 1 + ExtraCondLat;
    380     TrueCycles = FalseCycles = 1;
    381     if (canFoldIntoCSel(MRI, TrueReg))
    382       TrueCycles = 0;
    383     else if (canFoldIntoCSel(MRI, FalseReg))
    384       FalseCycles = 0;
    385     return true;
    386   }
    387 
    388   // Scalar floating point is handled by fcsel.
    389   // FIXME: Form fabs, fmin, and fmax when applicable.
    390   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
    391       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    392     CondCycles = 5 + ExtraCondLat;
    393     TrueCycles = FalseCycles = 2;
    394     return true;
    395   }
    396 
    397   // Can't do vectors.
    398   return false;
    399 }
    400 
    401 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
    402                                     MachineBasicBlock::iterator I, DebugLoc DL,
    403                                     unsigned DstReg,
    404                                     ArrayRef<MachineOperand> Cond,
    405                                     unsigned TrueReg, unsigned FalseReg) const {
    406   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    407 
    408   // Parse the condition code, see parseCondBranch() above.
    409   AArch64CC::CondCode CC;
    410   switch (Cond.size()) {
    411   default:
    412     llvm_unreachable("Unknown condition opcode in Cond");
    413   case 1: // b.cc
    414     CC = AArch64CC::CondCode(Cond[0].getImm());
    415     break;
    416   case 3: { // cbz/cbnz
    417     // We must insert a compare against 0.
    418     bool Is64Bit;
    419     switch (Cond[1].getImm()) {
    420     default:
    421       llvm_unreachable("Unknown branch opcode in Cond");
    422     case AArch64::CBZW:
    423       Is64Bit = 0;
    424       CC = AArch64CC::EQ;
    425       break;
    426     case AArch64::CBZX:
    427       Is64Bit = 1;
    428       CC = AArch64CC::EQ;
    429       break;
    430     case AArch64::CBNZW:
    431       Is64Bit = 0;
    432       CC = AArch64CC::NE;
    433       break;
    434     case AArch64::CBNZX:
    435       Is64Bit = 1;
    436       CC = AArch64CC::NE;
    437       break;
    438     }
    439     unsigned SrcReg = Cond[2].getReg();
    440     if (Is64Bit) {
    441       // cmp reg, #0 is actually subs xzr, reg, #0.
    442       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
    443       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
    444           .addReg(SrcReg)
    445           .addImm(0)
    446           .addImm(0);
    447     } else {
    448       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
    449       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
    450           .addReg(SrcReg)
    451           .addImm(0)
    452           .addImm(0);
    453     }
    454     break;
    455   }
    456   case 4: { // tbz/tbnz
    457     // We must insert a tst instruction.
    458     switch (Cond[1].getImm()) {
    459     default:
    460       llvm_unreachable("Unknown branch opcode in Cond");
    461     case AArch64::TBZW:
    462     case AArch64::TBZX:
    463       CC = AArch64CC::EQ;
    464       break;
    465     case AArch64::TBNZW:
    466     case AArch64::TBNZX:
    467       CC = AArch64CC::NE;
    468       break;
    469     }
    470     // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    471     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
    472       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
    473           .addReg(Cond[2].getReg())
    474           .addImm(
    475               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    476     else
    477       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
    478           .addReg(Cond[2].getReg())
    479           .addImm(
    480               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    481     break;
    482   }
    483   }
    484 
    485   unsigned Opc = 0;
    486   const TargetRegisterClass *RC = nullptr;
    487   bool TryFold = false;
    488   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    489     RC = &AArch64::GPR64RegClass;
    490     Opc = AArch64::CSELXr;
    491     TryFold = true;
    492   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    493     RC = &AArch64::GPR32RegClass;
    494     Opc = AArch64::CSELWr;
    495     TryFold = true;
    496   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    497     RC = &AArch64::FPR64RegClass;
    498     Opc = AArch64::FCSELDrrr;
    499   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    500     RC = &AArch64::FPR32RegClass;
    501     Opc = AArch64::FCSELSrrr;
    502   }
    503   assert(RC && "Unsupported regclass");
    504 
    505   // Try folding simple instructions into the csel.
    506   if (TryFold) {
    507     unsigned NewVReg = 0;
    508     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    509     if (FoldedOpc) {
     510       // The folded opcodes csinc, csinv and csneg apply the operation to
    511       // FalseReg, so we need to invert the condition.
    512       CC = AArch64CC::getInvertedCondCode(CC);
    513       TrueReg = FalseReg;
    514     } else
    515       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
    516 
    517     // Fold the operation. Leave any dead instructions for DCE to clean up.
    518     if (FoldedOpc) {
    519       FalseReg = NewVReg;
    520       Opc = FoldedOpc;
     521       // This extends the live range of NewVReg.
    522       MRI.clearKillFlags(NewVReg);
    523     }
    524   }
    525 
     526   // Pull all virtual registers into the appropriate class.
    527   MRI.constrainRegClass(TrueReg, RC);
    528   MRI.constrainRegClass(FalseReg, RC);
    529 
    530   // Insert the csel.
    531   BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
    532       CC);
    533 }
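         // E.g. for a Cond of { -1, CBZW, w0 } selecting w1/w2 into w3, the code above
         // emits roughly (illustrative registers):
         //   subs wzr, w0, #0       ; materialize the cbz condition in NZCV
         //   csel w3, w1, w2, eq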
    534 
     535 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
    536 static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) {
    537   uint64_t Imm = MI->getOperand(1).getImm();
    538   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
    539   uint64_t Encoding;
    540   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
    541 }
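         // E.g. a MOVi64imm of 0x5555555555555555 is a valid logical immediate and can
         // later be expanded to "orr xN, xzr, #0x5555555555555555", whereas 0x1234
         // has no logical-immediate encoding and this returns false.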
    542 
    543 // FIXME: this implementation should be micro-architecture dependent, so a
    544 // micro-architecture target hook should be introduced here in future.
    545 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
    546   if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53())
    547     return MI->isAsCheapAsAMove();
    548 
    549   switch (MI->getOpcode()) {
    550   default:
    551     return false;
    552 
    553   // add/sub on register without shift
    554   case AArch64::ADDWri:
    555   case AArch64::ADDXri:
    556   case AArch64::SUBWri:
    557   case AArch64::SUBXri:
    558     return (MI->getOperand(3).getImm() == 0);
    559 
    560   // logical ops on immediate
    561   case AArch64::ANDWri:
    562   case AArch64::ANDXri:
    563   case AArch64::EORWri:
    564   case AArch64::EORXri:
    565   case AArch64::ORRWri:
    566   case AArch64::ORRXri:
    567     return true;
    568 
    569   // logical ops on register without shift
    570   case AArch64::ANDWrr:
    571   case AArch64::ANDXrr:
    572   case AArch64::BICWrr:
    573   case AArch64::BICXrr:
    574   case AArch64::EONWrr:
    575   case AArch64::EONXrr:
    576   case AArch64::EORWrr:
    577   case AArch64::EORXrr:
    578   case AArch64::ORNWrr:
    579   case AArch64::ORNXrr:
    580   case AArch64::ORRWrr:
    581   case AArch64::ORRXrr:
    582     return true;
    583   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
    584   // ORRXri, it is as cheap as MOV
    585   case AArch64::MOVi32imm:
    586     return canBeExpandedToORR(MI, 32);
    587   case AArch64::MOVi64imm:
    588     return canBeExpandedToORR(MI, 64);
    589   }
    590 
    591   llvm_unreachable("Unknown opcode to check as cheap as a move!");
    592 }
    593 
    594 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
    595                                              unsigned &SrcReg, unsigned &DstReg,
    596                                              unsigned &SubIdx) const {
    597   switch (MI.getOpcode()) {
    598   default:
    599     return false;
    600   case AArch64::SBFMXri: // aka sxtw
    601   case AArch64::UBFMXri: // aka uxtw
    602     // Check for the 32 -> 64 bit extension case, these instructions can do
    603     // much more.
    604     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
    605       return false;
    606     // This is a signed or unsigned 32 -> 64 bit extension.
    607     SrcReg = MI.getOperand(1).getReg();
    608     DstReg = MI.getOperand(0).getReg();
    609     SubIdx = AArch64::sub_32;
    610     return true;
    611   }
    612 }
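         // E.g. "sxtw x0, w1" (SBFMXri with immediates 0 and 31) is a plain signed
         // 32 -> 64 bit extension, so the coalescer may treat it as a copy into the
         // sub_32 lane of the destination; other immediate combinations encode real
         // bitfield moves and are rejected above.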
    613 
    614 bool
    615 AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
    616                                                   MachineInstr *MIb,
    617                                                   AliasAnalysis *AA) const {
    618   const TargetRegisterInfo *TRI = &getRegisterInfo();
    619   unsigned BaseRegA = 0, BaseRegB = 0;
    620   int OffsetA = 0, OffsetB = 0;
    621   int WidthA = 0, WidthB = 0;
    622 
    623   assert(MIa && MIa->mayLoadOrStore() && "MIa must be a load or store.");
    624   assert(MIb && MIb->mayLoadOrStore() && "MIb must be a load or store.");
    625 
    626   if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
    627       MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
    628     return false;
    629 
    630   // Retrieve the base register, offset from the base register and width. Width
    631   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
    632   // base registers are identical, and the offset of a lower memory access +
    633   // the width doesn't overlap the offset of a higher memory access,
    634   // then the memory accesses are different.
    635   if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
    636       getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    637     if (BaseRegA == BaseRegB) {
    638       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
    639       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
    640       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
    641       if (LowOffset + LowWidth <= HighOffset)
    642         return true;
    643     }
    644   }
    645   return false;
    646 }
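         // E.g. "ldr x0, [x20, #8]" and "str x1, [x20, #16]" (illustrative registers)
         // share the base x20, and the lower access ends at offset 8 + 8 = 16, which
         // does not overlap the higher access, so the pair is reported as disjoint.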
    647 
    648 /// analyzeCompare - For a comparison instruction, return the source registers
    649 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
    650 /// Return true if the comparison instruction can be analyzed.
    651 bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
    652                                       unsigned &SrcReg2, int &CmpMask,
    653                                       int &CmpValue) const {
    654   switch (MI->getOpcode()) {
    655   default:
    656     break;
    657   case AArch64::SUBSWrr:
    658   case AArch64::SUBSWrs:
    659   case AArch64::SUBSWrx:
    660   case AArch64::SUBSXrr:
    661   case AArch64::SUBSXrs:
    662   case AArch64::SUBSXrx:
    663   case AArch64::ADDSWrr:
    664   case AArch64::ADDSWrs:
    665   case AArch64::ADDSWrx:
    666   case AArch64::ADDSXrr:
    667   case AArch64::ADDSXrs:
    668   case AArch64::ADDSXrx:
    669     // Replace SUBSWrr with SUBWrr if NZCV is not used.
    670     SrcReg = MI->getOperand(1).getReg();
    671     SrcReg2 = MI->getOperand(2).getReg();
    672     CmpMask = ~0;
    673     CmpValue = 0;
    674     return true;
    675   case AArch64::SUBSWri:
    676   case AArch64::ADDSWri:
    677   case AArch64::SUBSXri:
    678   case AArch64::ADDSXri:
    679     SrcReg = MI->getOperand(1).getReg();
    680     SrcReg2 = 0;
    681     CmpMask = ~0;
     682     // FIXME: CmpValue is clamped to 0 or 1 here; it is only compared against zero.
    683     CmpValue = (MI->getOperand(2).getImm() != 0);
    684     return true;
    685   case AArch64::ANDSWri:
    686   case AArch64::ANDSXri:
     687     // ANDS does not use the same encoding scheme as the other xxxS
    688     // instructions.
    689     SrcReg = MI->getOperand(1).getReg();
    690     SrcReg2 = 0;
    691     CmpMask = ~0;
     692     // FIXME: The return val type of decodeLogicalImmediate is uint64_t,
    693     // while the type of CmpValue is int. When converting uint64_t to int,
    694     // the high 32 bits of uint64_t will be lost.
    695     // In fact it causes a bug in spec2006-483.xalancbmk
    696     // CmpValue is only used to compare with zero in OptimizeCompareInstr
    697     CmpValue = (AArch64_AM::decodeLogicalImmediate(
    698                     MI->getOperand(2).getImm(),
    699                     MI->getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0);
    700     return true;
    701   }
    702 
    703   return false;
    704 }
    705 
    706 static bool UpdateOperandRegClass(MachineInstr *Instr) {
    707   MachineBasicBlock *MBB = Instr->getParent();
    708   assert(MBB && "Can't get MachineBasicBlock here");
    709   MachineFunction *MF = MBB->getParent();
    710   assert(MF && "Can't get MachineFunction here");
    711   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
    712   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
    713   MachineRegisterInfo *MRI = &MF->getRegInfo();
    714 
    715   for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
    716        ++OpIdx) {
    717     MachineOperand &MO = Instr->getOperand(OpIdx);
    718     const TargetRegisterClass *OpRegCstraints =
    719         Instr->getRegClassConstraint(OpIdx, TII, TRI);
    720 
    721     // If there's no constraint, there's nothing to do.
    722     if (!OpRegCstraints)
    723       continue;
    724     // If the operand is a frame index, there's nothing to do here.
    725     // A frame index operand will resolve correctly during PEI.
    726     if (MO.isFI())
    727       continue;
    728 
    729     assert(MO.isReg() &&
    730            "Operand has register constraints without being a register!");
    731 
    732     unsigned Reg = MO.getReg();
    733     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
    734       if (!OpRegCstraints->contains(Reg))
    735         return false;
    736     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
    737                !MRI->constrainRegClass(Reg, OpRegCstraints))
    738       return false;
    739   }
    740 
    741   return true;
    742 }
    743 
    744 /// \brief Return the opcode that does not set flags when possible - otherwise
     745 /// return the original opcode. The caller is responsible for doing the actual
    746 /// substitution and legality checking.
    747 static unsigned convertFlagSettingOpcode(const MachineInstr *MI) {
    748   // Don't convert all compare instructions, because for some the zero register
    749   // encoding becomes the sp register.
    750   bool MIDefinesZeroReg = false;
    751   if (MI->definesRegister(AArch64::WZR) || MI->definesRegister(AArch64::XZR))
    752     MIDefinesZeroReg = true;
    753 
    754   switch (MI->getOpcode()) {
    755   default:
    756     return MI->getOpcode();
    757   case AArch64::ADDSWrr:
    758     return AArch64::ADDWrr;
    759   case AArch64::ADDSWri:
    760     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
    761   case AArch64::ADDSWrs:
    762     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
    763   case AArch64::ADDSWrx:
    764     return AArch64::ADDWrx;
    765   case AArch64::ADDSXrr:
    766     return AArch64::ADDXrr;
    767   case AArch64::ADDSXri:
    768     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
    769   case AArch64::ADDSXrs:
    770     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
    771   case AArch64::ADDSXrx:
    772     return AArch64::ADDXrx;
    773   case AArch64::SUBSWrr:
    774     return AArch64::SUBWrr;
    775   case AArch64::SUBSWri:
    776     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
    777   case AArch64::SUBSWrs:
    778     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
    779   case AArch64::SUBSWrx:
    780     return AArch64::SUBWrx;
    781   case AArch64::SUBSXrr:
    782     return AArch64::SUBXrr;
    783   case AArch64::SUBSXri:
    784     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
    785   case AArch64::SUBSXrs:
    786     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
    787   case AArch64::SUBSXrx:
    788     return AArch64::SUBXrx;
    789   }
    790 }
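         // E.g. SUBSWrr becomes SUBWrr and ADDSXri becomes ADDXri, but a compare such
         // as "subs wzr, w0, #1" stays SUBSWri: dropping the S would re-encode the
         // destination as wsp instead of wzr.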
    791 
     792 /// True when the condition code could be modified on the instruction
     793 /// trace starting at \p From and ending at \p To.
    794 static bool modifiesConditionCode(MachineInstr *From, MachineInstr *To,
    795                                   const bool CheckOnlyCCWrites,
    796                                   const TargetRegisterInfo *TRI) {
    797   // We iterate backward starting \p To until we hit \p From
    798   MachineBasicBlock::iterator I = To, E = From, B = To->getParent()->begin();
    799 
    800   // Early exit if To is at the beginning of the BB.
    801   if (I == B)
    802     return true;
    803 
    804   // Check whether the definition of SrcReg is in the same basic block as
    805   // Compare. If not, assume the condition code gets modified on some path.
    806   if (To->getParent() != From->getParent())
    807     return true;
    808 
    809   // Check that NZCV isn't set on the trace.
    810   for (--I; I != E; --I) {
    811     const MachineInstr &Instr = *I;
    812 
    813     if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
    814         (!CheckOnlyCCWrites && Instr.readsRegister(AArch64::NZCV, TRI)))
    815       // This instruction modifies or uses NZCV after the one we want to
    816       // change.
    817       return true;
    818     if (I == B)
    819       // We currently don't allow the instruction trace to cross basic
    820       // block boundaries
    821       return true;
    822   }
    823   return false;
    824 }
    825 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
    826 /// comparison into one that sets the zero bit in the flags register.
    827 bool AArch64InstrInfo::optimizeCompareInstr(
    828     MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    829     int CmpValue, const MachineRegisterInfo *MRI) const {
    830 
    831   // Replace SUBSWrr with SUBWrr if NZCV is not used.
    832   int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
    833   if (Cmp_NZCV != -1) {
    834     if (CmpInstr->definesRegister(AArch64::WZR) ||
    835         CmpInstr->definesRegister(AArch64::XZR)) {
    836       CmpInstr->eraseFromParent();
    837       return true;
    838     }
    839     unsigned Opc = CmpInstr->getOpcode();
    840     unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    841     if (NewOpc == Opc)
    842       return false;
    843     const MCInstrDesc &MCID = get(NewOpc);
    844     CmpInstr->setDesc(MCID);
    845     CmpInstr->RemoveOperand(Cmp_NZCV);
    846     bool succeeded = UpdateOperandRegClass(CmpInstr);
    847     (void)succeeded;
    848     assert(succeeded && "Some operands reg class are incompatible!");
    849     return true;
    850   }
    851 
    852   // Continue only if we have a "ri" where immediate is zero.
     853   // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare
    854   // function.
    855   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
    856   if (CmpValue != 0 || SrcReg2 != 0)
    857     return false;
    858 
    859   // CmpInstr is a Compare instruction if destination register is not used.
    860   if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
    861     return false;
    862 
    863   // Get the unique definition of SrcReg.
    864   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
    865   if (!MI)
    866     return false;
    867 
    868   bool CheckOnlyCCWrites = false;
    869   const TargetRegisterInfo *TRI = &getRegisterInfo();
    870   if (modifiesConditionCode(MI, CmpInstr, CheckOnlyCCWrites, TRI))
    871     return false;
    872 
    873   unsigned NewOpc = MI->getOpcode();
    874   switch (MI->getOpcode()) {
    875   default:
    876     return false;
    877   case AArch64::ADDSWrr:
    878   case AArch64::ADDSWri:
    879   case AArch64::ADDSXrr:
    880   case AArch64::ADDSXri:
    881   case AArch64::SUBSWrr:
    882   case AArch64::SUBSWri:
    883   case AArch64::SUBSXrr:
    884   case AArch64::SUBSXri:
    885     break;
    886   case AArch64::ADDWrr:    NewOpc = AArch64::ADDSWrr; break;
    887   case AArch64::ADDWri:    NewOpc = AArch64::ADDSWri; break;
    888   case AArch64::ADDXrr:    NewOpc = AArch64::ADDSXrr; break;
    889   case AArch64::ADDXri:    NewOpc = AArch64::ADDSXri; break;
    890   case AArch64::ADCWr:     NewOpc = AArch64::ADCSWr; break;
    891   case AArch64::ADCXr:     NewOpc = AArch64::ADCSXr; break;
    892   case AArch64::SUBWrr:    NewOpc = AArch64::SUBSWrr; break;
    893   case AArch64::SUBWri:    NewOpc = AArch64::SUBSWri; break;
    894   case AArch64::SUBXrr:    NewOpc = AArch64::SUBSXrr; break;
    895   case AArch64::SUBXri:    NewOpc = AArch64::SUBSXri; break;
    896   case AArch64::SBCWr:     NewOpc = AArch64::SBCSWr; break;
    897   case AArch64::SBCXr:     NewOpc = AArch64::SBCSXr; break;
    898   case AArch64::ANDWri:    NewOpc = AArch64::ANDSWri; break;
    899   case AArch64::ANDXri:    NewOpc = AArch64::ANDSXri; break;
    900   }
    901 
    902   // Scan forward for the use of NZCV.
     903   // When checking against MI: if the condition code requires checking of the
     904   // V bit, then it is not safe to do this transformation.
    905   // It is safe to remove CmpInstr if NZCV is redefined or killed.
    906   // If we are done with the basic block, we need to check whether NZCV is
    907   // live-out.
    908   bool IsSafe = false;
    909   for (MachineBasicBlock::iterator I = CmpInstr,
    910                                    E = CmpInstr->getParent()->end();
    911        !IsSafe && ++I != E;) {
    912     const MachineInstr &Instr = *I;
    913     for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
    914          ++IO) {
    915       const MachineOperand &MO = Instr.getOperand(IO);
    916       if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
    917         IsSafe = true;
    918         break;
    919       }
    920       if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
    921         continue;
    922       if (MO.isDef()) {
    923         IsSafe = true;
    924         break;
    925       }
    926 
    927       // Decode the condition code.
    928       unsigned Opc = Instr.getOpcode();
    929       AArch64CC::CondCode CC;
    930       switch (Opc) {
    931       default:
    932         return false;
    933       case AArch64::Bcc:
    934         CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
    935         break;
    936       case AArch64::CSINVWr:
    937       case AArch64::CSINVXr:
    938       case AArch64::CSINCWr:
    939       case AArch64::CSINCXr:
    940       case AArch64::CSELWr:
    941       case AArch64::CSELXr:
    942       case AArch64::CSNEGWr:
    943       case AArch64::CSNEGXr:
    944       case AArch64::FCSELSrrr:
    945       case AArch64::FCSELDrrr:
    946         CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
    947         break;
    948       }
    949 
    950       // It is not safe to remove Compare instruction if Overflow(V) is used.
    951       switch (CC) {
    952       default:
    953         // NZCV can be used multiple times, we should continue.
    954         break;
    955       case AArch64CC::VS:
    956       case AArch64CC::VC:
    957       case AArch64CC::GE:
    958       case AArch64CC::LT:
    959       case AArch64CC::GT:
    960       case AArch64CC::LE:
    961         return false;
    962       }
    963     }
    964   }
    965 
    966   // If NZCV is not killed nor re-defined, we should check whether it is
    967   // live-out. If it is live-out, do not optimize.
    968   if (!IsSafe) {
    969     MachineBasicBlock *ParentBlock = CmpInstr->getParent();
    970     for (auto *MBB : ParentBlock->successors())
    971       if (MBB->isLiveIn(AArch64::NZCV))
    972         return false;
    973   }
    974 
    975   // Update the instruction to set NZCV.
    976   MI->setDesc(get(NewOpc));
    977   CmpInstr->eraseFromParent();
    978   bool succeeded = UpdateOperandRegClass(MI);
    979   (void)succeeded;
    980   assert(succeeded && "Some operands reg class are incompatible!");
    981   MI->addRegisterDefined(AArch64::NZCV, TRI);
    982   return true;
    983 }
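         // Illustrative example of the second transformation (virtual registers and
         // blocks are arbitrary):
         //   %w0 = SUBWri %w1, 1, 0        ; w0 = w1 - 1
         //   ...                           ; nothing in between touches NZCV
         //   %dead = SUBSWri %w0, 0, 0     ; cmp w0, #0, result unused
         //   Bcc eq, <bb>
         // becomes
         //   %w0 = SUBSWri %w1, 1, 0       ; the sub now sets NZCV itself
         //   Bcc eq, <bb>
         // provided NZCV is not live out of the block and no user of NZCV needs the
         // V bit.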
    984 
    985 bool
    986 AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
    987   if (MI->getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    988     return false;
    989 
    990   MachineBasicBlock &MBB = *MI->getParent();
    991   DebugLoc DL = MI->getDebugLoc();
    992   unsigned Reg = MI->getOperand(0).getReg();
    993   const GlobalValue *GV =
    994       cast<GlobalValue>((*MI->memoperands_begin())->getValue());
    995   const TargetMachine &TM = MBB.getParent()->getTarget();
    996   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
    997   const unsigned char MO_NC = AArch64II::MO_NC;
    998 
    999   if ((OpFlags & AArch64II::MO_GOT) != 0) {
   1000     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
   1001         .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
   1002     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   1003         .addReg(Reg, RegState::Kill).addImm(0)
   1004         .addMemOperand(*MI->memoperands_begin());
   1005   } else if (TM.getCodeModel() == CodeModel::Large) {
   1006     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
   1007         .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
   1008     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   1009         .addReg(Reg, RegState::Kill)
   1010         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
   1011     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   1012         .addReg(Reg, RegState::Kill)
   1013         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
   1014     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   1015         .addReg(Reg, RegState::Kill)
   1016         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
   1017     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   1018         .addReg(Reg, RegState::Kill).addImm(0)
   1019         .addMemOperand(*MI->memoperands_begin());
   1020   } else {
   1021     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
   1022         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
   1023     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
   1024     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   1025         .addReg(Reg, RegState::Kill)
   1026         .addGlobalAddress(GV, 0, LoFlags)
   1027         .addMemOperand(*MI->memoperands_begin());
   1028   }
   1029 
   1030   MBB.erase(MI);
   1031 
   1032   return true;
   1033 }
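         // E.g. in the default case above (no GOT, small code model) the pseudo is
         // replaced by roughly (symbol name illustrative):
         //   adrp xN, __stack_chk_guard
         //   ldr  xN, [xN, :lo12:__stack_chk_guard]
         // The GOT case loads the guard's address through its GOT entry first, and the
         // large code model materializes the address with a movz + three movk sequence
         // before the final load.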
   1034 
    1035 /// Return true if this instruction has a shifted register operand with a non-zero immediate.
   1036 bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
   1037   switch (MI->getOpcode()) {
   1038   default:
   1039     break;
   1040   case AArch64::ADDSWrs:
   1041   case AArch64::ADDSXrs:
   1042   case AArch64::ADDWrs:
   1043   case AArch64::ADDXrs:
   1044   case AArch64::ANDSWrs:
   1045   case AArch64::ANDSXrs:
   1046   case AArch64::ANDWrs:
   1047   case AArch64::ANDXrs:
   1048   case AArch64::BICSWrs:
   1049   case AArch64::BICSXrs:
   1050   case AArch64::BICWrs:
   1051   case AArch64::BICXrs:
   1052   case AArch64::CRC32Brr:
   1053   case AArch64::CRC32CBrr:
   1054   case AArch64::CRC32CHrr:
   1055   case AArch64::CRC32CWrr:
   1056   case AArch64::CRC32CXrr:
   1057   case AArch64::CRC32Hrr:
   1058   case AArch64::CRC32Wrr:
   1059   case AArch64::CRC32Xrr:
   1060   case AArch64::EONWrs:
   1061   case AArch64::EONXrs:
   1062   case AArch64::EORWrs:
   1063   case AArch64::EORXrs:
   1064   case AArch64::ORNWrs:
   1065   case AArch64::ORNXrs:
   1066   case AArch64::ORRWrs:
   1067   case AArch64::ORRXrs:
   1068   case AArch64::SUBSWrs:
   1069   case AArch64::SUBSXrs:
   1070   case AArch64::SUBWrs:
   1071   case AArch64::SUBXrs:
   1072     if (MI->getOperand(3).isImm()) {
   1073       unsigned val = MI->getOperand(3).getImm();
   1074       return (val != 0);
   1075     }
   1076     break;
   1077   }
   1078   return false;
   1079 }
   1080 
    1081 /// Return true if this instruction has an extended register operand with a non-zero immediate.
   1082 bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
   1083   switch (MI->getOpcode()) {
   1084   default:
   1085     break;
   1086   case AArch64::ADDSWrx:
   1087   case AArch64::ADDSXrx:
   1088   case AArch64::ADDSXrx64:
   1089   case AArch64::ADDWrx:
   1090   case AArch64::ADDXrx:
   1091   case AArch64::ADDXrx64:
   1092   case AArch64::SUBSWrx:
   1093   case AArch64::SUBSXrx:
   1094   case AArch64::SUBSXrx64:
   1095   case AArch64::SUBWrx:
   1096   case AArch64::SUBXrx:
   1097   case AArch64::SUBXrx64:
   1098     if (MI->getOperand(3).isImm()) {
   1099       unsigned val = MI->getOperand(3).getImm();
   1100       return (val != 0);
   1101     }
   1102     break;
   1103   }
   1104 
   1105   return false;
   1106 }
   1107 
   1108 // Return true if this instruction simply sets its single destination register
   1109 // to zero. This is equivalent to a register rename of the zero-register.
   1110 bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
   1111   switch (MI->getOpcode()) {
   1112   default:
   1113     break;
   1114   case AArch64::MOVZWi:
   1115   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
   1116     if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
   1117       assert(MI->getDesc().getNumOperands() == 3 &&
   1118              MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
   1119       return true;
   1120     }
   1121     break;
   1122   case AArch64::ANDWri: // and Rd, Rzr, #imm
   1123     return MI->getOperand(1).getReg() == AArch64::WZR;
   1124   case AArch64::ANDXri:
   1125     return MI->getOperand(1).getReg() == AArch64::XZR;
   1126   case TargetOpcode::COPY:
   1127     return MI->getOperand(1).getReg() == AArch64::WZR;
   1128   }
   1129   return false;
   1130 }
   1131 
   1132 // Return true if this instruction simply renames a general register without
   1133 // modifying bits.
   1134 bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
   1135   switch (MI->getOpcode()) {
   1136   default:
   1137     break;
   1138   case TargetOpcode::COPY: {
    1139     // GPR32/GPR64 copies will be lowered to ORRWrs/ORRXrs.
   1140     unsigned DstReg = MI->getOperand(0).getReg();
   1141     return (AArch64::GPR32RegClass.contains(DstReg) ||
   1142             AArch64::GPR64RegClass.contains(DstReg));
   1143   }
   1144   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
   1145     if (MI->getOperand(1).getReg() == AArch64::XZR) {
   1146       assert(MI->getDesc().getNumOperands() == 4 &&
   1147              MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
   1148       return true;
   1149     }
   1150     break;
   1151   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
   1152     if (MI->getOperand(2).getImm() == 0) {
   1153       assert(MI->getDesc().getNumOperands() == 4 &&
   1154              MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
   1155       return true;
   1156     }
   1157     break;
   1158   }
   1159   return false;
   1160 }
   1161 
    1162 // Return true if this instruction simply renames a floating-point register
    1163 // without modifying bits.
   1164 bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
   1165   switch (MI->getOpcode()) {
   1166   default:
   1167     break;
   1168   case TargetOpcode::COPY: {
    1169     // FPR64 copies will be lowered to ORR.16b
   1170     unsigned DstReg = MI->getOperand(0).getReg();
   1171     return (AArch64::FPR64RegClass.contains(DstReg) ||
   1172             AArch64::FPR128RegClass.contains(DstReg));
   1173   }
   1174   case AArch64::ORRv16i8:
   1175     if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
   1176       assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
   1177              "invalid ORRv16i8 operands");
   1178       return true;
   1179     }
   1180     break;
   1181   }
   1182   return false;
   1183 }
   1184 
   1185 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
   1186                                                int &FrameIndex) const {
   1187   switch (MI->getOpcode()) {
   1188   default:
   1189     break;
   1190   case AArch64::LDRWui:
   1191   case AArch64::LDRXui:
   1192   case AArch64::LDRBui:
   1193   case AArch64::LDRHui:
   1194   case AArch64::LDRSui:
   1195   case AArch64::LDRDui:
   1196   case AArch64::LDRQui:
   1197     if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
   1198         MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
   1199       FrameIndex = MI->getOperand(1).getIndex();
   1200       return MI->getOperand(0).getReg();
   1201     }
   1202     break;
   1203   }
   1204 
   1205   return 0;
   1206 }
   1207 
   1208 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
   1209                                               int &FrameIndex) const {
   1210   switch (MI->getOpcode()) {
   1211   default:
   1212     break;
   1213   case AArch64::STRWui:
   1214   case AArch64::STRXui:
   1215   case AArch64::STRBui:
   1216   case AArch64::STRHui:
   1217   case AArch64::STRSui:
   1218   case AArch64::STRDui:
   1219   case AArch64::STRQui:
   1220     if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
   1221         MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
   1222       FrameIndex = MI->getOperand(1).getIndex();
   1223       return MI->getOperand(0).getReg();
   1224     }
   1225     break;
   1226   }
   1227   return 0;
   1228 }
   1229 
    1230 /// Return true if this load/store scales or extends its register offset.
   1231 /// This refers to scaling a dynamic index as opposed to scaled immediates.
   1232 /// MI should be a memory op that allows scaled addressing.
   1233 bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
   1234   switch (MI->getOpcode()) {
   1235   default:
   1236     break;
   1237   case AArch64::LDRBBroW:
   1238   case AArch64::LDRBroW:
   1239   case AArch64::LDRDroW:
   1240   case AArch64::LDRHHroW:
   1241   case AArch64::LDRHroW:
   1242   case AArch64::LDRQroW:
   1243   case AArch64::LDRSBWroW:
   1244   case AArch64::LDRSBXroW:
   1245   case AArch64::LDRSHWroW:
   1246   case AArch64::LDRSHXroW:
   1247   case AArch64::LDRSWroW:
   1248   case AArch64::LDRSroW:
   1249   case AArch64::LDRWroW:
   1250   case AArch64::LDRXroW:
   1251   case AArch64::STRBBroW:
   1252   case AArch64::STRBroW:
   1253   case AArch64::STRDroW:
   1254   case AArch64::STRHHroW:
   1255   case AArch64::STRHroW:
   1256   case AArch64::STRQroW:
   1257   case AArch64::STRSroW:
   1258   case AArch64::STRWroW:
   1259   case AArch64::STRXroW:
   1260   case AArch64::LDRBBroX:
   1261   case AArch64::LDRBroX:
   1262   case AArch64::LDRDroX:
   1263   case AArch64::LDRHHroX:
   1264   case AArch64::LDRHroX:
   1265   case AArch64::LDRQroX:
   1266   case AArch64::LDRSBWroX:
   1267   case AArch64::LDRSBXroX:
   1268   case AArch64::LDRSHWroX:
   1269   case AArch64::LDRSHXroX:
   1270   case AArch64::LDRSWroX:
   1271   case AArch64::LDRSroX:
   1272   case AArch64::LDRWroX:
   1273   case AArch64::LDRXroX:
   1274   case AArch64::STRBBroX:
   1275   case AArch64::STRBroX:
   1276   case AArch64::STRDroX:
   1277   case AArch64::STRHHroX:
   1278   case AArch64::STRHroX:
   1279   case AArch64::STRQroX:
   1280   case AArch64::STRSroX:
   1281   case AArch64::STRWroX:
   1282   case AArch64::STRXroX:
   1283 
   1284     unsigned Val = MI->getOperand(3).getImm();
   1285     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
   1286     return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
   1287   }
   1288   return false;
   1289 }
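         // E.g. "ldr x0, [x1, x2, lsl #3]" (LDRXroX) uses a scaled register offset and
         // returns true, while "ldr x0, [x1, x2]" with a plain 64-bit index (UXTX, no
         // shift) returns false.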
   1290 
   1291 /// Check all MachineMemOperands for a hint to suppress pairing.
   1292 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
   1293   assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
   1294          "Too many target MO flags");
   1295   for (auto *MM : MI->memoperands()) {
   1296     if (MM->getFlags() &
   1297         (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
   1298       return true;
   1299     }
   1300   }
   1301   return false;
   1302 }
   1303 
   1304 /// Set a flag on the first MachineMemOperand to suppress pairing.
   1305 void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
   1306   if (MI->memoperands_empty())
   1307     return;
   1308 
   1309   assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
   1310          "Too many target MO flags");
   1311   (*MI->memoperands_begin())
   1312       ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
   1313 }
   1314 
   1315 bool
   1316 AArch64InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
   1317                                         unsigned &Offset,
   1318                                         const TargetRegisterInfo *TRI) const {
   1319   switch (LdSt->getOpcode()) {
   1320   default:
   1321     return false;
   1322   case AArch64::STRSui:
   1323   case AArch64::STRDui:
   1324   case AArch64::STRQui:
   1325   case AArch64::STRXui:
   1326   case AArch64::STRWui:
   1327   case AArch64::LDRSui:
   1328   case AArch64::LDRDui:
   1329   case AArch64::LDRQui:
   1330   case AArch64::LDRXui:
   1331   case AArch64::LDRWui:
   1332     if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
   1333       return false;
   1334     BaseReg = LdSt->getOperand(1).getReg();
   1335     MachineFunction &MF = *LdSt->getParent()->getParent();
   1336     unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize();
   1337     Offset = LdSt->getOperand(2).getImm() * Width;
   1338     return true;
   1339   };
   1340 }
   1341 
   1342 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
   1343     MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
   1344     const TargetRegisterInfo *TRI) const {
   1345   // Handle only loads/stores with base register followed by immediate offset.
   1346   if (LdSt->getNumOperands() != 3)
   1347     return false;
   1348   if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
   1349     return false;
   1350 
   1351   // Offset is calculated as the immediate operand multiplied by the scaling factor.
   1352   // Unscaled instructions have scaling factor set to 1.
   1353   int Scale = 0;
   1354   switch (LdSt->getOpcode()) {
   1355   default:
   1356     return false;
   1357   case AArch64::LDURQi:
   1358   case AArch64::STURQi:
   1359     Width = 16;
   1360     Scale = 1;
   1361     break;
   1362   case AArch64::LDURXi:
   1363   case AArch64::LDURDi:
   1364   case AArch64::STURXi:
   1365   case AArch64::STURDi:
   1366     Width = 8;
   1367     Scale = 1;
   1368     break;
   1369   case AArch64::LDURWi:
   1370   case AArch64::LDURSi:
   1371   case AArch64::LDURSWi:
   1372   case AArch64::STURWi:
   1373   case AArch64::STURSi:
   1374     Width = 4;
   1375     Scale = 1;
   1376     break;
   1377   case AArch64::LDURHi:
   1378   case AArch64::LDURHHi:
   1379   case AArch64::LDURSHXi:
   1380   case AArch64::LDURSHWi:
   1381   case AArch64::STURHi:
   1382   case AArch64::STURHHi:
   1383     Width = 2;
   1384     Scale = 1;
   1385     break;
   1386   case AArch64::LDURBi:
   1387   case AArch64::LDURBBi:
   1388   case AArch64::LDURSBXi:
   1389   case AArch64::LDURSBWi:
   1390   case AArch64::STURBi:
   1391   case AArch64::STURBBi:
   1392     Width = 1;
   1393     Scale = 1;
   1394     break;
   1395   case AArch64::LDRQui:
   1396   case AArch64::STRQui:
   1397     Scale = Width = 16;
   1398     break;
   1399   case AArch64::LDRXui:
   1400   case AArch64::LDRDui:
   1401   case AArch64::STRXui:
   1402   case AArch64::STRDui:
   1403     Scale = Width = 8;
   1404     break;
   1405   case AArch64::LDRWui:
   1406   case AArch64::LDRSui:
   1407   case AArch64::STRWui:
   1408   case AArch64::STRSui:
   1409     Scale = Width = 4;
   1410     break;
   1411   case AArch64::LDRHui:
   1412   case AArch64::LDRHHui:
   1413   case AArch64::STRHui:
   1414   case AArch64::STRHHui:
   1415     Scale = Width = 2;
   1416     break;
   1417   case AArch64::LDRBui:
   1418   case AArch64::LDRBBui:
   1419   case AArch64::STRBui:
   1420   case AArch64::STRBBui:
   1421     Scale = Width = 1;
   1422     break;
    1423   }
   1424 
   1425   BaseReg = LdSt->getOperand(1).getReg();
   1426   Offset = LdSt->getOperand(2).getImm() * Scale;
   1427   return true;
   1428 }
   1429 
   1430 /// Detect opportunities for ldp/stp formation.
   1431 ///
   1432 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
   1433 bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
   1434                                           MachineInstr *SecondLdSt,
   1435                                           unsigned NumLoads) const {
   1436   // Only cluster up to a single pair.
   1437   if (NumLoads > 1)
   1438     return false;
   1439   if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
   1440     return false;
    1441   // getMemOpBaseRegImmOfs guarantees that operand 2 isImm.
   1442   unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
   1443   // Allow 6 bits of positive range.
   1444   if (Ofs1 > 64)
   1445     return false;
   1446   // The caller should already have ordered First/SecondLdSt by offset.
   1447   unsigned Ofs2 = SecondLdSt->getOperand(2).getImm();
   1448   return Ofs1 + 1 == Ofs2;
   1449 }
   1450 
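         /// Return true when First and Second should be scheduled back to back
         /// so the core can fuse them; only Cyclone pairs (a flag-setting
         /// compare-like instruction followed by Bcc, or an ALU immediate
         /// instruction followed by CBZ/CBNZ) are recognized here.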
   1451 bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
   1452                                               MachineInstr *Second) const {
   1453   if (Subtarget.isCyclone()) {
   1454     // Cyclone can fuse CMN, CMP, TST followed by Bcc.
   1455     unsigned SecondOpcode = Second->getOpcode();
   1456     if (SecondOpcode == AArch64::Bcc) {
   1457       switch (First->getOpcode()) {
   1458       default:
   1459         return false;
   1460       case AArch64::SUBSWri:
   1461       case AArch64::ADDSWri:
   1462       case AArch64::ANDSWri:
   1463       case AArch64::SUBSXri:
   1464       case AArch64::ADDSXri:
   1465       case AArch64::ANDSXri:
   1466         return true;
   1467       }
   1468     }
   1469     // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ.
   1470     if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
   1471         SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
   1472       switch (First->getOpcode()) {
   1473       default:
   1474         return false;
   1475       case AArch64::ADDWri:
   1476       case AArch64::ADDXri:
   1477       case AArch64::ANDWri:
   1478       case AArch64::ANDXri:
   1479       case AArch64::EORWri:
   1480       case AArch64::EORXri:
   1481       case AArch64::ORRWri:
   1482       case AArch64::ORRXri:
   1483       case AArch64::SUBWri:
   1484       case AArch64::SUBXri:
   1485         return true;
   1486       }
   1487     }
   1488   }
   1489   return false;
   1490 }
   1491 
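         /// Build a DBG_VALUE instruction describing a value that lives in
         /// stack slot FrameIx at the given Offset, with the variable and
         /// expression metadata attached.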
   1492 MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
   1493     MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
   1494     const MDNode *Expr, DebugLoc DL) const {
   1495   MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
   1496                                 .addFrameIndex(FrameIx)
   1497                                 .addImm(0)
   1498                                 .addImm(Offset)
   1499                                 .addMetadata(Var)
   1500                                 .addMetadata(Expr);
   1501   return &*MIB;
   1502 }
   1503 
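         /// Append a register operand for (Reg, SubIdx) to MIB: a physical
         /// register is resolved to its concrete sub-register via TRI, while a
         /// virtual register keeps the sub-register index on the operand.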
   1504 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
   1505                                             unsigned Reg, unsigned SubIdx,
   1506                                             unsigned State,
   1507                                             const TargetRegisterInfo *TRI) {
   1508   if (!SubIdx)
   1509     return MIB.addReg(Reg, State);
   1510 
   1511   if (TargetRegisterInfo::isPhysicalRegister(Reg))
   1512     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
   1513   return MIB.addReg(Reg, State, SubIdx);
   1514 }
   1515 
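         /// Return true if copying a register tuple starting from the lowest
         /// sub-register would overwrite part of the source before it is read,
         /// i.e. the destination and source encodings overlap within NumRegs
         /// consecutive registers (modulo 32).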
   1516 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
   1517                                         unsigned NumRegs) {
    1518   // We really want the positive remainder mod 32 here, which happens to be
   1519   // easily obtainable with a mask.
   1520   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
   1521 }
   1522 
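         /// Copy a D- or Q-register tuple by emitting one vector ORR (Opcode)
         /// per sub-register in Indices, reversing the copy order whenever a
         /// forward copy would clobber source sub-registers not yet read.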
   1523 void AArch64InstrInfo::copyPhysRegTuple(
   1524     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
   1525     unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
   1526     llvm::ArrayRef<unsigned> Indices) const {
   1527   assert(Subtarget.hasNEON() &&
   1528          "Unexpected register copy without NEON");
   1529   const TargetRegisterInfo *TRI = &getRegisterInfo();
   1530   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
   1531   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
   1532   unsigned NumRegs = Indices.size();
   1533 
   1534   int SubReg = 0, End = NumRegs, Incr = 1;
   1535   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
   1536     SubReg = NumRegs - 1;
   1537     End = -1;
   1538     Incr = -1;
   1539   }
   1540 
   1541   for (; SubReg != End; SubReg += Incr) {
   1542     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
   1543     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
   1544     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
   1545     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
   1546   }
   1547 }
   1548 
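         /// Emit a register-to-register copy, picking the sequence that suits
         /// the register classes involved: ORR/ADD moves for GPRs (zero-cycle
         /// forms when the subtarget has them), ORR.16b or FMOV for FP and
         /// vector registers, per-sub-register copies for D/Q tuples, and
         /// MSR/MRS for copies involving NZCV.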
   1549 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   1550                                    MachineBasicBlock::iterator I, DebugLoc DL,
   1551                                    unsigned DestReg, unsigned SrcReg,
   1552                                    bool KillSrc) const {
   1553   if (AArch64::GPR32spRegClass.contains(DestReg) &&
   1554       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
   1555     const TargetRegisterInfo *TRI = &getRegisterInfo();
   1556 
   1557     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
   1558       // If either operand is WSP, expand to ADD #0.
   1559       if (Subtarget.hasZeroCycleRegMove()) {
   1560         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
   1561         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
   1562                                                      &AArch64::GPR64spRegClass);
   1563         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
   1564                                                     &AArch64::GPR64spRegClass);
   1565         // This instruction is reading and writing X registers.  This may upset
   1566         // the register scavenger and machine verifier, so we need to indicate
   1567         // that we are reading an undefined value from SrcRegX, but a proper
   1568         // value from SrcReg.
   1569         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
   1570             .addReg(SrcRegX, RegState::Undef)
   1571             .addImm(0)
   1572             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
   1573             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
   1574       } else {
   1575         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
   1576             .addReg(SrcReg, getKillRegState(KillSrc))
   1577             .addImm(0)
   1578             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   1579       }
   1580     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
   1581       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
   1582           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   1583     } else {
   1584       if (Subtarget.hasZeroCycleRegMove()) {
   1585         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
   1586         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
   1587                                                      &AArch64::GPR64spRegClass);
   1588         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
   1589                                                     &AArch64::GPR64spRegClass);
   1590         // This instruction is reading and writing X registers.  This may upset
   1591         // the register scavenger and machine verifier, so we need to indicate
   1592         // that we are reading an undefined value from SrcRegX, but a proper
   1593         // value from SrcReg.
   1594         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
   1595             .addReg(AArch64::XZR)
   1596             .addReg(SrcRegX, RegState::Undef)
   1597             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
   1598       } else {
   1599         // Otherwise, expand to ORR WZR.
   1600         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
   1601             .addReg(AArch64::WZR)
   1602             .addReg(SrcReg, getKillRegState(KillSrc));
   1603       }
   1604     }
   1605     return;
   1606   }
   1607 
   1608   if (AArch64::GPR64spRegClass.contains(DestReg) &&
   1609       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
   1610     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
   1611       // If either operand is SP, expand to ADD #0.
   1612       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
   1613           .addReg(SrcReg, getKillRegState(KillSrc))
   1614           .addImm(0)
   1615           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   1616     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
   1617       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
   1618           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   1619     } else {
   1620       // Otherwise, expand to ORR XZR.
   1621       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
   1622           .addReg(AArch64::XZR)
   1623           .addReg(SrcReg, getKillRegState(KillSrc));
   1624     }
   1625     return;
   1626   }
   1627 
   1628   // Copy a DDDD register quad by copying the individual sub-registers.
   1629   if (AArch64::DDDDRegClass.contains(DestReg) &&
   1630       AArch64::DDDDRegClass.contains(SrcReg)) {
   1631     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
   1632                                         AArch64::dsub2, AArch64::dsub3 };
   1633     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   1634                      Indices);
   1635     return;
   1636   }
   1637 
   1638   // Copy a DDD register triple by copying the individual sub-registers.
   1639   if (AArch64::DDDRegClass.contains(DestReg) &&
   1640       AArch64::DDDRegClass.contains(SrcReg)) {
   1641     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
   1642                                         AArch64::dsub2 };
   1643     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   1644                      Indices);
   1645     return;
   1646   }
   1647 
   1648   // Copy a DD register pair by copying the individual sub-registers.
   1649   if (AArch64::DDRegClass.contains(DestReg) &&
   1650       AArch64::DDRegClass.contains(SrcReg)) {
   1651     static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
   1652     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   1653                      Indices);
   1654     return;
   1655   }
   1656 
   1657   // Copy a QQQQ register quad by copying the individual sub-registers.
   1658   if (AArch64::QQQQRegClass.contains(DestReg) &&
   1659       AArch64::QQQQRegClass.contains(SrcReg)) {
   1660     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
   1661                                         AArch64::qsub2, AArch64::qsub3 };
   1662     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   1663                      Indices);
   1664     return;
   1665   }
   1666 
   1667   // Copy a QQQ register triple by copying the individual sub-registers.
   1668   if (AArch64::QQQRegClass.contains(DestReg) &&
   1669       AArch64::QQQRegClass.contains(SrcReg)) {
   1670     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
   1671                                         AArch64::qsub2 };
   1672     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   1673                      Indices);
   1674     return;
   1675   }
   1676 
   1677   // Copy a QQ register pair by copying the individual sub-registers.
   1678   if (AArch64::QQRegClass.contains(DestReg) &&
   1679       AArch64::QQRegClass.contains(SrcReg)) {
   1680     static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
   1681     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   1682                      Indices);
   1683     return;
   1684   }
   1685 
   1686   if (AArch64::FPR128RegClass.contains(DestReg) &&
   1687       AArch64::FPR128RegClass.contains(SrcReg)) {
    1688     if (Subtarget.hasNEON()) {
   1689       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   1690           .addReg(SrcReg)
   1691           .addReg(SrcReg, getKillRegState(KillSrc));
   1692     } else {
   1693       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
   1694         .addReg(AArch64::SP, RegState::Define)
   1695         .addReg(SrcReg, getKillRegState(KillSrc))
   1696         .addReg(AArch64::SP)
   1697         .addImm(-16);
   1698       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
   1699         .addReg(AArch64::SP, RegState::Define)
   1700         .addReg(DestReg, RegState::Define)
   1701         .addReg(AArch64::SP)
   1702         .addImm(16);
   1703     }
   1704     return;
   1705   }
   1706 
   1707   if (AArch64::FPR64RegClass.contains(DestReg) &&
   1708       AArch64::FPR64RegClass.contains(SrcReg)) {
    1709     if (Subtarget.hasNEON()) {
   1710       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
   1711                                        &AArch64::FPR128RegClass);
   1712       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
   1713                                       &AArch64::FPR128RegClass);
   1714       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   1715           .addReg(SrcReg)
   1716           .addReg(SrcReg, getKillRegState(KillSrc));
   1717     } else {
   1718       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
   1719           .addReg(SrcReg, getKillRegState(KillSrc));
   1720     }
   1721     return;
   1722   }
   1723 
   1724   if (AArch64::FPR32RegClass.contains(DestReg) &&
   1725       AArch64::FPR32RegClass.contains(SrcReg)) {
    1726     if (Subtarget.hasNEON()) {
   1727       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
   1728                                        &AArch64::FPR128RegClass);
   1729       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
   1730                                       &AArch64::FPR128RegClass);
   1731       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   1732           .addReg(SrcReg)
   1733           .addReg(SrcReg, getKillRegState(KillSrc));
   1734     } else {
   1735       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   1736           .addReg(SrcReg, getKillRegState(KillSrc));
   1737     }
   1738     return;
   1739   }
   1740 
   1741   if (AArch64::FPR16RegClass.contains(DestReg) &&
   1742       AArch64::FPR16RegClass.contains(SrcReg)) {
    1743     if (Subtarget.hasNEON()) {
   1744       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
   1745                                        &AArch64::FPR128RegClass);
   1746       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
   1747                                       &AArch64::FPR128RegClass);
   1748       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   1749           .addReg(SrcReg)
   1750           .addReg(SrcReg, getKillRegState(KillSrc));
   1751     } else {
   1752       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
   1753                                        &AArch64::FPR32RegClass);
   1754       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
   1755                                       &AArch64::FPR32RegClass);
   1756       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   1757           .addReg(SrcReg, getKillRegState(KillSrc));
   1758     }
   1759     return;
   1760   }
   1761 
   1762   if (AArch64::FPR8RegClass.contains(DestReg) &&
   1763       AArch64::FPR8RegClass.contains(SrcReg)) {
    1764     if (Subtarget.hasNEON()) {
   1765       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
   1766                                        &AArch64::FPR128RegClass);
   1767       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
   1768                                       &AArch64::FPR128RegClass);
   1769       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   1770           .addReg(SrcReg)
   1771           .addReg(SrcReg, getKillRegState(KillSrc));
   1772     } else {
   1773       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
   1774                                        &AArch64::FPR32RegClass);
   1775       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
   1776                                       &AArch64::FPR32RegClass);
   1777       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   1778           .addReg(SrcReg, getKillRegState(KillSrc));
   1779     }
   1780     return;
   1781   }
   1782 
   1783   // Copies between GPR64 and FPR64.
   1784   if (AArch64::FPR64RegClass.contains(DestReg) &&
   1785       AArch64::GPR64RegClass.contains(SrcReg)) {
   1786     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
   1787         .addReg(SrcReg, getKillRegState(KillSrc));
   1788     return;
   1789   }
   1790   if (AArch64::GPR64RegClass.contains(DestReg) &&
   1791       AArch64::FPR64RegClass.contains(SrcReg)) {
   1792     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
   1793         .addReg(SrcReg, getKillRegState(KillSrc));
   1794     return;
   1795   }
   1796   // Copies between GPR32 and FPR32.
   1797   if (AArch64::FPR32RegClass.contains(DestReg) &&
   1798       AArch64::GPR32RegClass.contains(SrcReg)) {
   1799     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
   1800         .addReg(SrcReg, getKillRegState(KillSrc));
   1801     return;
   1802   }
   1803   if (AArch64::GPR32RegClass.contains(DestReg) &&
   1804       AArch64::FPR32RegClass.contains(SrcReg)) {
   1805     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
   1806         .addReg(SrcReg, getKillRegState(KillSrc));
   1807     return;
   1808   }
   1809 
   1810   if (DestReg == AArch64::NZCV) {
   1811     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
   1812     BuildMI(MBB, I, DL, get(AArch64::MSR))
   1813       .addImm(AArch64SysReg::NZCV)
   1814       .addReg(SrcReg, getKillRegState(KillSrc))
   1815       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
   1816     return;
   1817   }
   1818 
   1819   if (SrcReg == AArch64::NZCV) {
   1820     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
   1821     BuildMI(MBB, I, DL, get(AArch64::MRS))
   1822       .addReg(DestReg)
   1823       .addImm(AArch64SysReg::NZCV)
   1824       .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
   1825     return;
   1826   }
   1827 
   1828   llvm_unreachable("unimplemented reg-to-reg copy");
   1829 }
   1830 
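         /// Store SrcReg to stack slot FI, selecting the store opcode from the
         /// size of the register class: STR forms for scalar GPR/FPR classes
         /// and ST1 forms (which take no immediate offset) for D/Q tuples.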
   1831 void AArch64InstrInfo::storeRegToStackSlot(
   1832     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
   1833     bool isKill, int FI, const TargetRegisterClass *RC,
   1834     const TargetRegisterInfo *TRI) const {
   1835   DebugLoc DL;
   1836   if (MBBI != MBB.end())
   1837     DL = MBBI->getDebugLoc();
   1838   MachineFunction &MF = *MBB.getParent();
   1839   MachineFrameInfo &MFI = *MF.getFrameInfo();
   1840   unsigned Align = MFI.getObjectAlignment(FI);
   1841 
   1842   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
   1843   MachineMemOperand *MMO = MF.getMachineMemOperand(
   1844       PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
   1845   unsigned Opc = 0;
   1846   bool Offset = true;
   1847   switch (RC->getSize()) {
   1848   case 1:
   1849     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
   1850       Opc = AArch64::STRBui;
   1851     break;
   1852   case 2:
   1853     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
   1854       Opc = AArch64::STRHui;
   1855     break;
   1856   case 4:
   1857     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
   1858       Opc = AArch64::STRWui;
   1859       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
   1860         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
   1861       else
   1862         assert(SrcReg != AArch64::WSP);
   1863     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
   1864       Opc = AArch64::STRSui;
   1865     break;
   1866   case 8:
   1867     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
   1868       Opc = AArch64::STRXui;
   1869       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
   1870         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
   1871       else
   1872         assert(SrcReg != AArch64::SP);
   1873     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
   1874       Opc = AArch64::STRDui;
   1875     break;
   1876   case 16:
   1877     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
   1878       Opc = AArch64::STRQui;
   1879     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
   1880       assert(Subtarget.hasNEON() &&
   1881              "Unexpected register store without NEON");
   1882       Opc = AArch64::ST1Twov1d, Offset = false;
   1883     }
   1884     break;
   1885   case 24:
   1886     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
   1887       assert(Subtarget.hasNEON() &&
   1888              "Unexpected register store without NEON");
   1889       Opc = AArch64::ST1Threev1d, Offset = false;
   1890     }
   1891     break;
   1892   case 32:
   1893     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
   1894       assert(Subtarget.hasNEON() &&
   1895              "Unexpected register store without NEON");
   1896       Opc = AArch64::ST1Fourv1d, Offset = false;
   1897     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
   1898       assert(Subtarget.hasNEON() &&
   1899              "Unexpected register store without NEON");
   1900       Opc = AArch64::ST1Twov2d, Offset = false;
   1901     }
   1902     break;
   1903   case 48:
   1904     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
   1905       assert(Subtarget.hasNEON() &&
   1906              "Unexpected register store without NEON");
   1907       Opc = AArch64::ST1Threev2d, Offset = false;
   1908     }
   1909     break;
   1910   case 64:
   1911     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
   1912       assert(Subtarget.hasNEON() &&
   1913              "Unexpected register store without NEON");
   1914       Opc = AArch64::ST1Fourv2d, Offset = false;
   1915     }
   1916     break;
   1917   }
   1918   assert(Opc && "Unknown register class");
   1919 
   1920   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
   1921                                       .addReg(SrcReg, getKillRegState(isKill))
   1922                                       .addFrameIndex(FI);
   1923 
   1924   if (Offset)
   1925     MI.addImm(0);
   1926   MI.addMemOperand(MMO);
   1927 }
   1928 
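         /// Load DestReg from stack slot FI, selecting the load opcode from
         /// the size of the register class: LDR forms for scalar GPR/FPR
         /// classes and LD1 forms (which take no immediate offset) for D/Q
         /// tuples.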
   1929 void AArch64InstrInfo::loadRegFromStackSlot(
   1930     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
   1931     int FI, const TargetRegisterClass *RC,
   1932     const TargetRegisterInfo *TRI) const {
   1933   DebugLoc DL;
   1934   if (MBBI != MBB.end())
   1935     DL = MBBI->getDebugLoc();
   1936   MachineFunction &MF = *MBB.getParent();
   1937   MachineFrameInfo &MFI = *MF.getFrameInfo();
   1938   unsigned Align = MFI.getObjectAlignment(FI);
   1939   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
   1940   MachineMemOperand *MMO = MF.getMachineMemOperand(
   1941       PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
   1942 
   1943   unsigned Opc = 0;
   1944   bool Offset = true;
   1945   switch (RC->getSize()) {
   1946   case 1:
   1947     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
   1948       Opc = AArch64::LDRBui;
   1949     break;
   1950   case 2:
   1951     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
   1952       Opc = AArch64::LDRHui;
   1953     break;
   1954   case 4:
   1955     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
   1956       Opc = AArch64::LDRWui;
   1957       if (TargetRegisterInfo::isVirtualRegister(DestReg))
   1958         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
   1959       else
   1960         assert(DestReg != AArch64::WSP);
   1961     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
   1962       Opc = AArch64::LDRSui;
   1963     break;
   1964   case 8:
   1965     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
   1966       Opc = AArch64::LDRXui;
   1967       if (TargetRegisterInfo::isVirtualRegister(DestReg))
   1968         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
   1969       else
   1970         assert(DestReg != AArch64::SP);
   1971     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
   1972       Opc = AArch64::LDRDui;
   1973     break;
   1974   case 16:
   1975     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
   1976       Opc = AArch64::LDRQui;
   1977     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
   1978       assert(Subtarget.hasNEON() &&
   1979              "Unexpected register load without NEON");
   1980       Opc = AArch64::LD1Twov1d, Offset = false;
   1981     }
   1982     break;
   1983   case 24:
   1984     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
   1985       assert(Subtarget.hasNEON() &&
   1986              "Unexpected register load without NEON");
   1987       Opc = AArch64::LD1Threev1d, Offset = false;
   1988     }
   1989     break;
   1990   case 32:
   1991     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
   1992       assert(Subtarget.hasNEON() &&
   1993              "Unexpected register load without NEON");
   1994       Opc = AArch64::LD1Fourv1d, Offset = false;
   1995     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
   1996       assert(Subtarget.hasNEON() &&
   1997              "Unexpected register load without NEON");
   1998       Opc = AArch64::LD1Twov2d, Offset = false;
   1999     }
   2000     break;
   2001   case 48:
   2002     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
   2003       assert(Subtarget.hasNEON() &&
   2004              "Unexpected register load without NEON");
   2005       Opc = AArch64::LD1Threev2d, Offset = false;
   2006     }
   2007     break;
   2008   case 64:
   2009     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
   2010       assert(Subtarget.hasNEON() &&
   2011              "Unexpected register load without NEON");
   2012       Opc = AArch64::LD1Fourv2d, Offset = false;
   2013     }
   2014     break;
   2015   }
   2016   assert(Opc && "Unknown register class");
   2017 
   2018   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
   2019                                       .addReg(DestReg, getDefRegState(true))
   2020                                       .addFrameIndex(FI);
   2021   if (Offset)
   2022     MI.addImm(0);
   2023   MI.addMemOperand(MMO);
   2024 }
   2025 
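         /// Materialize DestReg = SrcReg +/- Offset with ADD/SUB immediates
         /// (ADDS/SUBS when SetNZCV is set). Offsets wider than 12 bits are
         /// split into 12-bit chunks shifted left by 12, so e.g. Offset 0x1234
         /// expands to roughly:
         ///   add dst, src, #1, lsl #12
         ///   add dst, dst, #0x234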
   2026 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
   2027                            MachineBasicBlock::iterator MBBI, DebugLoc DL,
   2028                            unsigned DestReg, unsigned SrcReg, int Offset,
   2029                            const TargetInstrInfo *TII,
   2030                            MachineInstr::MIFlag Flag, bool SetNZCV) {
   2031   if (DestReg == SrcReg && Offset == 0)
   2032     return;
   2033 
   2034   bool isSub = Offset < 0;
   2035   if (isSub)
   2036     Offset = -Offset;
   2037 
   2038   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
   2039   // scratch register.  If DestReg is a virtual register, use it as the
   2040   // scratch register; otherwise, create a new virtual register (to be
   2041   // replaced by the scavenger at the end of PEI).  That case can be optimized
   2042   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
   2043   // register can be loaded with offset%8 and the add/sub can use an extending
   2044   // instruction with LSL#3.
   2045   // Currently the function handles any offsets but generates a poor sequence
   2046   // of code.
   2047   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
   2048 
   2049   unsigned Opc;
   2050   if (SetNZCV)
   2051     Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
   2052   else
   2053     Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
   2054   const unsigned MaxEncoding = 0xfff;
   2055   const unsigned ShiftSize = 12;
   2056   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
   2057   while (((unsigned)Offset) >= (1 << ShiftSize)) {
   2058     unsigned ThisVal;
   2059     if (((unsigned)Offset) > MaxEncodableValue) {
   2060       ThisVal = MaxEncodableValue;
   2061     } else {
   2062       ThisVal = Offset & MaxEncodableValue;
   2063     }
   2064     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
   2065            "Encoding cannot handle value that big");
   2066     BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
   2067         .addReg(SrcReg)
   2068         .addImm(ThisVal >> ShiftSize)
   2069         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
   2070         .setMIFlag(Flag);
   2071 
   2072     SrcReg = DestReg;
   2073     Offset -= ThisVal;
   2074     if (Offset == 0)
   2075       return;
   2076   }
   2077   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
   2078       .addReg(SrcReg)
   2079       .addImm(Offset)
   2080       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
   2081       .setMIFlag(Flag);
   2082 }
   2083 
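         /// This hook never actually folds a memory operand; it exists only to
         /// constrain GPR64all virtual registers that are copied to or from SP
         /// so that the generic folding code does not try to spill SP (see the
         /// comment in the body).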
   2084 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
   2085     MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
   2086     MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
   2087   // This is a bit of a hack. Consider this instruction:
   2088   //
   2089   //   %vreg0<def> = COPY %SP; GPR64all:%vreg0
   2090   //
   2091   // We explicitly chose GPR64all for the virtual register so such a copy might
   2092   // be eliminated by RegisterCoalescer. However, that may not be possible, and
   2093   // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
   2094   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
   2095   //
   2096   // To prevent that, we are going to constrain the %vreg0 register class here.
   2097   //
   2098   // <rdar://problem/11522048>
   2099   //
   2100   if (MI->isCopy()) {
   2101     unsigned DstReg = MI->getOperand(0).getReg();
   2102     unsigned SrcReg = MI->getOperand(1).getReg();
   2103     if (SrcReg == AArch64::SP &&
   2104         TargetRegisterInfo::isVirtualRegister(DstReg)) {
   2105       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
   2106       return nullptr;
   2107     }
   2108     if (DstReg == AArch64::SP &&
   2109         TargetRegisterInfo::isVirtualRegister(SrcReg)) {
   2110       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
   2111       return nullptr;
   2112     }
   2113   }
   2114 
   2115   // Cannot fold.
   2116   return nullptr;
   2117 }
   2118 
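         /// Decide how much of the byte offset Offset can be folded into the
         /// immediate field of the frame-index instruction MI. On return,
         /// *EmittableOffset holds the encodable part, Offset holds the bytes
         /// still to be materialized, and *OutUseUnscaledOp / *OutUnscaledOp
         /// say whether MI should be rewritten to its unscaled form. The
         /// result combines the AArch64FrameOffset* status flags.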
   2119 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
   2120                                     bool *OutUseUnscaledOp,
   2121                                     unsigned *OutUnscaledOp,
   2122                                     int *EmittableOffset) {
   2123   int Scale = 1;
   2124   bool IsSigned = false;
    2125   // ImmIdx defaults to 2; individual cases below override it where needed.
   2126   unsigned ImmIdx = 2;
   2127   unsigned UnscaledOp = 0;
   2128   // Set output values in case of early exit.
   2129   if (EmittableOffset)
   2130     *EmittableOffset = 0;
   2131   if (OutUseUnscaledOp)
   2132     *OutUseUnscaledOp = false;
   2133   if (OutUnscaledOp)
   2134     *OutUnscaledOp = 0;
   2135   switch (MI.getOpcode()) {
   2136   default:
   2137     llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
   2138   // Vector spills/fills can't take an immediate offset.
   2139   case AArch64::LD1Twov2d:
   2140   case AArch64::LD1Threev2d:
   2141   case AArch64::LD1Fourv2d:
   2142   case AArch64::LD1Twov1d:
   2143   case AArch64::LD1Threev1d:
   2144   case AArch64::LD1Fourv1d:
   2145   case AArch64::ST1Twov2d:
   2146   case AArch64::ST1Threev2d:
   2147   case AArch64::ST1Fourv2d:
   2148   case AArch64::ST1Twov1d:
   2149   case AArch64::ST1Threev1d:
   2150   case AArch64::ST1Fourv1d:
   2151     return AArch64FrameOffsetCannotUpdate;
   2152   case AArch64::PRFMui:
   2153     Scale = 8;
   2154     UnscaledOp = AArch64::PRFUMi;
   2155     break;
   2156   case AArch64::LDRXui:
   2157     Scale = 8;
   2158     UnscaledOp = AArch64::LDURXi;
   2159     break;
   2160   case AArch64::LDRWui:
   2161     Scale = 4;
   2162     UnscaledOp = AArch64::LDURWi;
   2163     break;
   2164   case AArch64::LDRBui:
   2165     Scale = 1;
   2166     UnscaledOp = AArch64::LDURBi;
   2167     break;
   2168   case AArch64::LDRHui:
   2169     Scale = 2;
   2170     UnscaledOp = AArch64::LDURHi;
   2171     break;
   2172   case AArch64::LDRSui:
   2173     Scale = 4;
   2174     UnscaledOp = AArch64::LDURSi;
   2175     break;
   2176   case AArch64::LDRDui:
   2177     Scale = 8;
   2178     UnscaledOp = AArch64::LDURDi;
   2179     break;
   2180   case AArch64::LDRQui:
   2181     Scale = 16;
   2182     UnscaledOp = AArch64::LDURQi;
   2183     break;
   2184   case AArch64::LDRBBui:
   2185     Scale = 1;
   2186     UnscaledOp = AArch64::LDURBBi;
   2187     break;
   2188   case AArch64::LDRHHui:
   2189     Scale = 2;
   2190     UnscaledOp = AArch64::LDURHHi;
   2191     break;
   2192   case AArch64::LDRSBXui:
   2193     Scale = 1;
   2194     UnscaledOp = AArch64::LDURSBXi;
   2195     break;
   2196   case AArch64::LDRSBWui:
   2197     Scale = 1;
   2198     UnscaledOp = AArch64::LDURSBWi;
   2199     break;
   2200   case AArch64::LDRSHXui:
   2201     Scale = 2;
   2202     UnscaledOp = AArch64::LDURSHXi;
   2203     break;
   2204   case AArch64::LDRSHWui:
   2205     Scale = 2;
   2206     UnscaledOp = AArch64::LDURSHWi;
   2207     break;
   2208   case AArch64::LDRSWui:
   2209     Scale = 4;
   2210     UnscaledOp = AArch64::LDURSWi;
   2211     break;
   2212 
   2213   case AArch64::STRXui:
   2214     Scale = 8;
   2215     UnscaledOp = AArch64::STURXi;
   2216     break;
   2217   case AArch64::STRWui:
   2218     Scale = 4;
   2219     UnscaledOp = AArch64::STURWi;
   2220     break;
   2221   case AArch64::STRBui:
   2222     Scale = 1;
   2223     UnscaledOp = AArch64::STURBi;
   2224     break;
   2225   case AArch64::STRHui:
   2226     Scale = 2;
   2227     UnscaledOp = AArch64::STURHi;
   2228     break;
   2229   case AArch64::STRSui:
   2230     Scale = 4;
   2231     UnscaledOp = AArch64::STURSi;
   2232     break;
   2233   case AArch64::STRDui:
   2234     Scale = 8;
   2235     UnscaledOp = AArch64::STURDi;
   2236     break;
   2237   case AArch64::STRQui:
   2238     Scale = 16;
   2239     UnscaledOp = AArch64::STURQi;
   2240     break;
   2241   case AArch64::STRBBui:
   2242     Scale = 1;
   2243     UnscaledOp = AArch64::STURBBi;
   2244     break;
   2245   case AArch64::STRHHui:
   2246     Scale = 2;
   2247     UnscaledOp = AArch64::STURHHi;
   2248     break;
   2249 
   2250   case AArch64::LDPXi:
   2251   case AArch64::LDPDi:
   2252   case AArch64::STPXi:
   2253   case AArch64::STPDi:
   2254   case AArch64::LDNPXi:
   2255   case AArch64::LDNPDi:
   2256   case AArch64::STNPXi:
   2257   case AArch64::STNPDi:
   2258     ImmIdx = 3;
   2259     IsSigned = true;
   2260     Scale = 8;
   2261     break;
   2262   case AArch64::LDPQi:
   2263   case AArch64::STPQi:
   2264   case AArch64::LDNPQi:
   2265   case AArch64::STNPQi:
   2266     ImmIdx = 3;
   2267     IsSigned = true;
   2268     Scale = 16;
   2269     break;
   2270   case AArch64::LDPWi:
   2271   case AArch64::LDPSi:
   2272   case AArch64::STPWi:
   2273   case AArch64::STPSi:
   2274   case AArch64::LDNPWi:
   2275   case AArch64::LDNPSi:
   2276   case AArch64::STNPWi:
   2277   case AArch64::STNPSi:
   2278     ImmIdx = 3;
   2279     IsSigned = true;
   2280     Scale = 4;
   2281     break;
   2282 
   2283   case AArch64::LDURXi:
   2284   case AArch64::LDURWi:
   2285   case AArch64::LDURBi:
   2286   case AArch64::LDURHi:
   2287   case AArch64::LDURSi:
   2288   case AArch64::LDURDi:
   2289   case AArch64::LDURQi:
   2290   case AArch64::LDURHHi:
   2291   case AArch64::LDURBBi:
   2292   case AArch64::LDURSBXi:
   2293   case AArch64::LDURSBWi:
   2294   case AArch64::LDURSHXi:
   2295   case AArch64::LDURSHWi:
   2296   case AArch64::LDURSWi:
   2297   case AArch64::STURXi:
   2298   case AArch64::STURWi:
   2299   case AArch64::STURBi:
   2300   case AArch64::STURHi:
   2301   case AArch64::STURSi:
   2302   case AArch64::STURDi:
   2303   case AArch64::STURQi:
   2304   case AArch64::STURBBi:
   2305   case AArch64::STURHHi:
   2306     Scale = 1;
   2307     break;
   2308   }
   2309 
   2310   Offset += MI.getOperand(ImmIdx).getImm() * Scale;
   2311 
   2312   bool useUnscaledOp = false;
   2313   // If the offset doesn't match the scale, we rewrite the instruction to
   2314   // use the unscaled instruction instead. Likewise, if we have a negative
   2315   // offset (and have an unscaled op to use).
   2316   if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
   2317     useUnscaledOp = true;
   2318 
   2319   // Use an unscaled addressing mode if the instruction has a negative offset
   2320   // (or if the instruction is already using an unscaled addressing mode).
   2321   unsigned MaskBits;
   2322   if (IsSigned) {
   2323     // ldp/stp instructions.
   2324     MaskBits = 7;
   2325     Offset /= Scale;
   2326   } else if (UnscaledOp == 0 || useUnscaledOp) {
   2327     MaskBits = 9;
   2328     IsSigned = true;
   2329     Scale = 1;
   2330   } else {
   2331     MaskBits = 12;
   2332     IsSigned = false;
   2333     Offset /= Scale;
   2334   }
   2335 
   2336   // Attempt to fold address computation.
   2337   int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
   2338   int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
   2339   if (Offset >= MinOff && Offset <= MaxOff) {
   2340     if (EmittableOffset)
   2341       *EmittableOffset = Offset;
   2342     Offset = 0;
   2343   } else {
   2344     int NewOff = Offset < 0 ? MinOff : MaxOff;
   2345     if (EmittableOffset)
   2346       *EmittableOffset = NewOff;
   2347     Offset = (Offset - NewOff) * Scale;
   2348   }
   2349   if (OutUseUnscaledOp)
   2350     *OutUseUnscaledOp = useUnscaledOp;
   2351   if (OutUnscaledOp)
   2352     *OutUnscaledOp = UnscaledOp;
   2353   return AArch64FrameOffsetCanUpdate |
   2354          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
   2355 }
   2356 
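         /// Rewrite the frame-index operand FrameRegIdx of MI to use FrameReg,
         /// folding as much of Offset into the immediate field as the encoding
         /// allows (ADDXri/ADDSXri are expanded through emitFrameOffset).
         /// Returns true when the offset was folded completely; otherwise the
         /// remainder is left in Offset.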
   2357 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   2358                                     unsigned FrameReg, int &Offset,
   2359                                     const AArch64InstrInfo *TII) {
   2360   unsigned Opcode = MI.getOpcode();
   2361   unsigned ImmIdx = FrameRegIdx + 1;
   2362 
   2363   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
   2364     Offset += MI.getOperand(ImmIdx).getImm();
   2365     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
   2366                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
   2367                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
   2368     MI.eraseFromParent();
   2369     Offset = 0;
   2370     return true;
   2371   }
   2372 
   2373   int NewOffset;
   2374   unsigned UnscaledOp;
   2375   bool UseUnscaledOp;
   2376   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
   2377                                          &UnscaledOp, &NewOffset);
   2378   if (Status & AArch64FrameOffsetCanUpdate) {
   2379     if (Status & AArch64FrameOffsetIsLegal)
   2380       // Replace the FrameIndex with FrameReg.
   2381       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
   2382     if (UseUnscaledOp)
   2383       MI.setDesc(TII->get(UnscaledOp));
   2384 
   2385     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
   2386     return Offset == 0;
   2387   }
   2388 
   2389   return false;
   2390 }
   2391 
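         /// Fill NopInst with the canonical AArch64 NOP, encoded as HINT #0.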
   2392 void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
   2393   NopInst.setOpcode(AArch64::HINT);
   2394   NopInst.addOperand(MCOperand::createImm(0));
   2395 }

    2396 /// useMachineCombiner - return true when a target supports MachineCombiner
   2397 bool AArch64InstrInfo::useMachineCombiner() const {
   2398   // AArch64 supports the combiner
   2399   return true;
   2400 }
   2401 //
    2402 // True when Opc sets the condition flags
   2403 static bool isCombineInstrSettingFlag(unsigned Opc) {
   2404   switch (Opc) {
   2405   case AArch64::ADDSWrr:
   2406   case AArch64::ADDSWri:
   2407   case AArch64::ADDSXrr:
   2408   case AArch64::ADDSXri:
   2409   case AArch64::SUBSWrr:
   2410   case AArch64::SUBSXrr:
   2411   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   2412   case AArch64::SUBSWri:
   2413   case AArch64::SUBSXri:
   2414     return true;
   2415   default:
   2416     break;
   2417   }
   2418   return false;
   2419 }
   2420 //
   2421 // 32b Opcodes that can be combined with a MUL
   2422 static bool isCombineInstrCandidate32(unsigned Opc) {
   2423   switch (Opc) {
   2424   case AArch64::ADDWrr:
   2425   case AArch64::ADDWri:
   2426   case AArch64::SUBWrr:
   2427   case AArch64::ADDSWrr:
   2428   case AArch64::ADDSWri:
   2429   case AArch64::SUBSWrr:
   2430   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   2431   case AArch64::SUBWri:
   2432   case AArch64::SUBSWri:
   2433     return true;
   2434   default:
   2435     break;
   2436   }
   2437   return false;
   2438 }
   2439 //
   2440 // 64b Opcodes that can be combined with a MUL
   2441 static bool isCombineInstrCandidate64(unsigned Opc) {
   2442   switch (Opc) {
   2443   case AArch64::ADDXrr:
   2444   case AArch64::ADDXri:
   2445   case AArch64::SUBXrr:
   2446   case AArch64::ADDSXrr:
   2447   case AArch64::ADDSXri:
   2448   case AArch64::SUBSXrr:
   2449   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   2450   case AArch64::SUBXri:
   2451   case AArch64::SUBSXri:
   2452     return true;
   2453   default:
   2454     break;
   2455   }
   2456   return false;
   2457 }
   2458 //
   2459 // Opcodes that can be combined with a MUL
   2460 static bool isCombineInstrCandidate(unsigned Opc) {
   2461   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
   2462 }
   2463 
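         /// Return true if MO is defined in MBB by an instruction with opcode
         /// MulOpc whose accumulator operand is ZeroReg (i.e. a plain MUL) and
         /// whose result has a single non-debug use, so it can safely be
         /// folded into a MADD/MSUB.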
   2464 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
   2465                               unsigned MulOpc, unsigned ZeroReg) {
   2466   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   2467   MachineInstr *MI = nullptr;
   2468   // We need a virtual register definition.
   2469   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
   2470     MI = MRI.getUniqueVRegDef(MO.getReg());
   2471   // And it needs to be in the trace (otherwise, it won't have a depth).
   2472   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != MulOpc)
   2473     return false;
   2474 
   2475   assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
   2476          MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
    2477          MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
   2478 
   2479   // The third input reg must be zero.
   2480   if (MI->getOperand(3).getReg() != ZeroReg)
   2481     return false;
   2482 
    2483   // Must only be used by the user we combine with.
   2484   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
   2485     return false;
   2486 
   2487   return true;
   2488 }
   2489 
    2490 /// Return true when there is potentially a faster code sequence
    2491 /// for an instruction chain ending in \p Root. All potential patterns are
    2492 /// listed in the \p Patterns vector. Patterns should be sorted in priority
    2493 /// order since the pattern evaluator stops checking as soon as it finds a
    2494 /// faster sequence.
   2495 
   2496 bool AArch64InstrInfo::getMachineCombinerPatterns(
   2497     MachineInstr &Root,
   2498     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
   2499   unsigned Opc = Root.getOpcode();
   2500   MachineBasicBlock &MBB = *Root.getParent();
   2501   bool Found = false;
   2502 
   2503   if (!isCombineInstrCandidate(Opc))
    2504     return false;
   2505   if (isCombineInstrSettingFlag(Opc)) {
   2506     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
   2507     // When NZCV is live bail out.
   2508     if (Cmp_NZCV == -1)
    2509       return false;
   2510     unsigned NewOpc = convertFlagSettingOpcode(&Root);
   2511     // When opcode can't change bail out.
   2512     // CHECKME: do we miss any cases for opcode conversion?
   2513     if (NewOpc == Opc)
    2514       return false;
   2515     Opc = NewOpc;
   2516   }
   2517 
   2518   switch (Opc) {
   2519   default:
   2520     break;
   2521   case AArch64::ADDWrr:
   2522     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
   2523            "ADDWrr does not have register operands");
   2524     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   2525                           AArch64::WZR)) {
   2526       Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
   2527       Found = true;
   2528     }
   2529     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
   2530                           AArch64::WZR)) {
   2531       Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
   2532       Found = true;
   2533     }
   2534     break;
   2535   case AArch64::ADDXrr:
   2536     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   2537                           AArch64::XZR)) {
   2538       Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
   2539       Found = true;
   2540     }
   2541     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
   2542                           AArch64::XZR)) {
   2543       Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
   2544       Found = true;
   2545     }
   2546     break;
   2547   case AArch64::SUBWrr:
   2548     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   2549                           AArch64::WZR)) {
   2550       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
   2551       Found = true;
   2552     }
   2553     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
   2554                           AArch64::WZR)) {
   2555       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
   2556       Found = true;
   2557     }
   2558     break;
   2559   case AArch64::SUBXrr:
   2560     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   2561                           AArch64::XZR)) {
   2562       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
   2563       Found = true;
   2564     }
   2565     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
   2566                           AArch64::XZR)) {
   2567       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
   2568       Found = true;
   2569     }
   2570     break;
   2571   case AArch64::ADDWri:
   2572     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   2573                           AArch64::WZR)) {
   2574       Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
   2575       Found = true;
   2576     }
   2577     break;
   2578   case AArch64::ADDXri:
   2579     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   2580                           AArch64::XZR)) {
   2581       Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
   2582       Found = true;
   2583     }
   2584     break;
   2585   case AArch64::SUBWri:
   2586     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   2587                           AArch64::WZR)) {
   2588       Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
   2589       Found = true;
   2590     }
   2591     break;
   2592   case AArch64::SUBXri:
   2593     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   2594                           AArch64::XZR)) {
   2595       Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
   2596       Found = true;
   2597     }
   2598     break;
   2599   }
   2600   return Found;
   2601 }
   2602 
   2603 /// genMadd - Generate madd instruction and combine mul and add.
   2604 /// Example:
   2605 ///  MUL I=A,B,0
   2606 ///  ADD R,I,C
   2607 ///  ==> MADD R,A,B,C
   2608 /// \param Root is the ADD instruction
   2609 /// \param [out] InsInstrs is a vector of machine instructions and will
   2610 /// contain the generated madd instruction
   2611 /// \param IdxMulOpd is index of operand in Root that is the result of
   2612 /// the MUL. In the example above IdxMulOpd is 1.
    2613 /// \param MaddOpc the opcode of the madd instruction
   2614 static MachineInstr *genMadd(MachineFunction &MF, MachineRegisterInfo &MRI,
   2615                              const TargetInstrInfo *TII, MachineInstr &Root,
   2616                              SmallVectorImpl<MachineInstr *> &InsInstrs,
   2617                              unsigned IdxMulOpd, unsigned MaddOpc,
   2618                              const TargetRegisterClass *RC) {
   2619   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
   2620 
   2621   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
   2622   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
   2623   unsigned ResultReg = Root.getOperand(0).getReg();
   2624   unsigned SrcReg0 = MUL->getOperand(1).getReg();
   2625   bool Src0IsKill = MUL->getOperand(1).isKill();
   2626   unsigned SrcReg1 = MUL->getOperand(2).getReg();
   2627   bool Src1IsKill = MUL->getOperand(2).isKill();
   2628   unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
   2629   bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
   2630 
   2631   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
   2632     MRI.constrainRegClass(ResultReg, RC);
   2633   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
   2634     MRI.constrainRegClass(SrcReg0, RC);
   2635   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
   2636     MRI.constrainRegClass(SrcReg1, RC);
   2637   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
   2638     MRI.constrainRegClass(SrcReg2, RC);
   2639 
   2640   MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
   2641                                     ResultReg)
   2642                                 .addReg(SrcReg0, getKillRegState(Src0IsKill))
   2643                                 .addReg(SrcReg1, getKillRegState(Src1IsKill))
   2644                                 .addReg(SrcReg2, getKillRegState(Src2IsKill));
   2645   // Insert the MADD
   2646   InsInstrs.push_back(MIB);
   2647   return MUL;
   2648 }
   2649 
   2650 /// genMaddR - Generate madd instruction and combine mul and add using
   2651 /// an extra virtual register
   2652 /// Example - an ADD intermediate needs to be stored in a register:
   2653 ///   MUL I=A,B,0
   2654 ///   ADD R,I,Imm
   2655 ///   ==> ORR  V, ZR, Imm
   2656 ///   ==> MADD R,A,B,V
   2657 /// \param Root is the ADD instruction
   2658 /// \param [out] InsInstrs is a vector of machine instructions and will
   2659 /// contain the generated madd instruction
   2660 /// \param IdxMulOpd is index of operand in Root that is the result of
   2661 /// the MUL. In the example above IdxMulOpd is 1.
    2662 /// \param MaddOpc the opcode of the madd instruction
   2663 /// \param VR is a virtual register that holds the value of an ADD operand
   2664 /// (V in the example above).
   2665 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
   2666                               const TargetInstrInfo *TII, MachineInstr &Root,
   2667                               SmallVectorImpl<MachineInstr *> &InsInstrs,
   2668                               unsigned IdxMulOpd, unsigned MaddOpc,
   2669                               unsigned VR, const TargetRegisterClass *RC) {
   2670   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
   2671 
   2672   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
   2673   unsigned ResultReg = Root.getOperand(0).getReg();
   2674   unsigned SrcReg0 = MUL->getOperand(1).getReg();
   2675   bool Src0IsKill = MUL->getOperand(1).isKill();
   2676   unsigned SrcReg1 = MUL->getOperand(2).getReg();
   2677   bool Src1IsKill = MUL->getOperand(2).isKill();
   2678 
   2679   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
   2680     MRI.constrainRegClass(ResultReg, RC);
   2681   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
   2682     MRI.constrainRegClass(SrcReg0, RC);
   2683   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
   2684     MRI.constrainRegClass(SrcReg1, RC);
   2685   if (TargetRegisterInfo::isVirtualRegister(VR))
   2686     MRI.constrainRegClass(VR, RC);
   2687 
   2688   MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
   2689                                     ResultReg)
   2690                                 .addReg(SrcReg0, getKillRegState(Src0IsKill))
   2691                                 .addReg(SrcReg1, getKillRegState(Src1IsKill))
   2692                                 .addReg(VR);
   2693   // Insert the MADD
   2694   InsInstrs.push_back(MIB);
   2695   return MUL;
   2696 }
   2697 
   2698 /// When getMachineCombinerPatterns() finds potential patterns,
   2699 /// this function generates the instructions that could replace the
    2700 /// original code sequence.
   2701 void AArch64InstrInfo::genAlternativeCodeSequence(
   2702     MachineInstr &Root, MachineCombinerPattern Pattern,
   2703     SmallVectorImpl<MachineInstr *> &InsInstrs,
   2704     SmallVectorImpl<MachineInstr *> &DelInstrs,
   2705     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
   2706   MachineBasicBlock &MBB = *Root.getParent();
   2707   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   2708   MachineFunction &MF = *MBB.getParent();
   2709   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   2710 
   2711   MachineInstr *MUL;
   2712   const TargetRegisterClass *RC;
   2713   unsigned Opc;
   2714   switch (Pattern) {
   2715   default:
   2716     // signal error.
   2717     break;
   2718   case MachineCombinerPattern::MULADDW_OP1:
   2719   case MachineCombinerPattern::MULADDX_OP1:
   2720     // MUL I=A,B,0
   2721     // ADD R,I,C
   2722     // ==> MADD R,A,B,C
   2723     // --- Create(MADD);
   2724     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
   2725       Opc = AArch64::MADDWrrr;
   2726       RC = &AArch64::GPR32RegClass;
   2727     } else {
   2728       Opc = AArch64::MADDXrrr;
   2729       RC = &AArch64::GPR64RegClass;
   2730     }
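            // Root's operand 1 is the MUL result, so operand 2 (C) becomes the
            // accumulator of the MADD.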
   2731     MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   2732     break;
   2733   case MachineCombinerPattern::MULADDW_OP2:
   2734   case MachineCombinerPattern::MULADDX_OP2:
   2735     // MUL I=A,B,0
   2736     // ADD R,C,I
   2737     // ==> MADD R,A,B,C
   2738     // --- Create(MADD);
   2739     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
   2740       Opc = AArch64::MADDWrrr;
   2741       RC = &AArch64::GPR32RegClass;
   2742     } else {
   2743       Opc = AArch64::MADDXrrr;
   2744       RC = &AArch64::GPR64RegClass;
   2745     }
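            // Here the MUL result is Root's operand 2, so operand 1 (C) becomes
            // the accumulator of the MADD.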
   2746     MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   2747     break;
   2748   case MachineCombinerPattern::MULADDWI_OP1:
   2749   case MachineCombinerPattern::MULADDXI_OP1: {
   2750     // MUL I=A,B,0
   2751     // ADD R,I,Imm
   2752     // ==> ORR  V, ZR, Imm
   2753     // ==> MADD R,A,B,V
   2754     // --- Create(MADD);
   2755     const TargetRegisterClass *OrrRC;
   2756     unsigned BitSize, OrrOpc, ZeroReg;
   2757     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
   2758       OrrOpc = AArch64::ORRWri;
   2759       OrrRC = &AArch64::GPR32spRegClass;
   2760       BitSize = 32;
   2761       ZeroReg = AArch64::WZR;
   2762       Opc = AArch64::MADDWrrr;
   2763       RC = &AArch64::GPR32RegClass;
   2764     } else {
   2765       OrrOpc = AArch64::ORRXri;
   2766       OrrRC = &AArch64::GPR64spRegClass;
   2767       BitSize = 64;
   2768       ZeroReg = AArch64::XZR;
   2769       Opc = AArch64::MADDXrrr;
   2770       RC = &AArch64::GPR64RegClass;
   2771     }
   2772     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
   2773     uint64_t Imm = Root.getOperand(2).getImm();
   2774 
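            // The ADD-immediate may carry a shifted value (LSL #0 or #12 in
            // operand 3); fold the shift into the immediate.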
   2775     if (Root.getOperand(3).isImm()) {
   2776       unsigned Val = Root.getOperand(3).getImm();
   2777       Imm = Imm << Val;
   2778     }
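            // Mask the value down to BitSize bits; the combine is performed only
            // when the result can be encoded as an ORR logical immediate,
            // otherwise no replacement instructions are generated.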
   2779     uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
   2780     uint64_t Encoding;
   2781     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
   2782       MachineInstrBuilder MIB1 =
   2783           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
   2784               .addReg(ZeroReg)
   2785               .addImm(Encoding);
   2786       InsInstrs.push_back(MIB1);
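              // NewVR is defined by the ORR just added at index 0 of InsInstrs;
              // record that index so the combiner can compute the new depth.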
   2787       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   2788       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   2789     }
   2790     break;
   2791   }
   2792   case MachineCombinerPattern::MULSUBW_OP1:
   2793   case MachineCombinerPattern::MULSUBX_OP1: {
   2794     // MUL I=A,B,0
   2795     // SUB R,I, C
   2796     // ==> SUB  V, 0, C
   2797     // ==> MADD R,A,B,V // = -C + A*B
   2798     // --- Create(MADD);
   2799     const TargetRegisterClass *SubRC;
   2800     unsigned SubOpc, ZeroReg;
   2801     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
   2802       SubOpc = AArch64::SUBWrr;
   2803       SubRC = &AArch64::GPR32spRegClass;
   2804       ZeroReg = AArch64::WZR;
   2805       Opc = AArch64::MADDWrrr;
   2806       RC = &AArch64::GPR32RegClass;
   2807     } else {
   2808       SubOpc = AArch64::SUBXrr;
   2809       SubRC = &AArch64::GPR64spRegClass;
   2810       ZeroReg = AArch64::XZR;
   2811       Opc = AArch64::MADDXrrr;
   2812       RC = &AArch64::GPR64RegClass;
   2813     }
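            // Materialize -C in NewVR so that MADD R,A,B,NewVR computes A*B - C.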
   2814     unsigned NewVR = MRI.createVirtualRegister(SubRC);
   2815     // SUB NewVR, 0, C
   2816     MachineInstrBuilder MIB1 =
   2817         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
   2818             .addReg(ZeroReg)
   2819             .addOperand(Root.getOperand(2));
   2820     InsInstrs.push_back(MIB1);
   2821     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   2822     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   2823     break;
   2824   }
   2825   case MachineCombinerPattern::MULSUBW_OP2:
   2826   case MachineCombinerPattern::MULSUBX_OP2:
   2827     // MUL I=A,B,0
   2828     // SUB R,C,I
   2829     // ==> MSUB R,A,B,C (computes C - A*B)
   2830     // --- Create(MSUB);
   2831     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
   2832       Opc = AArch64::MSUBWrrr;
   2833       RC = &AArch64::GPR32RegClass;
   2834     } else {
   2835       Opc = AArch64::MSUBXrrr;
   2836       RC = &AArch64::GPR64RegClass;
   2837     }
   2838     MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   2839     break;
   2840   case MachineCombinerPattern::MULSUBWI_OP1:
   2841   case MachineCombinerPattern::MULSUBXI_OP1: {
   2842     // MUL I=A,B,0
   2843     // SUB R,I, Imm
   2844     // ==> ORR  V, ZR, -Imm
   2845     // ==> MADD R,A,B,V // = -Imm + A*B
   2846     // --- Create(MADD);
   2847     const TargetRegisterClass *OrrRC;
   2848     unsigned BitSize, OrrOpc, ZeroReg;
   2849     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
   2850       OrrOpc = AArch64::ORRWri;
   2851       OrrRC = &AArch64::GPR32spRegClass;
   2852       BitSize = 32;
   2853       ZeroReg = AArch64::WZR;
   2854       Opc = AArch64::MADDWrrr;
   2855       RC = &AArch64::GPR32RegClass;
   2856     } else {
   2857       OrrOpc = AArch64::ORRXri;
   2858       OrrRC = &AArch64::GPR64spRegClass;
   2859       BitSize = 64;
   2860       ZeroReg = AArch64::XZR;
   2861       Opc = AArch64::MADDXrrr;
   2862       RC = &AArch64::GPR64RegClass;
   2863     }
   2864     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
   2865     uint64_t Imm = Root.getOperand(2).getImm();
   2866     if (Root.getOperand(3).isImm()) {
   2867       unsigned Val = Root.getOperand(3).getImm();
   2868       Imm = Imm << Val;
   2869     }
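            // Negate the immediate so the MADD accumulator holds -Imm, then mask
            // it to BitSize bits; if -Imm is not encodable as a logical
            // immediate, nothing is generated for this pattern.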
   2870     uint64_t UImm = (-Imm) << (64 - BitSize) >> (64 - BitSize);
   2871     uint64_t Encoding;
   2872     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
   2873       MachineInstrBuilder MIB1 =
   2874           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
   2875               .addReg(ZeroReg)
   2876               .addImm(Encoding);
   2877       InsInstrs.push_back(MIB1);
   2878       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   2879       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   2880     }
   2881     break;
   2882   }
   2883   } // end switch (Pattern)
   2884   // Record MUL and ADD/SUB for deletion. MUL may be null when no
          // replacement sequence was generated for the matched pattern.
   2885   if (MUL)
            DelInstrs.push_back(MUL);
   2886   DelInstrs.push_back(&Root);
   2887 
   2888   return;
   2889 }
   2890 
   2891 /// \brief Replace a csinc-branch sequence by a simple conditional branch.
   2892 ///
   2893 /// Examples:
   2894 /// 1.
   2895 ///   csinc  w9, wzr, wzr, <condition code>
   2896 ///   tbnz   w9, #0, 0x44
   2897 /// to
   2898 ///   b.<inverted condition code>
   2899 ///
   2900 /// 2.
   2901 ///   csinc w9, wzr, wzr, <condition code>
   2902 ///   tbz   w9, #0, 0x44
   2903 /// to
   2904 ///   b.<condition code>
   2905 ///
   2906 /// \param  MI Conditional Branch
   2907 /// \return True when the simple conditional branch is generated
   2908 ///
   2909 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
   2910   bool IsNegativeBranch = false;
   2911   bool IsTestAndBranch = false;
   2912   unsigned TargetBBInMI = 0;
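          // Operand index of the branch target: 1 for CB(N)Z, 2 for TB(N)Z.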
   2913   switch (MI->getOpcode()) {
   2914   default:
   2915     llvm_unreachable("Unknown branch instruction?");
   2916   case AArch64::Bcc:
   2917     return false;
   2918   case AArch64::CBZW:
   2919   case AArch64::CBZX:
   2920     TargetBBInMI = 1;
   2921     break;
   2922   case AArch64::CBNZW:
   2923   case AArch64::CBNZX:
   2924     TargetBBInMI = 1;
   2925     IsNegativeBranch = true;
   2926     break;
   2927   case AArch64::TBZW:
   2928   case AArch64::TBZX:
   2929     TargetBBInMI = 2;
   2930     IsTestAndBranch = true;
   2931     break;
   2932   case AArch64::TBNZW:
   2933   case AArch64::TBNZX:
   2934     TargetBBInMI = 2;
   2935     IsNegativeBranch = true;
   2936     IsTestAndBranch = true;
   2937     break;
   2938   }
   2939   // The CSINC we are looking for produces only 0 or 1, so a test-and-branch
   2940   // on any bit other than bit 0 is not something this transformation
   2941   // handles. Conservatively bail out in case the verifier missed this case.
   2942   if (IsTestAndBranch && MI->getOperand(1).getImm())
   2943     return false;
   2944 
   2945   // Find Definition.
   2946   assert(MI->getParent() && "Incomplete machine instruction\n");
   2947   MachineBasicBlock *MBB = MI->getParent();
   2948   MachineFunction *MF = MBB->getParent();
   2949   MachineRegisterInfo *MRI = &MF->getRegInfo();
   2950   unsigned VReg = MI->getOperand(0).getReg();
   2951   if (!TargetRegisterInfo::isVirtualRegister(VReg))
   2952     return false;
   2953 
   2954   MachineInstr *DefMI = MRI->getVRegDef(VReg);
   2955 
   2956   // Look for CSINC
   2957   if (!(DefMI->getOpcode() == AArch64::CSINCWr &&
   2958         DefMI->getOperand(1).getReg() == AArch64::WZR &&
   2959         DefMI->getOperand(2).getReg() == AArch64::WZR) &&
   2960       !(DefMI->getOpcode() == AArch64::CSINCXr &&
   2961         DefMI->getOperand(1).getReg() == AArch64::XZR &&
   2962         DefMI->getOperand(2).getReg() == AArch64::XZR))
   2963     return false;
   2964 
   2965   if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
   2966     return false;
   2967 
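          // With both sources being the zero register, the CSINC materializes the
          // inverse of its condition: the result is 0 when the condition holds and
          // 1 when it fails, so the branch on the result can be folded into a
          // conditional branch on the (possibly inverted) condition.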
   2968   AArch64CC::CondCode CC =
   2969       (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
   2970   bool CheckOnlyCCWrites = true;
   2971   // Convert only when the condition code is not modified between
   2972   // the CSINC and the branch. The CC may be used by other
   2973   // instructions in between.
   2974   if (modifiesConditionCode(DefMI, MI, CheckOnlyCCWrites, &getRegisterInfo()))
   2975     return false;
   2976   MachineBasicBlock &RefToMBB = *MBB;
   2977   MachineBasicBlock *TBB = MI->getOperand(TargetBBInMI).getMBB();
   2978   DebugLoc DL = MI->getDebugLoc();
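          // CBNZ/TBNZ take the branch when the CSINC produced 1, i.e. when its
          // condition failed, so the emitted B.cc must use the inverted condition.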
   2979   if (IsNegativeBranch)
   2980     CC = AArch64CC::getInvertedCondCode(CC);
   2981   BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
   2982   MI->eraseFromParent();
   2983   return true;
   2984 }
   2985 
   2986 std::pair<unsigned, unsigned>
   2987 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
   2988   const unsigned Mask = AArch64II::MO_FRAGMENT;
   2989   return std::make_pair(TF & Mask, TF & ~Mask);
   2990 }
   2991 
   2992 ArrayRef<std::pair<unsigned, const char *>>
   2993 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
   2994   using namespace AArch64II;
   2995   static const std::pair<unsigned, const char *> TargetFlags[] = {
   2996       {MO_PAGE, "aarch64-page"},
   2997       {MO_PAGEOFF, "aarch64-pageoff"},
   2998       {MO_G3, "aarch64-g3"},
   2999       {MO_G2, "aarch64-g2"},
   3000       {MO_G1, "aarch64-g1"},
   3001       {MO_G0, "aarch64-g0"},
   3002       {MO_HI12, "aarch64-hi12"}};
   3003   return makeArrayRef(TargetFlags);
   3004 }
   3005 
   3006 ArrayRef<std::pair<unsigned, const char *>>
   3007 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
   3008   using namespace AArch64II;
   3009   static const std::pair<unsigned, const char *> TargetFlags[] = {
   3010       {MO_GOT, "aarch64-got"},
   3011       {MO_NC, "aarch64-nc"},
   3012       {MO_TLS, "aarch64-tls"},
   3013       {MO_CONSTPOOL, "aarch64-constant-pool"}};
   3014   return makeArrayRef(TargetFlags);
   3015 }
   3016