      1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AArch64InstrInfo.h"
     15 #include "AArch64MachineFunctionInfo.h"
     16 #include "AArch64Subtarget.h"
     17 #include "MCTargetDesc/AArch64AddressingModes.h"
     18 #include "Utils/AArch64BaseInfo.h"
     19 #include "llvm/ADT/ArrayRef.h"
     20 #include "llvm/ADT/STLExtras.h"
     21 #include "llvm/ADT/SmallVector.h"
     22 #include "llvm/CodeGen/MachineBasicBlock.h"
     23 #include "llvm/CodeGen/MachineFrameInfo.h"
     24 #include "llvm/CodeGen/MachineFunction.h"
     25 #include "llvm/CodeGen/MachineInstr.h"
     26 #include "llvm/CodeGen/MachineInstrBuilder.h"
     27 #include "llvm/CodeGen/MachineMemOperand.h"
     28 #include "llvm/CodeGen/MachineModuleInfo.h"
     29 #include "llvm/CodeGen/MachineOperand.h"
     30 #include "llvm/CodeGen/MachineRegisterInfo.h"
     31 #include "llvm/CodeGen/StackMaps.h"
     32 #include "llvm/CodeGen/TargetRegisterInfo.h"
     33 #include "llvm/CodeGen/TargetSubtargetInfo.h"
     34 #include "llvm/IR/DebugLoc.h"
     35 #include "llvm/IR/GlobalValue.h"
     36 #include "llvm/MC/MCInst.h"
     37 #include "llvm/MC/MCInstrDesc.h"
     38 #include "llvm/Support/Casting.h"
     39 #include "llvm/Support/CodeGen.h"
     40 #include "llvm/Support/CommandLine.h"
     41 #include "llvm/Support/Compiler.h"
     42 #include "llvm/Support/ErrorHandling.h"
     43 #include "llvm/Support/MathExtras.h"
     44 #include "llvm/Target/TargetMachine.h"
     45 #include "llvm/Target/TargetOptions.h"
     46 #include <cassert>
     47 #include <cstdint>
     48 #include <iterator>
     49 #include <utility>
     50 
     51 using namespace llvm;
     52 
     53 #define GET_INSTRINFO_CTOR_DTOR
     54 #include "AArch64GenInstrInfo.inc"
     55 
     56 static cl::opt<unsigned> TBZDisplacementBits(
     57     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
     58     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
     59 
     60 static cl::opt<unsigned> CBZDisplacementBits(
     61     "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
     62     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
     63 
     64 static cl::opt<unsigned>
     65     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
     66                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
     67 
     68 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
     69     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
     70       RI(STI.getTargetTriple()), Subtarget(STI) {}
     71 
     72 /// getInstSizeInBytes - Return the number of bytes of code the specified
     73 /// instruction may occupy. This returns the maximum number of bytes.
     74 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     75   const MachineBasicBlock &MBB = *MI.getParent();
     76   const MachineFunction *MF = MBB.getParent();
     77   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
     78 
     79   if (MI.getOpcode() == AArch64::INLINEASM)
     80     return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
     81 
     82   // FIXME: We currently only handle pseudoinstructions that don't get expanded
     83   //        before the assembly printer.
     84   unsigned NumBytes = 0;
     85   const MCInstrDesc &Desc = MI.getDesc();
     86   switch (Desc.getOpcode()) {
     87   default:
     88     // Anything not explicitly designated otherwise is a normal 4-byte insn.
     89     NumBytes = 4;
     90     break;
     91   case TargetOpcode::DBG_VALUE:
     92   case TargetOpcode::EH_LABEL:
     93   case TargetOpcode::IMPLICIT_DEF:
     94   case TargetOpcode::KILL:
     95     NumBytes = 0;
     96     break;
     97   case TargetOpcode::STACKMAP:
     98     // The upper bound for a stackmap intrinsic is the full length of its shadow
     99     NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    100     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    101     break;
    102   case TargetOpcode::PATCHPOINT:
    103     // The size of the patchpoint intrinsic is the number of bytes requested
    104     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    105     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    106     break;
    107   case AArch64::TLSDESC_CALLSEQ:
    108     // This gets lowered to an instruction sequence which takes 16 bytes
    109     NumBytes = 16;
    110     break;
    111   }
    112 
    113   return NumBytes;
    114 }
    115 
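// Decode a conditional branch terminator into the Cond operand vector used by
// analyzeBranch/insertBranch:
//   Bcc:        Cond = { condition code }
//   CBZ/CBNZ:   Cond = { -1, opcode, source register }
//   TBZ/TBNZ:   Cond = { -1, opcode, source register, bit number }
// A leading -1 marks a folded compare-and-branch rather than a plain Bcc.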
    116 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
    117                             SmallVectorImpl<MachineOperand> &Cond) {
    118   // Block ends with fall-through condbranch.
    119   switch (LastInst->getOpcode()) {
    120   default:
    121     llvm_unreachable("Unknown branch instruction?");
    122   case AArch64::Bcc:
    123     Target = LastInst->getOperand(1).getMBB();
    124     Cond.push_back(LastInst->getOperand(0));
    125     break;
    126   case AArch64::CBZW:
    127   case AArch64::CBZX:
    128   case AArch64::CBNZW:
    129   case AArch64::CBNZX:
    130     Target = LastInst->getOperand(1).getMBB();
    131     Cond.push_back(MachineOperand::CreateImm(-1));
    132     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    133     Cond.push_back(LastInst->getOperand(0));
    134     break;
    135   case AArch64::TBZW:
    136   case AArch64::TBZX:
    137   case AArch64::TBNZW:
    138   case AArch64::TBNZX:
    139     Target = LastInst->getOperand(2).getMBB();
    140     Cond.push_back(MachineOperand::CreateImm(-1));
    141     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    142     Cond.push_back(LastInst->getOperand(0));
    143     Cond.push_back(LastInst->getOperand(1));
    144   }
    145 }
    146 
    147 static unsigned getBranchDisplacementBits(unsigned Opc) {
    148   switch (Opc) {
    149   default:
    150     llvm_unreachable("unexpected opcode!");
    151   case AArch64::B:
    152     return 64;
    153   case AArch64::TBNZW:
    154   case AArch64::TBZW:
    155   case AArch64::TBNZX:
    156   case AArch64::TBZX:
    157     return TBZDisplacementBits;
    158   case AArch64::CBNZW:
    159   case AArch64::CBZW:
    160   case AArch64::CBNZX:
    161   case AArch64::CBZX:
    162     return CBZDisplacementBits;
    163   case AArch64::Bcc:
    164     return BCCDisplacementBits;
    165   }
    166 }
    167 
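// All AArch64 instructions are 4 bytes, so branch displacements are encoded in
// units of instructions; the byte offset is divided by 4 before the range check.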
    168 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
    169                                              int64_t BrOffset) const {
    170   unsigned Bits = getBranchDisplacementBits(BranchOp);
    171   assert(Bits >= 3 && "max branch displacement must be enough to jump"
    172                       "over conditional branch expansion");
    173   return isIntN(Bits, BrOffset / 4);
    174 }
    175 
    176 MachineBasicBlock *
    177 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
    178   switch (MI.getOpcode()) {
    179   default:
    180     llvm_unreachable("unexpected opcode!");
    181   case AArch64::B:
    182     return MI.getOperand(0).getMBB();
    183   case AArch64::TBZW:
    184   case AArch64::TBNZW:
    185   case AArch64::TBZX:
    186   case AArch64::TBNZX:
    187     return MI.getOperand(2).getMBB();
    188   case AArch64::CBZW:
    189   case AArch64::CBNZW:
    190   case AArch64::CBZX:
    191   case AArch64::CBNZX:
    192   case AArch64::Bcc:
    193     return MI.getOperand(1).getMBB();
    194   }
    195 }
    196 
    197 // Branch analysis.
    198 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
    199                                      MachineBasicBlock *&TBB,
    200                                      MachineBasicBlock *&FBB,
    201                                      SmallVectorImpl<MachineOperand> &Cond,
    202                                      bool AllowModify) const {
    203   // If the block has no terminators, it just falls into the block after it.
    204   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
    205   if (I == MBB.end())
    206     return false;
    207 
    208   if (!isUnpredicatedTerminator(*I))
    209     return false;
    210 
    211   // Get the last instruction in the block.
    212   MachineInstr *LastInst = &*I;
    213 
    214   // If there is only one terminator instruction, process it.
    215   unsigned LastOpc = LastInst->getOpcode();
    216   if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    217     if (isUncondBranchOpcode(LastOpc)) {
    218       TBB = LastInst->getOperand(0).getMBB();
    219       return false;
    220     }
    221     if (isCondBranchOpcode(LastOpc)) {
    222       // Block ends with fall-through condbranch.
    223       parseCondBranch(LastInst, TBB, Cond);
    224       return false;
    225     }
    226     return true; // Can't handle indirect branch.
    227   }
    228 
    229   // Get the instruction before it if it is a terminator.
    230   MachineInstr *SecondLastInst = &*I;
    231   unsigned SecondLastOpc = SecondLastInst->getOpcode();
    232 
    233   // If AllowModify is true and the block ends with two or more unconditional
    234   // branches, delete all but the first unconditional branch.
    235   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    236     while (isUncondBranchOpcode(SecondLastOpc)) {
    237       LastInst->eraseFromParent();
    238       LastInst = SecondLastInst;
    239       LastOpc = LastInst->getOpcode();
    240       if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
     241         // Return now; the only terminator is an unconditional branch.
    242         TBB = LastInst->getOperand(0).getMBB();
    243         return false;
    244       } else {
    245         SecondLastInst = &*I;
    246         SecondLastOpc = SecondLastInst->getOpcode();
    247       }
    248     }
    249   }
    250 
    251   // If there are three terminators, we don't know what sort of block this is.
    252   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    253     return true;
    254 
    255   // If the block ends with a B and a Bcc, handle it.
    256   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    257     parseCondBranch(SecondLastInst, TBB, Cond);
    258     FBB = LastInst->getOperand(0).getMBB();
    259     return false;
    260   }
    261 
    262   // If the block ends with two unconditional branches, handle it.  The second
    263   // one is not executed, so remove it.
    264   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    265     TBB = SecondLastInst->getOperand(0).getMBB();
    266     I = LastInst;
    267     if (AllowModify)
    268       I->eraseFromParent();
    269     return false;
    270   }
    271 
    272   // ...likewise if it ends with an indirect branch followed by an unconditional
    273   // branch.
    274   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    275     I = LastInst;
    276     if (AllowModify)
    277       I->eraseFromParent();
    278     return true;
    279   }
    280 
    281   // Otherwise, can't handle this.
    282   return true;
    283 }
    284 
    285 bool AArch64InstrInfo::reverseBranchCondition(
    286     SmallVectorImpl<MachineOperand> &Cond) const {
    287   if (Cond[0].getImm() != -1) {
    288     // Regular Bcc
    289     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    290     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
    291   } else {
    292     // Folded compare-and-branch
    293     switch (Cond[1].getImm()) {
    294     default:
    295       llvm_unreachable("Unknown conditional branch!");
    296     case AArch64::CBZW:
    297       Cond[1].setImm(AArch64::CBNZW);
    298       break;
    299     case AArch64::CBNZW:
    300       Cond[1].setImm(AArch64::CBZW);
    301       break;
    302     case AArch64::CBZX:
    303       Cond[1].setImm(AArch64::CBNZX);
    304       break;
    305     case AArch64::CBNZX:
    306       Cond[1].setImm(AArch64::CBZX);
    307       break;
    308     case AArch64::TBZW:
    309       Cond[1].setImm(AArch64::TBNZW);
    310       break;
    311     case AArch64::TBNZW:
    312       Cond[1].setImm(AArch64::TBZW);
    313       break;
    314     case AArch64::TBZX:
    315       Cond[1].setImm(AArch64::TBNZX);
    316       break;
    317     case AArch64::TBNZX:
    318       Cond[1].setImm(AArch64::TBZX);
    319       break;
    320     }
    321   }
    322 
    323   return false;
    324 }
    325 
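// Remove the terminating branches from the end of MBB: the trailing
// unconditional branch and/or the conditional branch before it. Returns the
// number of branches removed and, via BytesRemoved, the bytes they occupied.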
    326 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
    327                                         int *BytesRemoved) const {
    328   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
    329   if (I == MBB.end())
    330     return 0;
    331 
    332   if (!isUncondBranchOpcode(I->getOpcode()) &&
    333       !isCondBranchOpcode(I->getOpcode()))
    334     return 0;
    335 
    336   // Remove the branch.
    337   I->eraseFromParent();
    338 
    339   I = MBB.end();
    340 
    341   if (I == MBB.begin()) {
    342     if (BytesRemoved)
    343       *BytesRemoved = 4;
    344     return 1;
    345   }
    346   --I;
    347   if (!isCondBranchOpcode(I->getOpcode())) {
    348     if (BytesRemoved)
    349       *BytesRemoved = 4;
    350     return 1;
    351   }
    352 
    353   // Remove the branch.
    354   I->eraseFromParent();
    355   if (BytesRemoved)
    356     *BytesRemoved = 8;
    357 
    358   return 2;
    359 }
    360 
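// Emit the conditional branch described by Cond (see parseCondBranch above) at
// the end of MBB, targeting TBB: either a Bcc or a re-materialized
// CBZ/CBNZ/TBZ/TBNZ.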
    361 void AArch64InstrInfo::instantiateCondBranch(
    362     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    363     ArrayRef<MachineOperand> Cond) const {
    364   if (Cond[0].getImm() != -1) {
    365     // Regular Bcc
    366     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
    367   } else {
    368     // Folded compare-and-branch
     369     // Note that we use add() instead of addReg() to keep the register flags.
    370     const MachineInstrBuilder MIB =
    371         BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    372     if (Cond.size() > 3)
    373       MIB.addImm(Cond[3].getImm());
    374     MIB.addMBB(TBB);
    375   }
    376 }
    377 
    378 unsigned AArch64InstrInfo::insertBranch(
    379     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    380     ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
    381   // Shouldn't be a fall through.
    382   assert(TBB && "insertBranch must not be told to insert a fallthrough");
    383 
    384   if (!FBB) {
    385     if (Cond.empty()) // Unconditional branch?
    386       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    387     else
    388       instantiateCondBranch(MBB, DL, TBB, Cond);
    389 
    390     if (BytesAdded)
    391       *BytesAdded = 4;
    392 
    393     return 1;
    394   }
    395 
    396   // Two-way conditional branch.
    397   instantiateCondBranch(MBB, DL, TBB, Cond);
    398   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
    399 
    400   if (BytesAdded)
    401     *BytesAdded = 8;
    402 
    403   return 2;
    404 }
    405 
    406 // Find the original register that VReg is copied from.
    407 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
    408   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    409     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    410     if (!DefMI->isFullCopy())
    411       return VReg;
    412     VReg = DefMI->getOperand(1).getReg();
    413   }
    414   return VReg;
    415 }
    416 
    417 // Determine if VReg is defined by an instruction that can be folded into a
    418 // csel instruction. If so, return the folded opcode, and the replacement
    419 // register.
    420 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
    421                                 unsigned *NewVReg = nullptr) {
    422   VReg = removeCopies(MRI, VReg);
    423   if (!TargetRegisterInfo::isVirtualRegister(VReg))
    424     return 0;
    425 
    426   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
    427   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    428   unsigned Opc = 0;
    429   unsigned SrcOpNum = 0;
    430   switch (DefMI->getOpcode()) {
    431   case AArch64::ADDSXri:
    432   case AArch64::ADDSWri:
    433     // if NZCV is used, do not fold.
    434     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
    435       return 0;
    436     // fall-through to ADDXri and ADDWri.
    437     LLVM_FALLTHROUGH;
    438   case AArch64::ADDXri:
    439   case AArch64::ADDWri:
    440     // add x, 1 -> csinc.
    441     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
    442         DefMI->getOperand(3).getImm() != 0)
    443       return 0;
    444     SrcOpNum = 1;
    445     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    446     break;
    447 
    448   case AArch64::ORNXrr:
    449   case AArch64::ORNWrr: {
    450     // not x -> csinv, represented as orn dst, xzr, src.
    451     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    452     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
    453       return 0;
    454     SrcOpNum = 2;
    455     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    456     break;
    457   }
    458 
    459   case AArch64::SUBSXrr:
    460   case AArch64::SUBSWrr:
    461     // if NZCV is used, do not fold.
    462     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
    463       return 0;
    464     // fall-through to SUBXrr and SUBWrr.
    465     LLVM_FALLTHROUGH;
    466   case AArch64::SUBXrr:
    467   case AArch64::SUBWrr: {
    468     // neg x -> csneg, represented as sub dst, xzr, src.
    469     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    470     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
    471       return 0;
    472     SrcOpNum = 2;
    473     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    474     break;
    475   }
    476   default:
    477     return 0;
    478   }
    479   assert(Opc && SrcOpNum && "Missing parameters");
    480 
    481   if (NewVReg)
    482     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
    483   return Opc;
    484 }
    485 
    486 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
    487                                        ArrayRef<MachineOperand> Cond,
    488                                        unsigned TrueReg, unsigned FalseReg,
    489                                        int &CondCycles, int &TrueCycles,
    490                                        int &FalseCycles) const {
    491   // Check register classes.
    492   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    493   const TargetRegisterClass *RC =
    494       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
    495   if (!RC)
    496     return false;
    497 
    498   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
    499   unsigned ExtraCondLat = Cond.size() != 1;
    500 
    501   // GPRs are handled by csel.
    502   // FIXME: Fold in x+1, -x, and ~x when applicable.
    503   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
    504       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    505     // Single-cycle csel, csinc, csinv, and csneg.
    506     CondCycles = 1 + ExtraCondLat;
    507     TrueCycles = FalseCycles = 1;
    508     if (canFoldIntoCSel(MRI, TrueReg))
    509       TrueCycles = 0;
    510     else if (canFoldIntoCSel(MRI, FalseReg))
    511       FalseCycles = 0;
    512     return true;
    513   }
    514 
    515   // Scalar floating point is handled by fcsel.
    516   // FIXME: Form fabs, fmin, and fmax when applicable.
    517   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
    518       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    519     CondCycles = 5 + ExtraCondLat;
    520     TrueCycles = FalseCycles = 2;
    521     return true;
    522   }
    523 
    524   // Can't do vectors.
    525   return false;
    526 }
    527 
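// Lower a select on the condition described by Cond (see parseCondBranch): for
// folded compare-and-branch conditions, first materialize the implied compare
// (SUBS ..., #0) or bit test (ANDS ..., #1<<bit), then emit a CSEL/FCSEL of
// TrueReg and FalseReg into DstReg, folding a simple defining instruction into
// the csel when possible.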
    528 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
    529                                     MachineBasicBlock::iterator I,
    530                                     const DebugLoc &DL, unsigned DstReg,
    531                                     ArrayRef<MachineOperand> Cond,
    532                                     unsigned TrueReg, unsigned FalseReg) const {
    533   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    534 
    535   // Parse the condition code, see parseCondBranch() above.
    536   AArch64CC::CondCode CC;
    537   switch (Cond.size()) {
    538   default:
    539     llvm_unreachable("Unknown condition opcode in Cond");
    540   case 1: // b.cc
    541     CC = AArch64CC::CondCode(Cond[0].getImm());
    542     break;
    543   case 3: { // cbz/cbnz
    544     // We must insert a compare against 0.
    545     bool Is64Bit;
    546     switch (Cond[1].getImm()) {
    547     default:
    548       llvm_unreachable("Unknown branch opcode in Cond");
    549     case AArch64::CBZW:
    550       Is64Bit = false;
    551       CC = AArch64CC::EQ;
    552       break;
    553     case AArch64::CBZX:
    554       Is64Bit = true;
    555       CC = AArch64CC::EQ;
    556       break;
    557     case AArch64::CBNZW:
    558       Is64Bit = false;
    559       CC = AArch64CC::NE;
    560       break;
    561     case AArch64::CBNZX:
    562       Is64Bit = true;
    563       CC = AArch64CC::NE;
    564       break;
    565     }
    566     unsigned SrcReg = Cond[2].getReg();
    567     if (Is64Bit) {
    568       // cmp reg, #0 is actually subs xzr, reg, #0.
    569       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
    570       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
    571           .addReg(SrcReg)
    572           .addImm(0)
    573           .addImm(0);
    574     } else {
    575       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
    576       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
    577           .addReg(SrcReg)
    578           .addImm(0)
    579           .addImm(0);
    580     }
    581     break;
    582   }
    583   case 4: { // tbz/tbnz
    584     // We must insert a tst instruction.
    585     switch (Cond[1].getImm()) {
    586     default:
    587       llvm_unreachable("Unknown branch opcode in Cond");
    588     case AArch64::TBZW:
    589     case AArch64::TBZX:
    590       CC = AArch64CC::EQ;
    591       break;
    592     case AArch64::TBNZW:
    593     case AArch64::TBNZX:
    594       CC = AArch64CC::NE;
    595       break;
    596     }
    597     // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
    598     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
    599       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
    600           .addReg(Cond[2].getReg())
    601           .addImm(
    602               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    603     else
    604       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
    605           .addReg(Cond[2].getReg())
    606           .addImm(
    607               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    608     break;
    609   }
    610   }
    611 
    612   unsigned Opc = 0;
    613   const TargetRegisterClass *RC = nullptr;
    614   bool TryFold = false;
    615   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    616     RC = &AArch64::GPR64RegClass;
    617     Opc = AArch64::CSELXr;
    618     TryFold = true;
    619   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    620     RC = &AArch64::GPR32RegClass;
    621     Opc = AArch64::CSELWr;
    622     TryFold = true;
    623   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    624     RC = &AArch64::FPR64RegClass;
    625     Opc = AArch64::FCSELDrrr;
    626   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    627     RC = &AArch64::FPR32RegClass;
    628     Opc = AArch64::FCSELSrrr;
    629   }
    630   assert(RC && "Unsupported regclass");
    631 
    632   // Try folding simple instructions into the csel.
    633   if (TryFold) {
    634     unsigned NewVReg = 0;
    635     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    636     if (FoldedOpc) {
     637       // The folded opcodes csinc, csinv and csneg apply the operation to
    638       // FalseReg, so we need to invert the condition.
    639       CC = AArch64CC::getInvertedCondCode(CC);
    640       TrueReg = FalseReg;
    641     } else
    642       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
    643 
    644     // Fold the operation. Leave any dead instructions for DCE to clean up.
    645     if (FoldedOpc) {
    646       FalseReg = NewVReg;
    647       Opc = FoldedOpc;
     648       // This extends the live range of NewVReg.
    649       MRI.clearKillFlags(NewVReg);
    650     }
    651   }
    652 
     653   // Pull all virtual registers into the appropriate class.
    654   MRI.constrainRegClass(TrueReg, RC);
    655   MRI.constrainRegClass(FalseReg, RC);
    656 
    657   // Insert the csel.
    658   BuildMI(MBB, I, DL, get(Opc), DstReg)
    659       .addReg(TrueReg)
    660       .addReg(FalseReg)
    661       .addImm(CC);
    662 }
    663 
     664 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRWri/ORRXri.
    665 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
    666   uint64_t Imm = MI.getOperand(1).getImm();
    667   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
    668   uint64_t Encoding;
    669   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
    670 }
    671 
    672 // FIXME: this implementation should be micro-architecture dependent, so a
     673 // micro-architecture target hook should be introduced here in the future.
    674 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
    675   if (!Subtarget.hasCustomCheapAsMoveHandling())
    676     return MI.isAsCheapAsAMove();
    677 
    678   if (Subtarget.hasExynosCheapAsMoveHandling()) {
    679     if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
    680       return true;
    681     else
    682       return MI.isAsCheapAsAMove();
    683   }
    684 
    685   switch (MI.getOpcode()) {
    686   default:
    687     return false;
    688 
    689   // add/sub on register without shift
    690   case AArch64::ADDWri:
    691   case AArch64::ADDXri:
    692   case AArch64::SUBWri:
    693   case AArch64::SUBXri:
    694     return (MI.getOperand(3).getImm() == 0);
    695 
    696   // logical ops on immediate
    697   case AArch64::ANDWri:
    698   case AArch64::ANDXri:
    699   case AArch64::EORWri:
    700   case AArch64::EORXri:
    701   case AArch64::ORRWri:
    702   case AArch64::ORRXri:
    703     return true;
    704 
    705   // logical ops on register without shift
    706   case AArch64::ANDWrr:
    707   case AArch64::ANDXrr:
    708   case AArch64::BICWrr:
    709   case AArch64::BICXrr:
    710   case AArch64::EONWrr:
    711   case AArch64::EONXrr:
    712   case AArch64::EORWrr:
    713   case AArch64::EORXrr:
    714   case AArch64::ORNWrr:
    715   case AArch64::ORNXrr:
    716   case AArch64::ORRWrr:
    717   case AArch64::ORRXrr:
    718     return true;
    719 
    720   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
    721   // ORRXri, it is as cheap as MOV
    722   case AArch64::MOVi32imm:
    723     return canBeExpandedToORR(MI, 32);
    724   case AArch64::MOVi64imm:
    725     return canBeExpandedToORR(MI, 64);
    726 
    727   // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
    728   // feature.
    729   case AArch64::FMOVH0:
    730   case AArch64::FMOVS0:
    731   case AArch64::FMOVD0:
    732     return Subtarget.hasZeroCycleZeroing();
    733   case TargetOpcode::COPY:
    734     return (Subtarget.hasZeroCycleZeroing() &&
    735             (MI.getOperand(1).getReg() == AArch64::WZR ||
    736              MI.getOperand(1).getReg() == AArch64::XZR));
    737   }
    738 
    739   llvm_unreachable("Unknown opcode to check as cheap as a move!");
    740 }
    741 
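// Heuristic used by isAsCheapAsAMove on Exynos: returns true for register
// reset idioms such as MOV from SP, ADR/ADRP literals, MOVI #0, MOVN/MOVZ
// immediates, and MOVs expressed as ORR with the zero register.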
    742 bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
    743   unsigned Reg, Imm, Shift;
    744 
    745   switch (MI.getOpcode()) {
    746   default:
    747     return false;
    748 
    749   // MOV Rd, SP
    750   case AArch64::ADDWri:
    751   case AArch64::ADDXri:
    752     if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
    753       return false;
    754 
    755     Reg = MI.getOperand(1).getReg();
    756     Imm = MI.getOperand(2).getImm();
    757     return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
    758 
    759   // Literal
    760   case AArch64::ADR:
    761   case AArch64::ADRP:
    762     return true;
    763 
    764   // MOVI Vd, #0
    765   case AArch64::MOVID:
    766   case AArch64::MOVIv8b_ns:
    767   case AArch64::MOVIv2d_ns:
    768   case AArch64::MOVIv16b_ns:
    769     Imm = MI.getOperand(1).getImm();
    770     return (Imm == 0);
    771 
    772   // MOVI Vd, #0
    773   case AArch64::MOVIv2i32:
    774   case AArch64::MOVIv4i16:
    775   case AArch64::MOVIv4i32:
    776   case AArch64::MOVIv8i16:
    777     Imm = MI.getOperand(1).getImm();
    778     Shift = MI.getOperand(2).getImm();
    779     return (Imm == 0 && Shift == 0);
    780 
    781   // MOV Rd, Imm
    782   case AArch64::MOVNWi:
    783   case AArch64::MOVNXi:
    784 
    785   // MOV Rd, Imm
    786   case AArch64::MOVZWi:
    787   case AArch64::MOVZXi:
    788     return true;
    789 
    790   // MOV Rd, Imm
    791   case AArch64::ORRWri:
    792   case AArch64::ORRXri:
    793     if (!MI.getOperand(1).isReg())
    794       return false;
    795 
    796     Reg = MI.getOperand(1).getReg();
    797     Imm = MI.getOperand(2).getImm();
    798     return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
    799 
    800   // MOV Rd, Rm
    801   case AArch64::ORRWrs:
    802   case AArch64::ORRXrs:
    803     if (!MI.getOperand(1).isReg())
    804       return false;
    805 
    806     Reg = MI.getOperand(1).getReg();
    807     Imm = MI.getOperand(3).getImm();
    808     Shift = AArch64_AM::getShiftValue(Imm);
    809     return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
    810   }
    811 }
    812 
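// Heuristic used by isAsCheapAsAMove on Exynos: returns true when the shift or
// extend amount is small enough to be cheap (immediate forms, LSL/UXTX by at
// most 3 for ALU register forms, and X-register offsets for indexed memory
// forms).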
    813 bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
    814   unsigned Imm, Shift;
    815   AArch64_AM::ShiftExtendType Ext;
    816 
    817   switch (MI.getOpcode()) {
    818   default:
    819     return false;
    820 
    821   // WriteI
    822   case AArch64::ADDSWri:
    823   case AArch64::ADDSXri:
    824   case AArch64::ADDWri:
    825   case AArch64::ADDXri:
    826   case AArch64::SUBSWri:
    827   case AArch64::SUBSXri:
    828   case AArch64::SUBWri:
    829   case AArch64::SUBXri:
    830     return true;
    831 
    832   // WriteISReg
    833   case AArch64::ADDSWrs:
    834   case AArch64::ADDSXrs:
    835   case AArch64::ADDWrs:
    836   case AArch64::ADDXrs:
    837   case AArch64::ANDSWrs:
    838   case AArch64::ANDSXrs:
    839   case AArch64::ANDWrs:
    840   case AArch64::ANDXrs:
    841   case AArch64::BICSWrs:
    842   case AArch64::BICSXrs:
    843   case AArch64::BICWrs:
    844   case AArch64::BICXrs:
    845   case AArch64::EONWrs:
    846   case AArch64::EONXrs:
    847   case AArch64::EORWrs:
    848   case AArch64::EORXrs:
    849   case AArch64::ORNWrs:
    850   case AArch64::ORNXrs:
    851   case AArch64::ORRWrs:
    852   case AArch64::ORRXrs:
    853   case AArch64::SUBSWrs:
    854   case AArch64::SUBSXrs:
    855   case AArch64::SUBWrs:
    856   case AArch64::SUBXrs:
    857     Imm = MI.getOperand(3).getImm();
    858     Shift = AArch64_AM::getShiftValue(Imm);
    859     Ext = AArch64_AM::getShiftType(Imm);
    860     return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
    861 
    862   // WriteIEReg
    863   case AArch64::ADDSWrx:
    864   case AArch64::ADDSXrx:
    865   case AArch64::ADDSXrx64:
    866   case AArch64::ADDWrx:
    867   case AArch64::ADDXrx:
    868   case AArch64::ADDXrx64:
    869   case AArch64::SUBSWrx:
    870   case AArch64::SUBSXrx:
    871   case AArch64::SUBSXrx64:
    872   case AArch64::SUBWrx:
    873   case AArch64::SUBXrx:
    874   case AArch64::SUBXrx64:
    875     Imm = MI.getOperand(3).getImm();
    876     Shift = AArch64_AM::getArithShiftValue(Imm);
    877     Ext = AArch64_AM::getArithExtendType(Imm);
    878     return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
    879 
    880   case AArch64::PRFMroW:
    881   case AArch64::PRFMroX:
    882 
    883   // WriteLDIdx
    884   case AArch64::LDRBBroW:
    885   case AArch64::LDRBBroX:
    886   case AArch64::LDRHHroW:
    887   case AArch64::LDRHHroX:
    888   case AArch64::LDRSBWroW:
    889   case AArch64::LDRSBWroX:
    890   case AArch64::LDRSBXroW:
    891   case AArch64::LDRSBXroX:
    892   case AArch64::LDRSHWroW:
    893   case AArch64::LDRSHWroX:
    894   case AArch64::LDRSHXroW:
    895   case AArch64::LDRSHXroX:
    896   case AArch64::LDRSWroW:
    897   case AArch64::LDRSWroX:
    898   case AArch64::LDRWroW:
    899   case AArch64::LDRWroX:
    900   case AArch64::LDRXroW:
    901   case AArch64::LDRXroX:
    902 
    903   case AArch64::LDRBroW:
    904   case AArch64::LDRBroX:
    905   case AArch64::LDRDroW:
    906   case AArch64::LDRDroX:
    907   case AArch64::LDRHroW:
    908   case AArch64::LDRHroX:
    909   case AArch64::LDRSroW:
    910   case AArch64::LDRSroX:
    911 
    912   // WriteSTIdx
    913   case AArch64::STRBBroW:
    914   case AArch64::STRBBroX:
    915   case AArch64::STRHHroW:
    916   case AArch64::STRHHroX:
    917   case AArch64::STRWroW:
    918   case AArch64::STRWroX:
    919   case AArch64::STRXroW:
    920   case AArch64::STRXroX:
    921 
    922   case AArch64::STRBroW:
    923   case AArch64::STRBroX:
    924   case AArch64::STRDroW:
    925   case AArch64::STRDroX:
    926   case AArch64::STRHroW:
    927   case AArch64::STRHroX:
    928   case AArch64::STRSroW:
    929   case AArch64::STRSroX:
    930     Imm = MI.getOperand(3).getImm();
    931     Ext = AArch64_AM::getMemExtendType(Imm);
    932     return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
    933   }
    934 }
    935 
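// Heuristic used on Falkor: returns true when a shifted or extended operand,
// or a register-offset addressing mode, is of a form the scheduling model
// considers fast (see the per-opcode checks below).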
    936 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
    937   switch (MI.getOpcode()) {
    938   default:
    939     return false;
    940 
    941   case AArch64::ADDWrs:
    942   case AArch64::ADDXrs:
    943   case AArch64::ADDSWrs:
    944   case AArch64::ADDSXrs: {
    945     unsigned Imm = MI.getOperand(3).getImm();
    946     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    947     if (ShiftVal == 0)
    948       return true;
    949     return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
    950   }
    951 
    952   case AArch64::ADDWrx:
    953   case AArch64::ADDXrx:
    954   case AArch64::ADDXrx64:
    955   case AArch64::ADDSWrx:
    956   case AArch64::ADDSXrx:
    957   case AArch64::ADDSXrx64: {
    958     unsigned Imm = MI.getOperand(3).getImm();
    959     switch (AArch64_AM::getArithExtendType(Imm)) {
    960     default:
    961       return false;
    962     case AArch64_AM::UXTB:
    963     case AArch64_AM::UXTH:
    964     case AArch64_AM::UXTW:
    965     case AArch64_AM::UXTX:
    966       return AArch64_AM::getArithShiftValue(Imm) <= 4;
    967     }
    968   }
    969 
    970   case AArch64::SUBWrs:
    971   case AArch64::SUBSWrs: {
    972     unsigned Imm = MI.getOperand(3).getImm();
    973     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    974     return ShiftVal == 0 ||
    975            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
    976   }
    977 
    978   case AArch64::SUBXrs:
    979   case AArch64::SUBSXrs: {
    980     unsigned Imm = MI.getOperand(3).getImm();
    981     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    982     return ShiftVal == 0 ||
    983            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
    984   }
    985 
    986   case AArch64::SUBWrx:
    987   case AArch64::SUBXrx:
    988   case AArch64::SUBXrx64:
    989   case AArch64::SUBSWrx:
    990   case AArch64::SUBSXrx:
    991   case AArch64::SUBSXrx64: {
    992     unsigned Imm = MI.getOperand(3).getImm();
    993     switch (AArch64_AM::getArithExtendType(Imm)) {
    994     default:
    995       return false;
    996     case AArch64_AM::UXTB:
    997     case AArch64_AM::UXTH:
    998     case AArch64_AM::UXTW:
    999     case AArch64_AM::UXTX:
   1000       return AArch64_AM::getArithShiftValue(Imm) == 0;
   1001     }
   1002   }
   1003 
   1004   case AArch64::LDRBBroW:
   1005   case AArch64::LDRBBroX:
   1006   case AArch64::LDRBroW:
   1007   case AArch64::LDRBroX:
   1008   case AArch64::LDRDroW:
   1009   case AArch64::LDRDroX:
   1010   case AArch64::LDRHHroW:
   1011   case AArch64::LDRHHroX:
   1012   case AArch64::LDRHroW:
   1013   case AArch64::LDRHroX:
   1014   case AArch64::LDRQroW:
   1015   case AArch64::LDRQroX:
   1016   case AArch64::LDRSBWroW:
   1017   case AArch64::LDRSBWroX:
   1018   case AArch64::LDRSBXroW:
   1019   case AArch64::LDRSBXroX:
   1020   case AArch64::LDRSHWroW:
   1021   case AArch64::LDRSHWroX:
   1022   case AArch64::LDRSHXroW:
   1023   case AArch64::LDRSHXroX:
   1024   case AArch64::LDRSWroW:
   1025   case AArch64::LDRSWroX:
   1026   case AArch64::LDRSroW:
   1027   case AArch64::LDRSroX:
   1028   case AArch64::LDRWroW:
   1029   case AArch64::LDRWroX:
   1030   case AArch64::LDRXroW:
   1031   case AArch64::LDRXroX:
   1032   case AArch64::PRFMroW:
   1033   case AArch64::PRFMroX:
   1034   case AArch64::STRBBroW:
   1035   case AArch64::STRBBroX:
   1036   case AArch64::STRBroW:
   1037   case AArch64::STRBroX:
   1038   case AArch64::STRDroW:
   1039   case AArch64::STRDroX:
   1040   case AArch64::STRHHroW:
   1041   case AArch64::STRHHroX:
   1042   case AArch64::STRHroW:
   1043   case AArch64::STRHroX:
   1044   case AArch64::STRQroW:
   1045   case AArch64::STRQroX:
   1046   case AArch64::STRSroW:
   1047   case AArch64::STRSroX:
   1048   case AArch64::STRWroW:
   1049   case AArch64::STRWroX:
   1050   case AArch64::STRXroW:
   1051   case AArch64::STRXroX: {
   1052     unsigned IsSigned = MI.getOperand(3).getImm();
   1053     return !IsSigned;
   1054   }
   1055   }
   1056 }
   1057 
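// Recognize sxtw/uxtw, i.e. SBFMXri/UBFMXri with immr == 0 and imms == 31, as
// a coalescable copy: the 32-bit source becomes the sub_32 sub-register of the
// 64-bit destination.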
   1058 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
   1059                                              unsigned &SrcReg, unsigned &DstReg,
   1060                                              unsigned &SubIdx) const {
   1061   switch (MI.getOpcode()) {
   1062   default:
   1063     return false;
   1064   case AArch64::SBFMXri: // aka sxtw
   1065   case AArch64::UBFMXri: // aka uxtw
   1066     // Check for the 32 -> 64 bit extension case, these instructions can do
   1067     // much more.
   1068     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
   1069       return false;
   1070     // This is a signed or unsigned 32 -> 64 bit extension.
   1071     SrcReg = MI.getOperand(1).getReg();
   1072     DstReg = MI.getOperand(0).getReg();
   1073     SubIdx = AArch64::sub_32;
   1074     return true;
   1075   }
   1076 }
   1077 
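// Conservatively prove that two memory accesses cannot overlap: both must be
// decomposable into (base register, offset, width), share the same base
// register, and have non-overlapping [offset, offset + width) ranges.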
   1078 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
   1079     MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
   1080   const TargetRegisterInfo *TRI = &getRegisterInfo();
   1081   unsigned BaseRegA = 0, BaseRegB = 0;
   1082   int64_t OffsetA = 0, OffsetB = 0;
   1083   unsigned WidthA = 0, WidthB = 0;
   1084 
   1085   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
   1086   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
   1087 
   1088   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
   1089       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
   1090     return false;
   1091 
   1092   // Retrieve the base register, offset from the base register and width. Width
   1093   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
    1094   // base registers are identical, and the offset of the lower memory access
    1095   // plus its width does not reach the offset of the higher memory access,
    1096   // then the memory accesses do not overlap.
   1097   if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
   1098       getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
   1099     if (BaseRegA == BaseRegB) {
   1100       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
   1101       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
   1102       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
   1103       if (LowOffset + LowWidth <= HighOffset)
   1104         return true;
   1105     }
   1106   }
   1107   return false;
   1108 }
   1109 
   1110 /// analyzeCompare - For a comparison instruction, return the source registers
   1111 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
   1112 /// Return true if the comparison instruction can be analyzed.
   1113 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
   1114                                       unsigned &SrcReg2, int &CmpMask,
   1115                                       int &CmpValue) const {
   1116   // The first operand can be a frame index where we'd normally expect a
   1117   // register.
   1118   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
   1119   if (!MI.getOperand(1).isReg())
   1120     return false;
   1121 
   1122   switch (MI.getOpcode()) {
   1123   default:
   1124     break;
   1125   case AArch64::SUBSWrr:
   1126   case AArch64::SUBSWrs:
   1127   case AArch64::SUBSWrx:
   1128   case AArch64::SUBSXrr:
   1129   case AArch64::SUBSXrs:
   1130   case AArch64::SUBSXrx:
   1131   case AArch64::ADDSWrr:
   1132   case AArch64::ADDSWrs:
   1133   case AArch64::ADDSWrx:
   1134   case AArch64::ADDSXrr:
   1135   case AArch64::ADDSXrs:
   1136   case AArch64::ADDSXrx:
   1137     // Replace SUBSWrr with SUBWrr if NZCV is not used.
   1138     SrcReg = MI.getOperand(1).getReg();
   1139     SrcReg2 = MI.getOperand(2).getReg();
   1140     CmpMask = ~0;
   1141     CmpValue = 0;
   1142     return true;
   1143   case AArch64::SUBSWri:
   1144   case AArch64::ADDSWri:
   1145   case AArch64::SUBSXri:
   1146   case AArch64::ADDSXri:
   1147     SrcReg = MI.getOperand(1).getReg();
   1148     SrcReg2 = 0;
   1149     CmpMask = ~0;
    1150     // FIXME: CmpValue is reduced to 0 or 1 (whether the immediate is non-zero).
   1151     CmpValue = MI.getOperand(2).getImm() != 0;
   1152     return true;
   1153   case AArch64::ANDSWri:
   1154   case AArch64::ANDSXri:
    1155     // ANDS does not use the same encoding scheme as the other xxxS
   1156     // instructions.
   1157     SrcReg = MI.getOperand(1).getReg();
   1158     SrcReg2 = 0;
   1159     CmpMask = ~0;
    1160     // FIXME: The return type of decodeLogicalImmediate is uint64_t, while the
    1161     // type of CmpValue is int. Converting uint64_t to int drops the high 32
    1162     // bits, which caused a bug on spec2006-483.xalancbmk.
    1163     // CmpValue is only compared against zero in optimizeCompareInstr, so it is
    1164     // enough to record whether the decoded immediate is non-zero.
   1165     CmpValue = AArch64_AM::decodeLogicalImmediate(
   1166                    MI.getOperand(2).getImm(),
   1167                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
   1168     return true;
   1169   }
   1170 
   1171   return false;
   1172 }
   1173 
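// After an opcode change, re-check every register operand against the register
// class constraints of the new MCInstrDesc, constraining virtual registers
// where possible. Returns false if some operand cannot be made compatible.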
   1174 static bool UpdateOperandRegClass(MachineInstr &Instr) {
   1175   MachineBasicBlock *MBB = Instr.getParent();
   1176   assert(MBB && "Can't get MachineBasicBlock here");
   1177   MachineFunction *MF = MBB->getParent();
   1178   assert(MF && "Can't get MachineFunction here");
   1179   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
   1180   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
   1181   MachineRegisterInfo *MRI = &MF->getRegInfo();
   1182 
   1183   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
   1184        ++OpIdx) {
   1185     MachineOperand &MO = Instr.getOperand(OpIdx);
   1186     const TargetRegisterClass *OpRegCstraints =
   1187         Instr.getRegClassConstraint(OpIdx, TII, TRI);
   1188 
   1189     // If there's no constraint, there's nothing to do.
   1190     if (!OpRegCstraints)
   1191       continue;
   1192     // If the operand is a frame index, there's nothing to do here.
   1193     // A frame index operand will resolve correctly during PEI.
   1194     if (MO.isFI())
   1195       continue;
   1196 
   1197     assert(MO.isReg() &&
   1198            "Operand has register constraints without being a register!");
   1199 
   1200     unsigned Reg = MO.getReg();
   1201     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
   1202       if (!OpRegCstraints->contains(Reg))
   1203         return false;
   1204     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
   1205                !MRI->constrainRegClass(Reg, OpRegCstraints))
   1206       return false;
   1207   }
   1208 
   1209   return true;
   1210 }
   1211 
   1212 /// Return the opcode that does not set flags when possible - otherwise
    1213 /// return the original opcode. The caller is responsible for doing the actual
   1214 /// substitution and legality checking.
   1215 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
   1216   // Don't convert all compare instructions, because for some the zero register
   1217   // encoding becomes the sp register.
   1218   bool MIDefinesZeroReg = false;
   1219   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
   1220     MIDefinesZeroReg = true;
   1221 
   1222   switch (MI.getOpcode()) {
   1223   default:
   1224     return MI.getOpcode();
   1225   case AArch64::ADDSWrr:
   1226     return AArch64::ADDWrr;
   1227   case AArch64::ADDSWri:
   1228     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
   1229   case AArch64::ADDSWrs:
   1230     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
   1231   case AArch64::ADDSWrx:
   1232     return AArch64::ADDWrx;
   1233   case AArch64::ADDSXrr:
   1234     return AArch64::ADDXrr;
   1235   case AArch64::ADDSXri:
   1236     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
   1237   case AArch64::ADDSXrs:
   1238     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
   1239   case AArch64::ADDSXrx:
   1240     return AArch64::ADDXrx;
   1241   case AArch64::SUBSWrr:
   1242     return AArch64::SUBWrr;
   1243   case AArch64::SUBSWri:
   1244     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
   1245   case AArch64::SUBSWrs:
   1246     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
   1247   case AArch64::SUBSWrx:
   1248     return AArch64::SUBWrx;
   1249   case AArch64::SUBSXrr:
   1250     return AArch64::SUBXrr;
   1251   case AArch64::SUBSXri:
   1252     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
   1253   case AArch64::SUBSXrs:
   1254     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
   1255   case AArch64::SUBSXrx:
   1256     return AArch64::SUBXrx;
   1257   }
   1258 }
   1259 
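// Kinds of NZCV accesses to look for when scanning an instruction range.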
   1260 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
   1261 
   1262 /// True when condition flags are accessed (either by writing or reading)
   1263 /// on the instruction trace starting at From and ending at To.
   1264 ///
    1265 /// Note: If From and To are in different blocks, it is assumed that the
    1266 /// condition flags are accessed on the path between them.
   1267 static bool areCFlagsAccessedBetweenInstrs(
   1268     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
   1269     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
   1270   // Early exit if To is at the beginning of the BB.
   1271   if (To == To->getParent()->begin())
   1272     return true;
   1273 
   1274   // Check whether the instructions are in the same basic block
   1275   // If not, assume the condition flags might get modified somewhere.
   1276   if (To->getParent() != From->getParent())
   1277     return true;
   1278 
   1279   // From must be above To.
   1280   assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
   1281                       [From](MachineInstr &MI) {
   1282                         return MI.getIterator() == From;
   1283                       }) != To->getParent()->rend());
   1284 
    1285   // We iterate backwards, starting at \p To, until we hit \p From.
   1286   for (--To; To != From; --To) {
   1287     const MachineInstr &Instr = *To;
   1288 
   1289     if (((AccessToCheck & AK_Write) &&
   1290          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
   1291         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
   1292       return true;
   1293   }
   1294   return false;
   1295 }
   1296 
    1297 /// Try to optimize a compare instruction. A compare instruction is an
    1298 /// instruction which produces AArch64::NZCV. It is a pure compare instruction
    1299 /// when there are no uses of its destination register.
   1301 ///
   1302 /// The following steps are tried in order:
    1303 /// 1. Convert CmpInstr into its non-flag-setting form if NZCV is not used.
   1304 /// 2. Remove CmpInstr if above there is an instruction producing a needed
   1305 ///    condition code or an instruction which can be converted into such an
   1306 ///    instruction.
   1307 ///    Only comparison with zero is supported.
   1308 bool AArch64InstrInfo::optimizeCompareInstr(
   1309     MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
   1310     int CmpValue, const MachineRegisterInfo *MRI) const {
   1311   assert(CmpInstr.getParent());
   1312   assert(MRI);
   1313 
   1314   // Replace SUBSWrr with SUBWrr if NZCV is not used.
   1315   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
   1316   if (DeadNZCVIdx != -1) {
   1317     if (CmpInstr.definesRegister(AArch64::WZR) ||
   1318         CmpInstr.definesRegister(AArch64::XZR)) {
   1319       CmpInstr.eraseFromParent();
   1320       return true;
   1321     }
   1322     unsigned Opc = CmpInstr.getOpcode();
   1323     unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
   1324     if (NewOpc == Opc)
   1325       return false;
   1326     const MCInstrDesc &MCID = get(NewOpc);
   1327     CmpInstr.setDesc(MCID);
   1328     CmpInstr.RemoveOperand(DeadNZCVIdx);
   1329     bool succeeded = UpdateOperandRegClass(CmpInstr);
   1330     (void)succeeded;
   1331     assert(succeeded && "Some operands reg class are incompatible!");
   1332     return true;
   1333   }
   1334 
    1335   // Continue only if we have an "ri" form whose immediate is zero.
    1336   // FIXME: CmpValue has already been converted to 0 or 1 in the
    1337   // analyzeCompare function.
   1338   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
   1339   if (CmpValue != 0 || SrcReg2 != 0)
   1340     return false;
   1341 
    1342   // CmpInstr is a pure compare instruction if its destination register is not used.
   1343   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
   1344     return false;
   1345 
   1346   return substituteCmpToZero(CmpInstr, SrcReg, MRI);
   1347 }
   1348 
    1349 /// Get the opcode of the S (flag-setting) version of Instr.
    1350 /// If Instr is already an S version, its own opcode is returned.
    1351 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
    1352 /// version or we are not interested in it.
   1353 static unsigned sForm(MachineInstr &Instr) {
   1354   switch (Instr.getOpcode()) {
   1355   default:
   1356     return AArch64::INSTRUCTION_LIST_END;
   1357 
   1358   case AArch64::ADDSWrr:
   1359   case AArch64::ADDSWri:
   1360   case AArch64::ADDSXrr:
   1361   case AArch64::ADDSXri:
   1362   case AArch64::SUBSWrr:
   1363   case AArch64::SUBSWri:
   1364   case AArch64::SUBSXrr:
   1365   case AArch64::SUBSXri:
   1366     return Instr.getOpcode();
   1367 
   1368   case AArch64::ADDWrr:
   1369     return AArch64::ADDSWrr;
   1370   case AArch64::ADDWri:
   1371     return AArch64::ADDSWri;
   1372   case AArch64::ADDXrr:
   1373     return AArch64::ADDSXrr;
   1374   case AArch64::ADDXri:
   1375     return AArch64::ADDSXri;
   1376   case AArch64::ADCWr:
   1377     return AArch64::ADCSWr;
   1378   case AArch64::ADCXr:
   1379     return AArch64::ADCSXr;
   1380   case AArch64::SUBWrr:
   1381     return AArch64::SUBSWrr;
   1382   case AArch64::SUBWri:
   1383     return AArch64::SUBSWri;
   1384   case AArch64::SUBXrr:
   1385     return AArch64::SUBSXrr;
   1386   case AArch64::SUBXri:
   1387     return AArch64::SUBSXri;
   1388   case AArch64::SBCWr:
   1389     return AArch64::SBCSWr;
   1390   case AArch64::SBCXr:
   1391     return AArch64::SBCSXr;
   1392   case AArch64::ANDWri:
   1393     return AArch64::ANDSWri;
   1394   case AArch64::ANDXri:
   1395     return AArch64::ANDSXri;
   1396   }
   1397 }
   1398 
   1399 /// Check if AArch64::NZCV should be alive in successors of MBB.
   1400 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
   1401   for (auto *BB : MBB->successors())
   1402     if (BB->isLiveIn(AArch64::NZCV))
   1403       return true;
   1404   return false;
   1405 }
   1406 
   1407 namespace {
   1408 
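// Tracks which of the N, Z, C and V flags are read by the instructions that
// follow a compare; used to decide whether the compare can be removed.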
   1409 struct UsedNZCV {
   1410   bool N = false;
   1411   bool Z = false;
   1412   bool C = false;
   1413   bool V = false;
   1414 
   1415   UsedNZCV() = default;
   1416 
   1417   UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
   1418     this->N |= UsedFlags.N;
   1419     this->Z |= UsedFlags.Z;
   1420     this->C |= UsedFlags.C;
   1421     this->V |= UsedFlags.V;
   1422     return *this;
   1423   }
   1424 };
   1425 
   1426 } // end anonymous namespace
   1427 
   1428 /// Find a condition code used by the instruction.
   1429 /// Returns AArch64CC::Invalid if either the instruction does not use condition
   1430 /// codes or we don't optimize CmpInstr in the presence of such instructions.
   1431 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
   1432   switch (Instr.getOpcode()) {
   1433   default:
   1434     return AArch64CC::Invalid;
   1435 
   1436   case AArch64::Bcc: {
   1437     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
   1438     assert(Idx >= 2);
   1439     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
   1440   }
   1441 
   1442   case AArch64::CSINVWr:
   1443   case AArch64::CSINVXr:
   1444   case AArch64::CSINCWr:
   1445   case AArch64::CSINCXr:
   1446   case AArch64::CSELWr:
   1447   case AArch64::CSELXr:
   1448   case AArch64::CSNEGWr:
   1449   case AArch64::CSNEGXr:
   1450   case AArch64::FCSELSrrr:
   1451   case AArch64::FCSELDrrr: {
   1452     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
   1453     assert(Idx >= 1);
   1454     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
   1455   }
   1456   }
   1457 }
   1458 
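/// Map a condition code to the set of NZCV flags it reads. For example
/// AArch64CC::EQ only reads Z, while AArch64CC::HI ("unsigned higher",
/// C set and Z clear) reads both C and Z.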
   1459 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
   1460   assert(CC != AArch64CC::Invalid);
   1461   UsedNZCV UsedFlags;
   1462   switch (CC) {
   1463   default:
   1464     break;
   1465 
   1466   case AArch64CC::EQ: // Z set
   1467   case AArch64CC::NE: // Z clear
   1468     UsedFlags.Z = true;
   1469     break;
   1470 
   1471   case AArch64CC::HI: // Z clear and C set
   1472   case AArch64CC::LS: // Z set   or  C clear
   1473     UsedFlags.Z = true;
   1474     LLVM_FALLTHROUGH;
   1475   case AArch64CC::HS: // C set
   1476   case AArch64CC::LO: // C clear
   1477     UsedFlags.C = true;
   1478     break;
   1479 
   1480   case AArch64CC::MI: // N set
   1481   case AArch64CC::PL: // N clear
   1482     UsedFlags.N = true;
   1483     break;
   1484 
   1485   case AArch64CC::VS: // V set
   1486   case AArch64CC::VC: // V clear
   1487     UsedFlags.V = true;
   1488     break;
   1489 
   1490   case AArch64CC::GT: // Z clear, N and V the same
   1491   case AArch64CC::LE: // Z set,   N and V differ
   1492     UsedFlags.Z = true;
   1493     LLVM_FALLTHROUGH;
   1494   case AArch64CC::GE: // N and V the same
   1495   case AArch64CC::LT: // N and V differ
   1496     UsedFlags.N = true;
   1497     UsedFlags.V = true;
   1498     break;
   1499   }
   1500   return UsedFlags;
   1501 }
   1502 
   1503 static bool isADDSRegImm(unsigned Opcode) {
   1504   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
   1505 }
   1506 
   1507 static bool isSUBSRegImm(unsigned Opcode) {
   1508   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
   1509 }
   1510 
   1511 /// Check if CmpInstr can be substituted by MI.
   1512 ///
   1513 /// CmpInstr can be substituted when all of the following hold:
   1514 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0',
   1515 /// - and MI and CmpInstr are in the same MachineBasicBlock,
   1516 /// - and the condition flags are not alive in successors of CmpInstr's parent,
   1517 /// - and, if MI's opcode is the S form, there are no defs of the flags between
   1518 ///        MI and CmpInstr,
   1519 ///        or, if MI's opcode is not the S form, there are neither defs nor uses
   1520 ///        of the flags between MI and CmpInstr,
   1521 /// - and the C and V flags are not used after CmpInstr.
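///
/// A minimal sketch of the pattern this enables (MIR-like pseudo syntax,
/// virtual register names are illustrative):
///   %2 = SUBWrr %0, %1
///   ...                     ; nothing here may clobber (or, for the non-S
///                           ; form, read) NZCV
///   SUBSWri %2, 0, 0        ; CmpInstr: compare %2 with zero, result unused
///   Bcc NE, %bb.1           ; only N/Z consumed afterwards
/// can be rewritten so that SUBWrr becomes SUBSWrr and the SUBSWri compare is
/// deleted (see substituteCmpToZero below).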
   1522 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
   1523                                        const TargetRegisterInfo *TRI) {
   1524   assert(MI);
   1525   assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
   1526   assert(CmpInstr);
   1527 
   1528   const unsigned CmpOpcode = CmpInstr->getOpcode();
   1529   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
   1530     return false;
   1531 
   1532   if (MI->getParent() != CmpInstr->getParent())
   1533     return false;
   1534 
   1535   if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
   1536     return false;
   1537 
   1538   AccessKind AccessToCheck = AK_Write;
   1539   if (sForm(*MI) != MI->getOpcode())
   1540     AccessToCheck = AK_All;
   1541   if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
   1542     return false;
   1543 
   1544   UsedNZCV NZCVUsedAfterCmp;
   1545   for (auto I = std::next(CmpInstr->getIterator()),
   1546             E = CmpInstr->getParent()->instr_end();
   1547        I != E; ++I) {
   1548     const MachineInstr &Instr = *I;
   1549     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
   1550       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
   1551       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
   1552         return false;
   1553       NZCVUsedAfterCmp |= getUsedNZCV(CC);
   1554     }
   1555 
   1556     if (Instr.modifiesRegister(AArch64::NZCV, TRI))
   1557       break;
   1558   }
   1559 
   1560   return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
   1561 }
   1562 
   1563 /// Substitute an instruction comparing to zero with another instruction
   1564 /// that produces the needed condition flags.
   1565 ///
   1566 /// Return true on success.
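///
/// A minimal sketch (virtual register names are illustrative): given
///   %1 = ANDWri %0, <imm>
///   SUBSWri %1, 0, 0        ; cmp %1, #0, result otherwise unused
/// the ANDWri is rewritten to ANDSWri (which now defines NZCV) and the
/// compare is erased.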
   1567 bool AArch64InstrInfo::substituteCmpToZero(
   1568     MachineInstr &CmpInstr, unsigned SrcReg,
   1569     const MachineRegisterInfo *MRI) const {
   1570   assert(MRI);
   1571   // Get the unique definition of SrcReg.
   1572   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
   1573   if (!MI)
   1574     return false;
   1575 
   1576   const TargetRegisterInfo *TRI = &getRegisterInfo();
   1577 
   1578   unsigned NewOpc = sForm(*MI);
   1579   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
   1580     return false;
   1581 
   1582   if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
   1583     return false;
   1584 
   1585   // Update the instruction to set NZCV.
   1586   MI->setDesc(get(NewOpc));
   1587   CmpInstr.eraseFromParent();
   1588   bool succeeded = UpdateOperandRegClass(*MI);
   1589   (void)succeeded;
   1590   assert(succeeded && "Some operands' register classes are incompatible!");
   1591   MI->addRegisterDefined(AArch64::NZCV, TRI);
   1592   return true;
   1593 }
   1594 
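// Expand the LOAD_STACK_GUARD pseudo after register allocation. As an
// illustrative sketch for the small code model without a GOT, loading the
// guard value roughly becomes:
//   adrp xN, guard_symbol
//   ldr  xN, [xN, :lo12:guard_symbol]
// where "guard_symbol" stands for whatever GlobalValue the pseudo's memory
// operand references; the GOT and large-code-model paths below form the
// address differently before the final load.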
   1595 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   1596   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
   1597     return false;
   1598 
   1599   MachineBasicBlock &MBB = *MI.getParent();
   1600   DebugLoc DL = MI.getDebugLoc();
   1601   unsigned Reg = MI.getOperand(0).getReg();
   1602   const GlobalValue *GV =
   1603       cast<GlobalValue>((*MI.memoperands_begin())->getValue());
   1604   const TargetMachine &TM = MBB.getParent()->getTarget();
   1605   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
   1606   const unsigned char MO_NC = AArch64II::MO_NC;
   1607 
   1608   if ((OpFlags & AArch64II::MO_GOT) != 0) {
   1609     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
   1610         .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
   1611     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   1612         .addReg(Reg, RegState::Kill)
   1613         .addImm(0)
   1614         .addMemOperand(*MI.memoperands_begin());
   1615   } else if (TM.getCodeModel() == CodeModel::Large) {
   1616     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
   1617         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
   1618         .addImm(0);
   1619     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   1620         .addReg(Reg, RegState::Kill)
   1621         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
   1622         .addImm(16);
   1623     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   1624         .addReg(Reg, RegState::Kill)
   1625         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
   1626         .addImm(32);
   1627     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   1628         .addReg(Reg, RegState::Kill)
   1629         .addGlobalAddress(GV, 0, AArch64II::MO_G3)
   1630         .addImm(48);
   1631     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   1632         .addReg(Reg, RegState::Kill)
   1633         .addImm(0)
   1634         .addMemOperand(*MI.memoperands_begin());
   1635   } else {
   1636     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
   1637         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
   1638     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
   1639     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   1640         .addReg(Reg, RegState::Kill)
   1641         .addGlobalAddress(GV, 0, LoFlags)
   1642         .addMemOperand(*MI.memoperands_begin());
   1643   }
   1644 
   1645   MBB.erase(MI);
   1646 
   1647   return true;
   1648 }
   1649 
   1650 /// Return true if this instruction has a shifted register operand with a non-zero shift immediate.
   1651 bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
   1652   switch (MI.getOpcode()) {
   1653   default:
   1654     break;
   1655   case AArch64::ADDSWrs:
   1656   case AArch64::ADDSXrs:
   1657   case AArch64::ADDWrs:
   1658   case AArch64::ADDXrs:
   1659   case AArch64::ANDSWrs:
   1660   case AArch64::ANDSXrs:
   1661   case AArch64::ANDWrs:
   1662   case AArch64::ANDXrs:
   1663   case AArch64::BICSWrs:
   1664   case AArch64::BICSXrs:
   1665   case AArch64::BICWrs:
   1666   case AArch64::BICXrs:
   1667   case AArch64::EONWrs:
   1668   case AArch64::EONXrs:
   1669   case AArch64::EORWrs:
   1670   case AArch64::EORXrs:
   1671   case AArch64::ORNWrs:
   1672   case AArch64::ORNXrs:
   1673   case AArch64::ORRWrs:
   1674   case AArch64::ORRXrs:
   1675   case AArch64::SUBSWrs:
   1676   case AArch64::SUBSXrs:
   1677   case AArch64::SUBWrs:
   1678   case AArch64::SUBXrs:
   1679     if (MI.getOperand(3).isImm()) {
   1680       unsigned val = MI.getOperand(3).getImm();
   1681       return (val != 0);
   1682     }
   1683     break;
   1684   }
   1685   return false;
   1686 }
   1687 
   1688 /// Return true if this instruction has an extended register operand with a non-zero extend immediate.
   1689 bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
   1690   switch (MI.getOpcode()) {
   1691   default:
   1692     break;
   1693   case AArch64::ADDSWrx:
   1694   case AArch64::ADDSXrx:
   1695   case AArch64::ADDSXrx64:
   1696   case AArch64::ADDWrx:
   1697   case AArch64::ADDXrx:
   1698   case AArch64::ADDXrx64:
   1699   case AArch64::SUBSWrx:
   1700   case AArch64::SUBSXrx:
   1701   case AArch64::SUBSXrx64:
   1702   case AArch64::SUBWrx:
   1703   case AArch64::SUBXrx:
   1704   case AArch64::SUBXrx64:
   1705     if (MI.getOperand(3).isImm()) {
   1706       unsigned val = MI.getOperand(3).getImm();
   1707       return (val != 0);
   1708     }
   1709     break;
   1710   }
   1711 
   1712   return false;
   1713 }
   1714 
   1715 // Return true if this instruction simply sets its single destination register
   1716 // to zero. This is equivalent to a register rename of the zero-register.
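// For example "movz w0, #0" (MOVZWi with a zero immediate and LSL #0) and
// "and w0, wzr, #imm" (ANDWri reading WZR) both qualify: the destination is
// guaranteed to hold zero, so the def could just as well be renamed to WZR/XZR.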
   1717 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
   1718   switch (MI.getOpcode()) {
   1719   default:
   1720     break;
   1721   case AArch64::MOVZWi:
   1722   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
   1723     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
   1724       assert(MI.getDesc().getNumOperands() == 3 &&
   1725              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
   1726       return true;
   1727     }
   1728     break;
   1729   case AArch64::ANDWri: // and Rd, Rzr, #imm
   1730     return MI.getOperand(1).getReg() == AArch64::WZR;
   1731   case AArch64::ANDXri:
   1732     return MI.getOperand(1).getReg() == AArch64::XZR;
   1733   case TargetOpcode::COPY:
   1734     return MI.getOperand(1).getReg() == AArch64::WZR;
   1735   }
   1736   return false;
   1737 }
   1738 
   1739 // Return true if this instruction simply renames a general register without
   1740 // modifying bits.
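// For example "orr x0, xzr, x1" (ORRXrs with XZR and LSL #0) and
// "add x0, x1, #0" (ADDXri with a zero immediate and shift) are plain
// register moves.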
   1741 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
   1742   switch (MI.getOpcode()) {
   1743   default:
   1744     break;
   1745   case TargetOpcode::COPY: {
   1746     // GPR32 copies will be lowered to ORRXrs
   1747     unsigned DstReg = MI.getOperand(0).getReg();
   1748     return (AArch64::GPR32RegClass.contains(DstReg) ||
   1749             AArch64::GPR64RegClass.contains(DstReg));
   1750   }
   1751   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
   1752     if (MI.getOperand(1).getReg() == AArch64::XZR) {
   1753       assert(MI.getDesc().getNumOperands() == 4 &&
   1754              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
   1755       return true;
   1756     }
   1757     break;
   1758   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
   1759     if (MI.getOperand(2).getImm() == 0) {
   1760       assert(MI.getDesc().getNumOperands() == 4 &&
   1761              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
   1762       return true;
   1763     }
   1764     break;
   1765   }
   1766   return false;
   1767 }
   1768 
   1769 // Return true if this instruction simply renames a floating-point register
   1770 // without modifying bits.
   1771 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
   1772   switch (MI.getOpcode()) {
   1773   default:
   1774     break;
   1775   case TargetOpcode::COPY: {
   1776     // FPR64 copies will be lowered to ORR.16b
   1777     unsigned DstReg = MI.getOperand(0).getReg();
   1778     return (AArch64::FPR64RegClass.contains(DstReg) ||
   1779             AArch64::FPR128RegClass.contains(DstReg));
   1780   }
   1781   case AArch64::ORRv16i8:
   1782     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
   1783       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
   1784              "invalid ORRv16i8 operands");
   1785       return true;
   1786     }
   1787     break;
   1788   }
   1789   return false;
   1790 }
   1791 
   1792 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
   1793                                                int &FrameIndex) const {
   1794   switch (MI.getOpcode()) {
   1795   default:
   1796     break;
   1797   case AArch64::LDRWui:
   1798   case AArch64::LDRXui:
   1799   case AArch64::LDRBui:
   1800   case AArch64::LDRHui:
   1801   case AArch64::LDRSui:
   1802   case AArch64::LDRDui:
   1803   case AArch64::LDRQui:
   1804     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
   1805         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
   1806       FrameIndex = MI.getOperand(1).getIndex();
   1807       return MI.getOperand(0).getReg();
   1808     }
   1809     break;
   1810   }
   1811 
   1812   return 0;
   1813 }
   1814 
   1815 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
   1816                                               int &FrameIndex) const {
   1817   switch (MI.getOpcode()) {
   1818   default:
   1819     break;
   1820   case AArch64::STRWui:
   1821   case AArch64::STRXui:
   1822   case AArch64::STRBui:
   1823   case AArch64::STRHui:
   1824   case AArch64::STRSui:
   1825   case AArch64::STRDui:
   1826   case AArch64::STRQui:
   1827     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
   1828         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
   1829       FrameIndex = MI.getOperand(1).getIndex();
   1830       return MI.getOperand(0).getReg();
   1831     }
   1832     break;
   1833   }
   1834   return 0;
   1835 }
   1836 
   1837 /// Return true if this load/store scales or extends its register offset.
   1838 /// This refers to scaling a dynamic index as opposed to scaled immediates.
   1839 /// MI should be a memory op that allows scaled addressing.
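/// For example "ldr x0, [x1, x2, lsl #3]" (LDRXroX with the shift bit set)
/// uses a scaled register offset, whereas "ldr x0, [x1, x2]" (UXTX extend,
/// no shift) does not.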
   1840 bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
   1841   switch (MI.getOpcode()) {
   1842   default:
   1843     break;
   1844   case AArch64::LDRBBroW:
   1845   case AArch64::LDRBroW:
   1846   case AArch64::LDRDroW:
   1847   case AArch64::LDRHHroW:
   1848   case AArch64::LDRHroW:
   1849   case AArch64::LDRQroW:
   1850   case AArch64::LDRSBWroW:
   1851   case AArch64::LDRSBXroW:
   1852   case AArch64::LDRSHWroW:
   1853   case AArch64::LDRSHXroW:
   1854   case AArch64::LDRSWroW:
   1855   case AArch64::LDRSroW:
   1856   case AArch64::LDRWroW:
   1857   case AArch64::LDRXroW:
   1858   case AArch64::STRBBroW:
   1859   case AArch64::STRBroW:
   1860   case AArch64::STRDroW:
   1861   case AArch64::STRHHroW:
   1862   case AArch64::STRHroW:
   1863   case AArch64::STRQroW:
   1864   case AArch64::STRSroW:
   1865   case AArch64::STRWroW:
   1866   case AArch64::STRXroW:
   1867   case AArch64::LDRBBroX:
   1868   case AArch64::LDRBroX:
   1869   case AArch64::LDRDroX:
   1870   case AArch64::LDRHHroX:
   1871   case AArch64::LDRHroX:
   1872   case AArch64::LDRQroX:
   1873   case AArch64::LDRSBWroX:
   1874   case AArch64::LDRSBXroX:
   1875   case AArch64::LDRSHWroX:
   1876   case AArch64::LDRSHXroX:
   1877   case AArch64::LDRSWroX:
   1878   case AArch64::LDRSroX:
   1879   case AArch64::LDRWroX:
   1880   case AArch64::LDRXroX:
   1881   case AArch64::STRBBroX:
   1882   case AArch64::STRBroX:
   1883   case AArch64::STRDroX:
   1884   case AArch64::STRHHroX:
   1885   case AArch64::STRHroX:
   1886   case AArch64::STRQroX:
   1887   case AArch64::STRSroX:
   1888   case AArch64::STRWroX:
   1889   case AArch64::STRXroX:
   1890 
   1891     unsigned Val = MI.getOperand(3).getImm();
   1892     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
   1893     return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
   1894   }
   1895   return false;
   1896 }
   1897 
   1898 /// Check all MachineMemOperands for a hint to suppress pairing.
   1899 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
   1900   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
   1901     return MMO->getFlags() & MOSuppressPair;
   1902   });
   1903 }
   1904 
   1905 /// Set a flag on the first MachineMemOperand to suppress pairing.
   1906 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
   1907   if (MI.memoperands_empty())
   1908     return;
   1909   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
   1910 }
   1911 
   1912 /// Check all MachineMemOperands for a hint that the load/store is strided.
   1913 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
   1914   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
   1915     return MMO->getFlags() & MOStridedAccess;
   1916   });
   1917 }
   1918 
   1919 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
   1920   switch (Opc) {
   1921   default:
   1922     return false;
   1923   case AArch64::STURSi:
   1924   case AArch64::STURDi:
   1925   case AArch64::STURQi:
   1926   case AArch64::STURBBi:
   1927   case AArch64::STURHHi:
   1928   case AArch64::STURWi:
   1929   case AArch64::STURXi:
   1930   case AArch64::LDURSi:
   1931   case AArch64::LDURDi:
   1932   case AArch64::LDURQi:
   1933   case AArch64::LDURWi:
   1934   case AArch64::LDURXi:
   1935   case AArch64::LDURSWi:
   1936   case AArch64::LDURHHi:
   1937   case AArch64::LDURBBi:
   1938   case AArch64::LDURSBWi:
   1939   case AArch64::LDURSHWi:
   1940     return true;
   1941   }
   1942 }
   1943 
   1944 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
   1945   switch (MI.getOpcode()) {
   1946   default:
   1947     return false;
   1948   // Scaled instructions.
   1949   case AArch64::STRSui:
   1950   case AArch64::STRDui:
   1951   case AArch64::STRQui:
   1952   case AArch64::STRXui:
   1953   case AArch64::STRWui:
   1954   case AArch64::LDRSui:
   1955   case AArch64::LDRDui:
   1956   case AArch64::LDRQui:
   1957   case AArch64::LDRXui:
   1958   case AArch64::LDRWui:
   1959   case AArch64::LDRSWui:
   1960   // Unscaled instructions.
   1961   case AArch64::STURSi:
   1962   case AArch64::STURDi:
   1963   case AArch64::STURQi:
   1964   case AArch64::STURWi:
   1965   case AArch64::STURXi:
   1966   case AArch64::LDURSi:
   1967   case AArch64::LDURDi:
   1968   case AArch64::LDURQi:
   1969   case AArch64::LDURWi:
   1970   case AArch64::LDURXi:
   1971   case AArch64::LDURSWi:
   1972     return true;
   1973   }
   1974 }
   1975 
   1976 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
   1977                                                    bool &Is64Bit) {
   1978   switch (Opc) {
   1979   default:
   1980     llvm_unreachable("Opcode has no flag setting equivalent!");
   1981   // 32-bit cases:
   1982   case AArch64::ADDWri:
   1983     Is64Bit = false;
   1984     return AArch64::ADDSWri;
   1985   case AArch64::ADDWrr:
   1986     Is64Bit = false;
   1987     return AArch64::ADDSWrr;
   1988   case AArch64::ADDWrs:
   1989     Is64Bit = false;
   1990     return AArch64::ADDSWrs;
   1991   case AArch64::ADDWrx:
   1992     Is64Bit = false;
   1993     return AArch64::ADDSWrx;
   1994   case AArch64::ANDWri:
   1995     Is64Bit = false;
   1996     return AArch64::ANDSWri;
   1997   case AArch64::ANDWrr:
   1998     Is64Bit = false;
   1999     return AArch64::ANDSWrr;
   2000   case AArch64::ANDWrs:
   2001     Is64Bit = false;
   2002     return AArch64::ANDSWrs;
   2003   case AArch64::BICWrr:
   2004     Is64Bit = false;
   2005     return AArch64::BICSWrr;
   2006   case AArch64::BICWrs:
   2007     Is64Bit = false;
   2008     return AArch64::BICSWrs;
   2009   case AArch64::SUBWri:
   2010     Is64Bit = false;
   2011     return AArch64::SUBSWri;
   2012   case AArch64::SUBWrr:
   2013     Is64Bit = false;
   2014     return AArch64::SUBSWrr;
   2015   case AArch64::SUBWrs:
   2016     Is64Bit = false;
   2017     return AArch64::SUBSWrs;
   2018   case AArch64::SUBWrx:
   2019     Is64Bit = false;
   2020     return AArch64::SUBSWrx;
   2021   // 64-bit cases:
   2022   case AArch64::ADDXri:
   2023     Is64Bit = true;
   2024     return AArch64::ADDSXri;
   2025   case AArch64::ADDXrr:
   2026     Is64Bit = true;
   2027     return AArch64::ADDSXrr;
   2028   case AArch64::ADDXrs:
   2029     Is64Bit = true;
   2030     return AArch64::ADDSXrs;
   2031   case AArch64::ADDXrx:
   2032     Is64Bit = true;
   2033     return AArch64::ADDSXrx;
   2034   case AArch64::ANDXri:
   2035     Is64Bit = true;
   2036     return AArch64::ANDSXri;
   2037   case AArch64::ANDXrr:
   2038     Is64Bit = true;
   2039     return AArch64::ANDSXrr;
   2040   case AArch64::ANDXrs:
   2041     Is64Bit = true;
   2042     return AArch64::ANDSXrs;
   2043   case AArch64::BICXrr:
   2044     Is64Bit = true;
   2045     return AArch64::BICSXrr;
   2046   case AArch64::BICXrs:
   2047     Is64Bit = true;
   2048     return AArch64::BICSXrs;
   2049   case AArch64::SUBXri:
   2050     Is64Bit = true;
   2051     return AArch64::SUBSXri;
   2052   case AArch64::SUBXrr:
   2053     Is64Bit = true;
   2054     return AArch64::SUBSXrr;
   2055   case AArch64::SUBXrs:
   2056     Is64Bit = true;
   2057     return AArch64::SUBSXrs;
   2058   case AArch64::SUBXrx:
   2059     Is64Bit = true;
   2060     return AArch64::SUBSXrx;
   2061   }
   2062 }
   2063 
   2064 // Is this a candidate for ld/st merging or pairing?  For example, we don't
   2065 // touch volatiles or load/stores that have a hint to avoid pair formation.
   2066 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
   2067   // If this is a volatile load/store, don't mess with it.
   2068   if (MI.hasOrderedMemoryRef())
   2069     return false;
   2070 
   2071   // Make sure this is a reg+imm (as opposed to an address reloc).
   2072   assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
   2073   if (!MI.getOperand(2).isImm())
   2074     return false;
   2075 
   2076   // Can't merge/pair if the instruction modifies the base register.
   2077   // e.g., ldr x0, [x0]
   2078   unsigned BaseReg = MI.getOperand(1).getReg();
   2079   const TargetRegisterInfo *TRI = &getRegisterInfo();
   2080   if (MI.modifiesRegister(BaseReg, TRI))
   2081     return false;
   2082 
   2083   // Check if this load/store has a hint to avoid pair formation.
   2084   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
   2085   if (isLdStPairSuppressed(MI))
   2086     return false;
   2087 
   2088   // On some CPUs quad load/store pairs are slower than two single load/stores.
   2089   if (Subtarget.isPaired128Slow()) {
   2090     switch (MI.getOpcode()) {
   2091     default:
   2092       break;
   2093     case AArch64::LDURQi:
   2094     case AArch64::STURQi:
   2095     case AArch64::LDRQui:
   2096     case AArch64::STRQui:
   2097       return false;
   2098     }
   2099   }
   2100 
   2101   return true;
   2102 }
   2103 
   2104 bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
   2105     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
   2106     const TargetRegisterInfo *TRI) const {
   2107   unsigned Width;
   2108   return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
   2109 }
   2110 
   2111 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
   2112     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
   2113     const TargetRegisterInfo *TRI) const {
   2114   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
   2115   // Handle only loads/stores with base register followed by immediate offset.
   2116   if (LdSt.getNumExplicitOperands() == 3) {
   2117     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
   2118     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
   2119       return false;
   2120   } else if (LdSt.getNumExplicitOperands() == 4) {
   2121     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
   2122     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
   2123         !LdSt.getOperand(3).isImm())
   2124       return false;
   2125   } else
   2126     return false;
   2127 
   2128   // Get the scaling factor for the instruction and set the width for the
   2129   // instruction.
   2130   unsigned Scale = 0;
   2131   int64_t Dummy1, Dummy2;
   2132 
   2133   // If this returns false, then it's an instruction we don't want to handle.
   2134   if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
   2135     return false;
   2136 
   2137   // Compute the offset. Offset is calculated as the immediate operand
   2138   // multiplied by the scaling factor. Unscaled instructions have scaling factor
   2139   // set to 1.
   2140   if (LdSt.getNumExplicitOperands() == 3) {
   2141     BaseReg = LdSt.getOperand(1).getReg();
   2142     Offset = LdSt.getOperand(2).getImm() * Scale;
   2143   } else {
   2144     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
   2145     BaseReg = LdSt.getOperand(2).getReg();
   2146     Offset = LdSt.getOperand(3).getImm() * Scale;
   2147   }
   2148   return true;
   2149 }
   2150 
   2151 MachineOperand &
   2152 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
   2153   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
   2154   MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
   2155   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
   2156   return OfsOp;
   2157 }
   2158 
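// Populate Scale, Width, MinOffset and MaxOffset for a load/store opcode. The
// immediate offset is expressed in Scale-byte units: e.g. LDRXui has
// Scale = Width = 8 and an immediate range of 0..4095 (byte offsets
// 0..32760), while the unscaled LDURXi has Scale = 1 and a signed range of
// -256..255.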
   2159 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
   2160                                     unsigned &Width, int64_t &MinOffset,
   2161                                     int64_t &MaxOffset) const {
   2162   switch (Opcode) {
   2163   // Not a memory operation or something we want to handle.
   2164   default:
   2165     Scale = Width = 0;
   2166     MinOffset = MaxOffset = 0;
   2167     return false;
   2168   case AArch64::STRWpost:
   2169   case AArch64::LDRWpost:
   2170     Width = 32;
   2171     Scale = 4;
   2172     MinOffset = -256;
   2173     MaxOffset = 255;
   2174     break;
   2175   case AArch64::LDURQi:
   2176   case AArch64::STURQi:
   2177     Width = 16;
   2178     Scale = 1;
   2179     MinOffset = -256;
   2180     MaxOffset = 255;
   2181     break;
   2182   case AArch64::LDURXi:
   2183   case AArch64::LDURDi:
   2184   case AArch64::STURXi:
   2185   case AArch64::STURDi:
   2186     Width = 8;
   2187     Scale = 1;
   2188     MinOffset = -256;
   2189     MaxOffset = 255;
   2190     break;
   2191   case AArch64::LDURWi:
   2192   case AArch64::LDURSi:
   2193   case AArch64::LDURSWi:
   2194   case AArch64::STURWi:
   2195   case AArch64::STURSi:
   2196     Width = 4;
   2197     Scale = 1;
   2198     MinOffset = -256;
   2199     MaxOffset = 255;
   2200     break;
   2201   case AArch64::LDURHi:
   2202   case AArch64::LDURHHi:
   2203   case AArch64::LDURSHXi:
   2204   case AArch64::LDURSHWi:
   2205   case AArch64::STURHi:
   2206   case AArch64::STURHHi:
   2207     Width = 2;
   2208     Scale = 1;
   2209     MinOffset = -256;
   2210     MaxOffset = 255;
   2211     break;
   2212   case AArch64::LDURBi:
   2213   case AArch64::LDURBBi:
   2214   case AArch64::LDURSBXi:
   2215   case AArch64::LDURSBWi:
   2216   case AArch64::STURBi:
   2217   case AArch64::STURBBi:
   2218     Width = 1;
   2219     Scale = 1;
   2220     MinOffset = -256;
   2221     MaxOffset = 255;
   2222     break;
   2223   case AArch64::LDPQi:
   2224   case AArch64::LDNPQi:
   2225   case AArch64::STPQi:
   2226   case AArch64::STNPQi:
   2227     Scale = 16;
   2228     Width = 32;
   2229     MinOffset = -64;
   2230     MaxOffset = 63;
   2231     break;
   2232   case AArch64::LDRQui:
   2233   case AArch64::STRQui:
   2234     Scale = Width = 16;
   2235     MinOffset = 0;
   2236     MaxOffset = 4095;
   2237     break;
   2238   case AArch64::LDPXi:
   2239   case AArch64::LDPDi:
   2240   case AArch64::LDNPXi:
   2241   case AArch64::LDNPDi:
   2242   case AArch64::STPXi:
   2243   case AArch64::STPDi:
   2244   case AArch64::STNPXi:
   2245   case AArch64::STNPDi:
   2246     Scale = 8;
   2247     Width = 16;
   2248     MinOffset = -64;
   2249     MaxOffset = 63;
   2250     break;
   2251   case AArch64::LDRXui:
   2252   case AArch64::LDRDui:
   2253   case AArch64::STRXui:
   2254   case AArch64::STRDui:
   2255     Scale = Width = 8;
   2256     MinOffset = 0;
   2257     MaxOffset = 4095;
   2258     break;
   2259   case AArch64::LDPWi:
   2260   case AArch64::LDPSi:
   2261   case AArch64::LDNPWi:
   2262   case AArch64::LDNPSi:
   2263   case AArch64::STPWi:
   2264   case AArch64::STPSi:
   2265   case AArch64::STNPWi:
   2266   case AArch64::STNPSi:
   2267     Scale = 4;
   2268     Width = 8;
   2269     MinOffset = -64;
   2270     MaxOffset = 63;
   2271     break;
   2272   case AArch64::LDRWui:
   2273   case AArch64::LDRSui:
   2274   case AArch64::LDRSWui:
   2275   case AArch64::STRWui:
   2276   case AArch64::STRSui:
   2277     Scale = Width = 4;
   2278     MinOffset = 0;
   2279     MaxOffset = 4095;
   2280     break;
   2281   case AArch64::LDRHui:
   2282   case AArch64::LDRHHui:
   2283   case AArch64::STRHui:
   2284   case AArch64::STRHHui:
   2285     Scale = Width = 2;
   2286     MinOffset = 0;
   2287     MaxOffset = 4095;
   2288     break;
   2289   case AArch64::LDRBui:
   2290   case AArch64::LDRBBui:
   2291   case AArch64::STRBui:
   2292   case AArch64::STRBBui:
   2293     Scale = Width = 1;
   2294     MinOffset = 0;
   2295     MaxOffset = 4095;
   2296     break;
   2297   }
   2298 
   2299   return true;
   2300 }
   2301 
   2302 // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
   2303 // scaled.
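// For example an unscaled LDURXi byte offset of 16 becomes element offset 2
// (stride 8), while a byte offset of 12 is rejected because it is not a
// multiple of the stride.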
   2304 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
   2305   unsigned OffsetStride = 1;
   2306   switch (Opc) {
   2307   default:
   2308     return false;
   2309   case AArch64::LDURQi:
   2310   case AArch64::STURQi:
   2311     OffsetStride = 16;
   2312     break;
   2313   case AArch64::LDURXi:
   2314   case AArch64::LDURDi:
   2315   case AArch64::STURXi:
   2316   case AArch64::STURDi:
   2317     OffsetStride = 8;
   2318     break;
   2319   case AArch64::LDURWi:
   2320   case AArch64::LDURSi:
   2321   case AArch64::LDURSWi:
   2322   case AArch64::STURWi:
   2323   case AArch64::STURSi:
   2324     OffsetStride = 4;
   2325     break;
   2326   }
   2327   // If the byte-offset isn't a multiple of the stride, we can't scale this
   2328   // offset.
   2329   if (Offset % OffsetStride != 0)
   2330     return false;
   2331 
   2332   // Convert the byte-offset used by unscaled into an "element" offset used
   2333   // by the scaled pair load/store instructions.
   2334   Offset /= OffsetStride;
   2335   return true;
   2336 }
   2337 
   2338 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
   2339   if (FirstOpc == SecondOpc)
   2340     return true;
   2341   // We can also pair sign-ext and zero-ext instructions.
   2342   switch (FirstOpc) {
   2343   default:
   2344     return false;
   2345   case AArch64::LDRWui:
   2346   case AArch64::LDURWi:
   2347     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
   2348   case AArch64::LDRSWui:
   2349   case AArch64::LDURSWi:
   2350     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
   2351   }
   2352   // These instructions can't be paired based on their opcodes.
   2353   return false;
   2354 }
   2355 
   2356 /// Detect opportunities for ldp/stp formation.
   2357 ///
   2358 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
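/// For example "ldr x1, [x0, #8]" followed by "ldr x2, [x0, #16]" (LDRXui
/// element offsets 1 and 2) is a candidate for "ldp x1, x2, [x0, #8]",
/// provided both instructions also pass isCandidateToMergeOrPair.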
   2359 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
   2360                                            unsigned BaseReg1,
   2361                                            MachineInstr &SecondLdSt,
   2362                                            unsigned BaseReg2,
   2363                                            unsigned NumLoads) const {
   2364   if (BaseReg1 != BaseReg2)
   2365     return false;
   2366 
   2367   // Only cluster up to a single pair.
   2368   if (NumLoads > 1)
   2369     return false;
   2370 
   2371   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
   2372     return false;
   2373 
   2374   // Can we pair these instructions based on their opcodes?
   2375   unsigned FirstOpc = FirstLdSt.getOpcode();
   2376   unsigned SecondOpc = SecondLdSt.getOpcode();
   2377   if (!canPairLdStOpc(FirstOpc, SecondOpc))
   2378     return false;
   2379 
   2380   // Can't merge volatiles or load/stores that have a hint to avoid pair
   2381   // formation, for example.
   2382   if (!isCandidateToMergeOrPair(FirstLdSt) ||
   2383       !isCandidateToMergeOrPair(SecondLdSt))
   2384     return false;
   2385 
   2386   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
   2387   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
   2388   if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
   2389     return false;
   2390 
   2391   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
   2392   if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
   2393     return false;
   2394 
   2395   // Pairwise instructions have a 7-bit signed offset field.
   2396   if (Offset1 > 63 || Offset1 < -64)
   2397     return false;
   2398 
   2399   // The caller should already have ordered First/SecondLdSt by offset.
   2400   assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
   2401   return Offset1 + 1 == Offset2;
   2402 }
   2403 
   2404 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
   2405                                             unsigned Reg, unsigned SubIdx,
   2406                                             unsigned State,
   2407                                             const TargetRegisterInfo *TRI) {
   2408   if (!SubIdx)
   2409     return MIB.addReg(Reg, State);
   2410 
   2411   if (TargetRegisterInfo::isPhysicalRegister(Reg))
   2412     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
   2413   return MIB.addReg(Reg, State, SubIdx);
   2414 }
   2415 
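// Returns true if copying a register tuple sub-register by sub-register in
// ascending order would overwrite a source sub-register before it is read.
// For example, copying D0_D1_D2 to D1_D2_D3 forwards would write d1 (the
// destination's dsub0) before the source's dsub1 (also d1) has been copied,
// so copyPhysRegTuple iterates the sub-registers in descending order instead.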
   2416 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
   2417                                         unsigned NumRegs) {
   2418   // We really want the positive remainder mod 32 here, which happens to be
   2419   // easily obtainable with a mask.
   2420   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
   2421 }
   2422 
   2423 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
   2424                                         MachineBasicBlock::iterator I,
   2425                                         const DebugLoc &DL, unsigned DestReg,
   2426                                         unsigned SrcReg, bool KillSrc,
   2427                                         unsigned Opcode,
   2428                                         ArrayRef<unsigned> Indices) const {
   2429   assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
   2430   const TargetRegisterInfo *TRI = &getRegisterInfo();
   2431   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
   2432   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
   2433   unsigned NumRegs = Indices.size();
   2434 
   2435   int SubReg = 0, End = NumRegs, Incr = 1;
   2436   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
   2437     SubReg = NumRegs - 1;
   2438     End = -1;
   2439     Incr = -1;
   2440   }
   2441 
   2442   for (; SubReg != End; SubReg += Incr) {
   2443     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
   2444     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
   2445     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
   2446     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
   2447   }
   2448 }
   2449 
   2450 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   2451                                    MachineBasicBlock::iterator I,
   2452                                    const DebugLoc &DL, unsigned DestReg,
   2453                                    unsigned SrcReg, bool KillSrc) const {
   2454   if (AArch64::GPR32spRegClass.contains(DestReg) &&
   2455       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
   2456     const TargetRegisterInfo *TRI = &getRegisterInfo();
   2457 
   2458     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
   2459       // If either operand is WSP, expand to ADD #0.
   2460       if (Subtarget.hasZeroCycleRegMove()) {
   2461         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
   2462         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
   2463                                                      &AArch64::GPR64spRegClass);
   2464         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
   2465                                                     &AArch64::GPR64spRegClass);
   2466         // This instruction is reading and writing X registers.  This may upset
   2467         // the register scavenger and machine verifier, so we need to indicate
   2468         // that we are reading an undefined value from SrcRegX, but a proper
   2469         // value from SrcReg.
   2470         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
   2471             .addReg(SrcRegX, RegState::Undef)
   2472             .addImm(0)
   2473             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
   2474             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
   2475       } else {
   2476         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
   2477             .addReg(SrcReg, getKillRegState(KillSrc))
   2478             .addImm(0)
   2479             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   2480       }
   2481     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
   2482       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
   2483           .addImm(0)
   2484           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   2485     } else {
   2486       if (Subtarget.hasZeroCycleRegMove()) {
   2487         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
   2488         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
   2489                                                      &AArch64::GPR64spRegClass);
   2490         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
   2491                                                     &AArch64::GPR64spRegClass);
   2492         // This instruction is reading and writing X registers.  This may upset
   2493         // the register scavenger and machine verifier, so we need to indicate
   2494         // that we are reading an undefined value from SrcRegX, but a proper
   2495         // value from SrcReg.
   2496         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
   2497             .addReg(AArch64::XZR)
   2498             .addReg(SrcRegX, RegState::Undef)
   2499             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
   2500       } else {
   2501         // Otherwise, expand to ORR WZR.
   2502         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
   2503             .addReg(AArch64::WZR)
   2504             .addReg(SrcReg, getKillRegState(KillSrc));
   2505       }
   2506     }
   2507     return;
   2508   }
   2509 
   2510   if (AArch64::GPR64spRegClass.contains(DestReg) &&
   2511       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
   2512     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
   2513       // If either operand is SP, expand to ADD #0.
   2514       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
   2515           .addReg(SrcReg, getKillRegState(KillSrc))
   2516           .addImm(0)
   2517           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   2518     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
   2519       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
   2520           .addImm(0)
   2521           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   2522     } else {
   2523       // Otherwise, expand to ORR XZR.
   2524       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
   2525           .addReg(AArch64::XZR)
   2526           .addReg(SrcReg, getKillRegState(KillSrc));
   2527     }
   2528     return;
   2529   }
   2530 
   2531   // Copy a DDDD register quad by copying the individual sub-registers.
   2532   if (AArch64::DDDDRegClass.contains(DestReg) &&
   2533       AArch64::DDDDRegClass.contains(SrcReg)) {
   2534     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
   2535                                        AArch64::dsub2, AArch64::dsub3};
   2536     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   2537                      Indices);
   2538     return;
   2539   }
   2540 
   2541   // Copy a DDD register triple by copying the individual sub-registers.
   2542   if (AArch64::DDDRegClass.contains(DestReg) &&
   2543       AArch64::DDDRegClass.contains(SrcReg)) {
   2544     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
   2545                                        AArch64::dsub2};
   2546     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   2547                      Indices);
   2548     return;
   2549   }
   2550 
   2551   // Copy a DD register pair by copying the individual sub-registers.
   2552   if (AArch64::DDRegClass.contains(DestReg) &&
   2553       AArch64::DDRegClass.contains(SrcReg)) {
   2554     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
   2555     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   2556                      Indices);
   2557     return;
   2558   }
   2559 
   2560   // Copy a QQQQ register quad by copying the individual sub-registers.
   2561   if (AArch64::QQQQRegClass.contains(DestReg) &&
   2562       AArch64::QQQQRegClass.contains(SrcReg)) {
   2563     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
   2564                                        AArch64::qsub2, AArch64::qsub3};
   2565     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   2566                      Indices);
   2567     return;
   2568   }
   2569 
   2570   // Copy a QQQ register triple by copying the individual sub-registers.
   2571   if (AArch64::QQQRegClass.contains(DestReg) &&
   2572       AArch64::QQQRegClass.contains(SrcReg)) {
   2573     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
   2574                                        AArch64::qsub2};
   2575     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   2576                      Indices);
   2577     return;
   2578   }
   2579 
   2580   // Copy a QQ register pair by copying the individual sub-registers.
   2581   if (AArch64::QQRegClass.contains(DestReg) &&
   2582       AArch64::QQRegClass.contains(SrcReg)) {
   2583     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
   2584     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   2585                      Indices);
   2586     return;
   2587   }
   2588 
   2589   if (AArch64::FPR128RegClass.contains(DestReg) &&
   2590       AArch64::FPR128RegClass.contains(SrcReg)) {
   2591     if (Subtarget.hasNEON()) {
   2592       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   2593           .addReg(SrcReg)
   2594           .addReg(SrcReg, getKillRegState(KillSrc));
   2595     } else {
   2596       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
   2597           .addReg(AArch64::SP, RegState::Define)
   2598           .addReg(SrcReg, getKillRegState(KillSrc))
   2599           .addReg(AArch64::SP)
   2600           .addImm(-16);
   2601       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
   2602           .addReg(AArch64::SP, RegState::Define)
   2603           .addReg(DestReg, RegState::Define)
   2604           .addReg(AArch64::SP)
   2605           .addImm(16);
   2606     }
   2607     return;
   2608   }
   2609 
   2610   if (AArch64::FPR64RegClass.contains(DestReg) &&
   2611       AArch64::FPR64RegClass.contains(SrcReg)) {
   2612     if (Subtarget.hasNEON()) {
   2613       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
   2614                                        &AArch64::FPR128RegClass);
   2615       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
   2616                                       &AArch64::FPR128RegClass);
   2617       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   2618           .addReg(SrcReg)
   2619           .addReg(SrcReg, getKillRegState(KillSrc));
   2620     } else {
   2621       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
   2622           .addReg(SrcReg, getKillRegState(KillSrc));
   2623     }
   2624     return;
   2625   }
   2626 
   2627   if (AArch64::FPR32RegClass.contains(DestReg) &&
   2628       AArch64::FPR32RegClass.contains(SrcReg)) {
   2629     if (Subtarget.hasNEON()) {
   2630       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
   2631                                        &AArch64::FPR128RegClass);
   2632       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
   2633                                       &AArch64::FPR128RegClass);
   2634       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   2635           .addReg(SrcReg)
   2636           .addReg(SrcReg, getKillRegState(KillSrc));
   2637     } else {
   2638       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   2639           .addReg(SrcReg, getKillRegState(KillSrc));
   2640     }
   2641     return;
   2642   }
   2643 
   2644   if (AArch64::FPR16RegClass.contains(DestReg) &&
   2645       AArch64::FPR16RegClass.contains(SrcReg)) {
   2646     if (Subtarget.hasNEON()) {
   2647       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
   2648                                        &AArch64::FPR128RegClass);
   2649       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
   2650                                       &AArch64::FPR128RegClass);
   2651       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   2652           .addReg(SrcReg)
   2653           .addReg(SrcReg, getKillRegState(KillSrc));
   2654     } else {
   2655       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
   2656                                        &AArch64::FPR32RegClass);
   2657       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
   2658                                       &AArch64::FPR32RegClass);
   2659       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   2660           .addReg(SrcReg, getKillRegState(KillSrc));
   2661     }
   2662     return;
   2663   }
   2664 
   2665   if (AArch64::FPR8RegClass.contains(DestReg) &&
   2666       AArch64::FPR8RegClass.contains(SrcReg)) {
   2667     if (Subtarget.hasNEON()) {
   2668       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
   2669                                        &AArch64::FPR128RegClass);
   2670       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
   2671                                       &AArch64::FPR128RegClass);
   2672       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   2673           .addReg(SrcReg)
   2674           .addReg(SrcReg, getKillRegState(KillSrc));
   2675     } else {
   2676       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
   2677                                        &AArch64::FPR32RegClass);
   2678       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
   2679                                       &AArch64::FPR32RegClass);
   2680       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   2681           .addReg(SrcReg, getKillRegState(KillSrc));
   2682     }
   2683     return;
   2684   }
   2685 
   2686   // Copies between GPR64 and FPR64.
   2687   if (AArch64::FPR64RegClass.contains(DestReg) &&
   2688       AArch64::GPR64RegClass.contains(SrcReg)) {
   2689     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
   2690         .addReg(SrcReg, getKillRegState(KillSrc));
   2691     return;
   2692   }
   2693   if (AArch64::GPR64RegClass.contains(DestReg) &&
   2694       AArch64::FPR64RegClass.contains(SrcReg)) {
   2695     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
   2696         .addReg(SrcReg, getKillRegState(KillSrc));
   2697     return;
   2698   }
   2699   // Copies between GPR32 and FPR32.
   2700   if (AArch64::FPR32RegClass.contains(DestReg) &&
   2701       AArch64::GPR32RegClass.contains(SrcReg)) {
   2702     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
   2703         .addReg(SrcReg, getKillRegState(KillSrc));
   2704     return;
   2705   }
   2706   if (AArch64::GPR32RegClass.contains(DestReg) &&
   2707       AArch64::FPR32RegClass.contains(SrcReg)) {
   2708     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
   2709         .addReg(SrcReg, getKillRegState(KillSrc));
   2710     return;
   2711   }
   2712 
   2713   if (DestReg == AArch64::NZCV) {
   2714     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
   2715     BuildMI(MBB, I, DL, get(AArch64::MSR))
   2716         .addImm(AArch64SysReg::NZCV)
   2717         .addReg(SrcReg, getKillRegState(KillSrc))
   2718         .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
   2719     return;
   2720   }
   2721 
   2722   if (SrcReg == AArch64::NZCV) {
   2723     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
   2724     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
   2725         .addImm(AArch64SysReg::NZCV)
   2726         .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
   2727     return;
   2728   }
   2729 
   2730   llvm_unreachable("unimplemented reg-to-reg copy");
   2731 }
   2732 
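// Spill SrcReg to stack slot FI. The store opcode is chosen from the register
// class's spill size, e.g. STRXui for a GPR64 spill and STRQui for an FPR128
// spill, while multi-register NEON tuples use ST1 variants that take no
// immediate offset (hence the Offset flag below).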
   2733 void AArch64InstrInfo::storeRegToStackSlot(
   2734     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
   2735     bool isKill, int FI, const TargetRegisterClass *RC,
   2736     const TargetRegisterInfo *TRI) const {
   2737   DebugLoc DL;
   2738   if (MBBI != MBB.end())
   2739     DL = MBBI->getDebugLoc();
   2740   MachineFunction &MF = *MBB.getParent();
   2741   MachineFrameInfo &MFI = MF.getFrameInfo();
   2742   unsigned Align = MFI.getObjectAlignment(FI);
   2743 
   2744   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
   2745   MachineMemOperand *MMO = MF.getMachineMemOperand(
   2746       PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
   2747   unsigned Opc = 0;
   2748   bool Offset = true;
   2749   switch (TRI->getSpillSize(*RC)) {
   2750   case 1:
   2751     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
   2752       Opc = AArch64::STRBui;
   2753     break;
   2754   case 2:
   2755     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
   2756       Opc = AArch64::STRHui;
   2757     break;
   2758   case 4:
   2759     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
   2760       Opc = AArch64::STRWui;
   2761       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
   2762         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
   2763       else
   2764         assert(SrcReg != AArch64::WSP);
   2765     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
   2766       Opc = AArch64::STRSui;
   2767     break;
   2768   case 8:
   2769     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
   2770       Opc = AArch64::STRXui;
   2771       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
   2772         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
   2773       else
   2774         assert(SrcReg != AArch64::SP);
   2775     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
   2776       Opc = AArch64::STRDui;
   2777     break;
   2778   case 16:
   2779     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
   2780       Opc = AArch64::STRQui;
   2781     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
   2782       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   2783       Opc = AArch64::ST1Twov1d;
   2784       Offset = false;
   2785     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
   2786       BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
   2787           .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
   2788                   getKillRegState(isKill))
   2789           .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
   2790                   getKillRegState(isKill))
   2791           .addFrameIndex(FI)
   2792           .addImm(0)
   2793           .addMemOperand(MMO);
   2794       return;
   2795     }
   2796     break;
   2797   case 24:
   2798     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
   2799       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   2800       Opc = AArch64::ST1Threev1d;
   2801       Offset = false;
   2802     }
   2803     break;
   2804   case 32:
   2805     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
   2806       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   2807       Opc = AArch64::ST1Fourv1d;
   2808       Offset = false;
   2809     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
   2810       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   2811       Opc = AArch64::ST1Twov2d;
   2812       Offset = false;
   2813     }
   2814     break;
   2815   case 48:
   2816     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
   2817       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   2818       Opc = AArch64::ST1Threev2d;
   2819       Offset = false;
   2820     }
   2821     break;
   2822   case 64:
   2823     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
   2824       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   2825       Opc = AArch64::ST1Fourv2d;
   2826       Offset = false;
   2827     }
   2828     break;
   2829   }
   2830   assert(Opc && "Unknown register class");
   2831 
   2832   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
   2833                                      .addReg(SrcReg, getKillRegState(isKill))
   2834                                      .addFrameIndex(FI);
   2835 
   2836   if (Offset)
   2837     MI.addImm(0);
   2838   MI.addMemOperand(MMO);
   2839 }
   2840 
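// Reload DestReg from stack slot FI. Opcode selection mirrors
// storeRegToStackSlot above (e.g. LDRXui, LDRQui, or an LD1 variant for the
// NEON register tuples).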
   2841 void AArch64InstrInfo::loadRegFromStackSlot(
   2842     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
   2843     int FI, const TargetRegisterClass *RC,
   2844     const TargetRegisterInfo *TRI) const {
   2845   DebugLoc DL;
   2846   if (MBBI != MBB.end())
   2847     DL = MBBI->getDebugLoc();
   2848   MachineFunction &MF = *MBB.getParent();
   2849   MachineFrameInfo &MFI = MF.getFrameInfo();
   2850   unsigned Align = MFI.getObjectAlignment(FI);
   2851   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
   2852   MachineMemOperand *MMO = MF.getMachineMemOperand(
   2853       PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
   2854 
   2855   unsigned Opc = 0;
   2856   bool Offset = true;
   2857   switch (TRI->getSpillSize(*RC)) {
   2858   case 1:
   2859     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
   2860       Opc = AArch64::LDRBui;
   2861     break;
   2862   case 2:
   2863     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
   2864       Opc = AArch64::LDRHui;
   2865     break;
   2866   case 4:
   2867     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
   2868       Opc = AArch64::LDRWui;
   2869       if (TargetRegisterInfo::isVirtualRegister(DestReg))
   2870         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
   2871       else
   2872         assert(DestReg != AArch64::WSP);
   2873     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
   2874       Opc = AArch64::LDRSui;
   2875     break;
   2876   case 8:
   2877     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
   2878       Opc = AArch64::LDRXui;
   2879       if (TargetRegisterInfo::isVirtualRegister(DestReg))
   2880         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
   2881       else
   2882         assert(DestReg != AArch64::SP);
   2883     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
   2884       Opc = AArch64::LDRDui;
   2885     break;
   2886   case 16:
   2887     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
   2888       Opc = AArch64::LDRQui;
   2889     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
   2890       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   2891       Opc = AArch64::LD1Twov1d;
   2892       Offset = false;
   2893     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
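              // The 128-bit GPR pair is reloaded as one LDP of its 64-bit halves,
              // e.g. (illustrative) "ldp x0, x1, [sp, #offset]".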
   2894       BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
   2895           .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
   2896                   getDefRegState(true))
   2897           .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
   2898                   getDefRegState(true))
   2899           .addFrameIndex(FI)
   2900           .addImm(0)
   2901           .addMemOperand(MMO);
   2902       return;
   2903     }
   2904     break;
   2905   case 24:
   2906     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
   2907       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   2908       Opc = AArch64::LD1Threev1d;
   2909       Offset = false;
   2910     }
   2911     break;
   2912   case 32:
   2913     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
   2914       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   2915       Opc = AArch64::LD1Fourv1d;
   2916       Offset = false;
   2917     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
   2918       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   2919       Opc = AArch64::LD1Twov2d;
   2920       Offset = false;
   2921     }
   2922     break;
   2923   case 48:
   2924     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
   2925       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   2926       Opc = AArch64::LD1Threev2d;
   2927       Offset = false;
   2928     }
   2929     break;
   2930   case 64:
   2931     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
   2932       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   2933       Opc = AArch64::LD1Fourv2d;
   2934       Offset = false;
   2935     }
   2936     break;
   2937   }
   2938   assert(Opc && "Unknown register class");
   2939 
   2940   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
   2941                                      .addReg(DestReg, getDefRegState(true))
   2942                                      .addFrameIndex(FI);
   2943   if (Offset)
   2944     MI.addImm(0);
   2945   MI.addMemOperand(MMO);
   2946 }
   2947 
   2948 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
   2949                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
   2950                            unsigned DestReg, unsigned SrcReg, int Offset,
   2951                            const TargetInstrInfo *TII,
   2952                            MachineInstr::MIFlag Flag, bool SetNZCV) {
   2953   if (DestReg == SrcReg && Offset == 0)
   2954     return;
   2955 
   2956   assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
   2957          "SP increment/decrement not 16-byte aligned");
   2958 
   2959   bool isSub = Offset < 0;
   2960   if (isSub)
   2961     Offset = -Offset;
   2962 
   2963   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
   2964   // scratch register.  If DestReg is a virtual register, use it as the
   2965   // scratch register; otherwise, create a new virtual register (to be
   2966   // replaced by the scavenger at the end of PEI).  That case can be optimized
   2967   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
   2968   // register can be loaded with offset%8 and the add/sub can use an extending
   2969   // instruction with LSL#3.
   2970   // Currently the function handles any offset but may generate a poor code
   2971   // sequence.
   2972   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
   2973 
   2974   unsigned Opc;
   2975   if (SetNZCV)
   2976     Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
   2977   else
   2978     Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
   2979   const unsigned MaxEncoding = 0xfff;
   2980   const unsigned ShiftSize = 12;
   2981   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
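          // Illustrative walk-through (assuming the plain ADDXri case): for
          // Offset = 0x12345 the loop emits "add xD, xN, #0x12, lsl #12" and the
          // final BuildMI below adds the remaining "#0x345".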
   2982   while (((unsigned)Offset) >= (1 << ShiftSize)) {
   2983     unsigned ThisVal;
   2984     if (((unsigned)Offset) > MaxEncodableValue) {
   2985       ThisVal = MaxEncodableValue;
   2986     } else {
   2987       ThisVal = Offset & MaxEncodableValue;
   2988     }
   2989     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
   2990            "Encoding cannot handle value that big");
   2991     BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
   2992         .addReg(SrcReg)
   2993         .addImm(ThisVal >> ShiftSize)
   2994         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
   2995         .setMIFlag(Flag);
   2996 
   2997     SrcReg = DestReg;
   2998     Offset -= ThisVal;
   2999     if (Offset == 0)
   3000       return;
   3001   }
   3002   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
   3003       .addReg(SrcReg)
   3004       .addImm(Offset)
   3005       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
   3006       .setMIFlag(Flag);
   3007 }
   3008 
   3009 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
   3010     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
   3011     MachineBasicBlock::iterator InsertPt, int FrameIndex,
   3012     LiveIntervals *LIS) const {
   3013   // This is a bit of a hack. Consider this instruction:
   3014   //
   3015   //   %0 = COPY %sp; GPR64all:%0
   3016   //
   3017   // We explicitly chose GPR64all for the virtual register so such a copy might
   3018   // be eliminated by RegisterCoalescer. However, that may not be possible, and
   3019   // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
   3020   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
   3021   //
   3022   // To prevent that, we are going to constrain the %0 register class here.
   3023   //
   3024   // <rdar://problem/11522048>
   3025   //
   3026   if (MI.isFullCopy()) {
   3027     unsigned DstReg = MI.getOperand(0).getReg();
   3028     unsigned SrcReg = MI.getOperand(1).getReg();
   3029     if (SrcReg == AArch64::SP &&
   3030         TargetRegisterInfo::isVirtualRegister(DstReg)) {
   3031       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
   3032       return nullptr;
   3033     }
   3034     if (DstReg == AArch64::SP &&
   3035         TargetRegisterInfo::isVirtualRegister(SrcReg)) {
   3036       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
   3037       return nullptr;
   3038     }
   3039   }
   3040 
   3041   // Handle the case where a copy is being spilled or filled but the source
   3042   // and destination register class don't match.  For example:
   3043   //
   3044   //   %0 = COPY %xzr; GPR64common:%0
   3045   //
   3046   // In this case we can still safely fold away the COPY and generate the
   3047   // following spill code:
   3048   //
   3049   //   STRXui %xzr, %stack.0
   3050   //
   3051   // This also eliminates spilled cross register class COPYs (e.g. between x and
   3052   // d regs) of the same size.  For example:
   3053   //
   3054   //   %0 = COPY %1; GPR64:%0, FPR64:%1
   3055   //
   3056   // will be filled as
   3057   //
   3058   //   LDRDui %0, fi<#0>
   3059   //
   3060   // instead of
   3061   //
   3062   //   LDRXui %Temp, fi<#0>
   3063   //   %0 = FMOV %Temp
   3064   //
   3065   if (MI.isCopy() && Ops.size() == 1 &&
   3066       // Make sure we're only folding the explicit COPY defs/uses.
   3067       (Ops[0] == 0 || Ops[0] == 1)) {
   3068     bool IsSpill = Ops[0] == 0;
   3069     bool IsFill = !IsSpill;
   3070     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   3071     const MachineRegisterInfo &MRI = MF.getRegInfo();
   3072     MachineBasicBlock &MBB = *MI.getParent();
   3073     const MachineOperand &DstMO = MI.getOperand(0);
   3074     const MachineOperand &SrcMO = MI.getOperand(1);
   3075     unsigned DstReg = DstMO.getReg();
   3076     unsigned SrcReg = SrcMO.getReg();
   3077     // This is slightly expensive to compute for physical regs since
   3078     // getMinimalPhysRegClass is slow.
   3079     auto getRegClass = [&](unsigned Reg) {
   3080       return TargetRegisterInfo::isVirtualRegister(Reg)
   3081                  ? MRI.getRegClass(Reg)
   3082                  : TRI.getMinimalPhysRegClass(Reg);
   3083     };
   3084 
   3085     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
   3086       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
   3087                  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
   3088              "Mismatched register size in non subreg COPY");
   3089       if (IsSpill)
   3090         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
   3091                             getRegClass(SrcReg), &TRI);
   3092       else
   3093         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
   3094                              getRegClass(DstReg), &TRI);
   3095       return &*--InsertPt;
   3096     }
   3097 
   3098     // Handle cases like spilling def of:
   3099     //
   3100     //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
   3101     //
   3102     // where the physical register source can be widened and stored to the full
   3103     // virtual reg destination stack slot, in this case producing:
   3104     //
   3105     //   STRXui %xzr, %stack.0
   3106     //
   3107     if (IsSpill && DstMO.isUndef() &&
   3108         TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
   3109       assert(SrcMO.getSubReg() == 0 &&
   3110              "Unexpected subreg on physical register");
   3111       const TargetRegisterClass *SpillRC;
   3112       unsigned SpillSubreg;
   3113       switch (DstMO.getSubReg()) {
   3114       default:
   3115         SpillRC = nullptr;
   3116         break;
   3117       case AArch64::sub_32:
   3118       case AArch64::ssub:
   3119         if (AArch64::GPR32RegClass.contains(SrcReg)) {
   3120           SpillRC = &AArch64::GPR64RegClass;
   3121           SpillSubreg = AArch64::sub_32;
   3122         } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
   3123           SpillRC = &AArch64::FPR64RegClass;
   3124           SpillSubreg = AArch64::ssub;
   3125         } else
   3126           SpillRC = nullptr;
   3127         break;
   3128       case AArch64::dsub:
   3129         if (AArch64::FPR64RegClass.contains(SrcReg)) {
   3130           SpillRC = &AArch64::FPR128RegClass;
   3131           SpillSubreg = AArch64::dsub;
   3132         } else
   3133           SpillRC = nullptr;
   3134         break;
   3135       }
   3136 
   3137       if (SpillRC)
   3138         if (unsigned WidenedSrcReg =
   3139                 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
   3140           storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
   3141                               FrameIndex, SpillRC, &TRI);
   3142           return &*--InsertPt;
   3143         }
   3144     }
   3145 
   3146     // Handle cases like filling use of:
   3147     //
   3148     //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
   3149     //
   3150     // where we can load the full virtual reg source stack slot, into the subreg
   3151     // destination, in this case producing:
   3152     //
   3153     //   LDRWui %0:sub_32<def,read-undef>, %stack.0
   3154     //
   3155     if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
   3156       const TargetRegisterClass *FillRC;
   3157       switch (DstMO.getSubReg()) {
   3158       default:
   3159         FillRC = nullptr;
   3160         break;
   3161       case AArch64::sub_32:
   3162         FillRC = &AArch64::GPR32RegClass;
   3163         break;
   3164       case AArch64::ssub:
   3165         FillRC = &AArch64::FPR32RegClass;
   3166         break;
   3167       case AArch64::dsub:
   3168         FillRC = &AArch64::FPR64RegClass;
   3169         break;
   3170       }
   3171 
   3172       if (FillRC) {
   3173         assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
   3174                    TRI.getRegSizeInBits(*FillRC) &&
   3175                "Mismatched regclass size on folded subreg COPY");
   3176         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
   3177         MachineInstr &LoadMI = *--InsertPt;
   3178         MachineOperand &LoadDst = LoadMI.getOperand(0);
   3179         assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
   3180         LoadDst.setSubReg(DstMO.getSubReg());
   3181         LoadDst.setIsUndef();
   3182         return &LoadMI;
   3183       }
   3184     }
   3185   }
   3186 
   3187   // Cannot fold.
   3188   return nullptr;
   3189 }
   3190 
   3191 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
   3192                                     bool *OutUseUnscaledOp,
   3193                                     unsigned *OutUnscaledOp,
   3194                                     int *EmittableOffset) {
   3195   int Scale = 1;
   3196   bool IsSigned = false;
   3197   // The ImmIdx should be changed case by case if it is not 2.
   3198   unsigned ImmIdx = 2;
   3199   unsigned UnscaledOp = 0;
   3200   // Set output values in case of early exit.
   3201   if (EmittableOffset)
   3202     *EmittableOffset = 0;
   3203   if (OutUseUnscaledOp)
   3204     *OutUseUnscaledOp = false;
   3205   if (OutUnscaledOp)
   3206     *OutUnscaledOp = 0;
   3207   switch (MI.getOpcode()) {
   3208   default:
   3209     llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
   3210   // Vector spills/fills can't take an immediate offset.
   3211   case AArch64::LD1Twov2d:
   3212   case AArch64::LD1Threev2d:
   3213   case AArch64::LD1Fourv2d:
   3214   case AArch64::LD1Twov1d:
   3215   case AArch64::LD1Threev1d:
   3216   case AArch64::LD1Fourv1d:
   3217   case AArch64::ST1Twov2d:
   3218   case AArch64::ST1Threev2d:
   3219   case AArch64::ST1Fourv2d:
   3220   case AArch64::ST1Twov1d:
   3221   case AArch64::ST1Threev1d:
   3222   case AArch64::ST1Fourv1d:
   3223     return AArch64FrameOffsetCannotUpdate;
   3224   case AArch64::PRFMui:
   3225     Scale = 8;
   3226     UnscaledOp = AArch64::PRFUMi;
   3227     break;
   3228   case AArch64::LDRXui:
   3229     Scale = 8;
   3230     UnscaledOp = AArch64::LDURXi;
   3231     break;
   3232   case AArch64::LDRWui:
   3233     Scale = 4;
   3234     UnscaledOp = AArch64::LDURWi;
   3235     break;
   3236   case AArch64::LDRBui:
   3237     Scale = 1;
   3238     UnscaledOp = AArch64::LDURBi;
   3239     break;
   3240   case AArch64::LDRHui:
   3241     Scale = 2;
   3242     UnscaledOp = AArch64::LDURHi;
   3243     break;
   3244   case AArch64::LDRSui:
   3245     Scale = 4;
   3246     UnscaledOp = AArch64::LDURSi;
   3247     break;
   3248   case AArch64::LDRDui:
   3249     Scale = 8;
   3250     UnscaledOp = AArch64::LDURDi;
   3251     break;
   3252   case AArch64::LDRQui:
   3253     Scale = 16;
   3254     UnscaledOp = AArch64::LDURQi;
   3255     break;
   3256   case AArch64::LDRBBui:
   3257     Scale = 1;
   3258     UnscaledOp = AArch64::LDURBBi;
   3259     break;
   3260   case AArch64::LDRHHui:
   3261     Scale = 2;
   3262     UnscaledOp = AArch64::LDURHHi;
   3263     break;
   3264   case AArch64::LDRSBXui:
   3265     Scale = 1;
   3266     UnscaledOp = AArch64::LDURSBXi;
   3267     break;
   3268   case AArch64::LDRSBWui:
   3269     Scale = 1;
   3270     UnscaledOp = AArch64::LDURSBWi;
   3271     break;
   3272   case AArch64::LDRSHXui:
   3273     Scale = 2;
   3274     UnscaledOp = AArch64::LDURSHXi;
   3275     break;
   3276   case AArch64::LDRSHWui:
   3277     Scale = 2;
   3278     UnscaledOp = AArch64::LDURSHWi;
   3279     break;
   3280   case AArch64::LDRSWui:
   3281     Scale = 4;
   3282     UnscaledOp = AArch64::LDURSWi;
   3283     break;
   3284 
   3285   case AArch64::STRXui:
   3286     Scale = 8;
   3287     UnscaledOp = AArch64::STURXi;
   3288     break;
   3289   case AArch64::STRWui:
   3290     Scale = 4;
   3291     UnscaledOp = AArch64::STURWi;
   3292     break;
   3293   case AArch64::STRBui:
   3294     Scale = 1;
   3295     UnscaledOp = AArch64::STURBi;
   3296     break;
   3297   case AArch64::STRHui:
   3298     Scale = 2;
   3299     UnscaledOp = AArch64::STURHi;
   3300     break;
   3301   case AArch64::STRSui:
   3302     Scale = 4;
   3303     UnscaledOp = AArch64::STURSi;
   3304     break;
   3305   case AArch64::STRDui:
   3306     Scale = 8;
   3307     UnscaledOp = AArch64::STURDi;
   3308     break;
   3309   case AArch64::STRQui:
   3310     Scale = 16;
   3311     UnscaledOp = AArch64::STURQi;
   3312     break;
   3313   case AArch64::STRBBui:
   3314     Scale = 1;
   3315     UnscaledOp = AArch64::STURBBi;
   3316     break;
   3317   case AArch64::STRHHui:
   3318     Scale = 2;
   3319     UnscaledOp = AArch64::STURHHi;
   3320     break;
   3321 
   3322   case AArch64::LDPXi:
   3323   case AArch64::LDPDi:
   3324   case AArch64::STPXi:
   3325   case AArch64::STPDi:
   3326   case AArch64::LDNPXi:
   3327   case AArch64::LDNPDi:
   3328   case AArch64::STNPXi:
   3329   case AArch64::STNPDi:
   3330     ImmIdx = 3;
   3331     IsSigned = true;
   3332     Scale = 8;
   3333     break;
   3334   case AArch64::LDPQi:
   3335   case AArch64::STPQi:
   3336   case AArch64::LDNPQi:
   3337   case AArch64::STNPQi:
   3338     ImmIdx = 3;
   3339     IsSigned = true;
   3340     Scale = 16;
   3341     break;
   3342   case AArch64::LDPWi:
   3343   case AArch64::LDPSi:
   3344   case AArch64::STPWi:
   3345   case AArch64::STPSi:
   3346   case AArch64::LDNPWi:
   3347   case AArch64::LDNPSi:
   3348   case AArch64::STNPWi:
   3349   case AArch64::STNPSi:
   3350     ImmIdx = 3;
   3351     IsSigned = true;
   3352     Scale = 4;
   3353     break;
   3354 
   3355   case AArch64::LDURXi:
   3356   case AArch64::LDURWi:
   3357   case AArch64::LDURBi:
   3358   case AArch64::LDURHi:
   3359   case AArch64::LDURSi:
   3360   case AArch64::LDURDi:
   3361   case AArch64::LDURQi:
   3362   case AArch64::LDURHHi:
   3363   case AArch64::LDURBBi:
   3364   case AArch64::LDURSBXi:
   3365   case AArch64::LDURSBWi:
   3366   case AArch64::LDURSHXi:
   3367   case AArch64::LDURSHWi:
   3368   case AArch64::LDURSWi:
   3369   case AArch64::STURXi:
   3370   case AArch64::STURWi:
   3371   case AArch64::STURBi:
   3372   case AArch64::STURHi:
   3373   case AArch64::STURSi:
   3374   case AArch64::STURDi:
   3375   case AArch64::STURQi:
   3376   case AArch64::STURBBi:
   3377   case AArch64::STURHHi:
   3378     Scale = 1;
   3379     break;
   3380   }
   3381 
   3382   Offset += MI.getOperand(ImmIdx).getImm() * Scale;
   3383 
   3384   bool useUnscaledOp = false;
   3385   // If the offset doesn't match the scale, we rewrite the instruction to
   3386   // use the unscaled instruction instead. Likewise, if we have a negative
   3387   // offset (and have an unscaled op to use).
   3388   if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
   3389     useUnscaledOp = true;
   3390 
   3391   // Use an unscaled addressing mode if the instruction has a negative offset
   3392   // (or if the instruction is already using an unscaled addressing mode).
   3393   unsigned MaskBits;
   3394   if (IsSigned) {
   3395     // ldp/stp instructions.
   3396     MaskBits = 7;
   3397     Offset /= Scale;
   3398   } else if (UnscaledOp == 0 || useUnscaledOp) {
   3399     MaskBits = 9;
   3400     IsSigned = true;
   3401     Scale = 1;
   3402   } else {
   3403     MaskBits = 12;
   3404     IsSigned = false;
   3405     Offset /= Scale;
   3406   }
   3407 
   3408   // Attempt to fold address computation.
   3409   int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
   3410   int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
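          // Illustrative examples for LDRXui (Scale = 8):
          //   Offset = 520 -> scaled form is legal, *EmittableOffset = 520 / 8 = 65.
          //   Offset = 4   -> not a multiple of 8, so the unscaled LDURXi form is
          //                   used with *EmittableOffset = 4 (signed 9-bit range).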
   3411   if (Offset >= MinOff && Offset <= MaxOff) {
   3412     if (EmittableOffset)
   3413       *EmittableOffset = Offset;
   3414     Offset = 0;
   3415   } else {
   3416     int NewOff = Offset < 0 ? MinOff : MaxOff;
   3417     if (EmittableOffset)
   3418       *EmittableOffset = NewOff;
   3419     Offset = (Offset - NewOff) * Scale;
   3420   }
   3421   if (OutUseUnscaledOp)
   3422     *OutUseUnscaledOp = useUnscaledOp;
   3423   if (OutUnscaledOp)
   3424     *OutUnscaledOp = UnscaledOp;
   3425   return AArch64FrameOffsetCanUpdate |
   3426          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
   3427 }
   3428 
   3429 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   3430                                     unsigned FrameReg, int &Offset,
   3431                                     const AArch64InstrInfo *TII) {
   3432   unsigned Opcode = MI.getOpcode();
   3433   unsigned ImmIdx = FrameRegIdx + 1;
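          // Illustrative example: in "%x0 = LDRXui %stack.0, 3", FrameRegIdx names
          // the %stack.0 operand and ImmIdx the scaled immediate that follows it.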
   3434 
   3435   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
   3436     Offset += MI.getOperand(ImmIdx).getImm();
   3437     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
   3438                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
   3439                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
   3440     MI.eraseFromParent();
   3441     Offset = 0;
   3442     return true;
   3443   }
   3444 
   3445   int NewOffset;
   3446   unsigned UnscaledOp;
   3447   bool UseUnscaledOp;
   3448   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
   3449                                          &UnscaledOp, &NewOffset);
   3450   if (Status & AArch64FrameOffsetCanUpdate) {
   3451     if (Status & AArch64FrameOffsetIsLegal)
   3452       // Replace the FrameIndex with FrameReg.
   3453       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
   3454     if (UseUnscaledOp)
   3455       MI.setDesc(TII->get(UnscaledOp));
   3456 
   3457     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
   3458     return Offset == 0;
   3459   }
   3460 
   3461   return false;
   3462 }
   3463 
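        // HINT #0 is the architectural NOP encoding, so getNoop() below emits a
        // canonical "nop".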
   3464 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
   3465   NopInst.setOpcode(AArch64::HINT);
   3466   NopInst.addOperand(MCOperand::createImm(0));
   3467 }
   3468 
   3469 // AArch64 supports MachineCombiner.
   3470 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
   3471 
   3472 // True when Opc sets flag
   3473 static bool isCombineInstrSettingFlag(unsigned Opc) {
   3474   switch (Opc) {
   3475   case AArch64::ADDSWrr:
   3476   case AArch64::ADDSWri:
   3477   case AArch64::ADDSXrr:
   3478   case AArch64::ADDSXri:
   3479   case AArch64::SUBSWrr:
   3480   case AArch64::SUBSXrr:
   3481   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   3482   case AArch64::SUBSWri:
   3483   case AArch64::SUBSXri:
   3484     return true;
   3485   default:
   3486     break;
   3487   }
   3488   return false;
   3489 }
   3490 
   3491 // 32b Opcodes that can be combined with a MUL
   3492 static bool isCombineInstrCandidate32(unsigned Opc) {
   3493   switch (Opc) {
   3494   case AArch64::ADDWrr:
   3495   case AArch64::ADDWri:
   3496   case AArch64::SUBWrr:
   3497   case AArch64::ADDSWrr:
   3498   case AArch64::ADDSWri:
   3499   case AArch64::SUBSWrr:
   3500   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   3501   case AArch64::SUBWri:
   3502   case AArch64::SUBSWri:
   3503     return true;
   3504   default:
   3505     break;
   3506   }
   3507   return false;
   3508 }
   3509 
   3510 // 64b Opcodes that can be combined with a MUL
   3511 static bool isCombineInstrCandidate64(unsigned Opc) {
   3512   switch (Opc) {
   3513   case AArch64::ADDXrr:
   3514   case AArch64::ADDXri:
   3515   case AArch64::SUBXrr:
   3516   case AArch64::ADDSXrr:
   3517   case AArch64::ADDSXri:
   3518   case AArch64::SUBSXrr:
   3519   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   3520   case AArch64::SUBXri:
   3521   case AArch64::SUBSXri:
   3522     return true;
   3523   default:
   3524     break;
   3525   }
   3526   return false;
   3527 }
   3528 
   3529 // FP Opcodes that can be combined with a FMUL
   3530 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
   3531   switch (Inst.getOpcode()) {
   3532   default:
   3533     break;
   3534   case AArch64::FADDSrr:
   3535   case AArch64::FADDDrr:
   3536   case AArch64::FADDv2f32:
   3537   case AArch64::FADDv2f64:
   3538   case AArch64::FADDv4f32:
   3539   case AArch64::FSUBSrr:
   3540   case AArch64::FSUBDrr:
   3541   case AArch64::FSUBv2f32:
   3542   case AArch64::FSUBv2f64:
   3543   case AArch64::FSUBv4f32:
   3544     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
   3545     return (Options.UnsafeFPMath ||
   3546             Options.AllowFPOpFusion == FPOpFusion::Fast);
   3547   }
   3548   return false;
   3549 }
   3550 
   3551 // Opcodes that can be combined with a MUL
   3552 static bool isCombineInstrCandidate(unsigned Opc) {
   3553   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
   3554 }
   3555 
   3556 //
   3557 // Utility routine that checks if \param MO is defined by an
   3558 // \param CombineOpc instruction in the basic block \param MBB
   3559 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
   3560                        unsigned CombineOpc, unsigned ZeroReg = 0,
   3561                        bool CheckZeroReg = false) {
   3562   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   3563   MachineInstr *MI = nullptr;
   3564 
   3565   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
   3566     MI = MRI.getUniqueVRegDef(MO.getReg());
   3567   // And it needs to be in the trace (otherwise, it won't have a depth).
   3568   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
   3569     return false;
   3570   // Must only be used by the user we combine with.
   3571   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
   3572     return false;
   3573 
   3574   if (CheckZeroReg) {
   3575     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
   3576            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
   3577            MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
   3578     // The third input reg must be zero.
   3579     if (MI->getOperand(3).getReg() != ZeroReg)
   3580       return false;
   3581   }
   3582 
   3583   return true;
   3584 }
   3585 
   3586 //
   3587 // Is \param MO defined by an integer multiply and can be combined?
   3588 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
   3589                               unsigned MulOpc, unsigned ZeroReg) {
   3590   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
   3591 }
   3592 
   3593 //
   3594 // Is \param MO defined by a floating-point multiply and can be combined?
   3595 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
   3596                                unsigned MulOpc) {
   3597   return canCombine(MBB, MO, MulOpc);
   3598 }
   3599 
   3600 // TODO: There are many more machine instruction opcodes to match:
   3601 //       1. Other data types (integer, vectors)
   3602 //       2. Other math / logic operations (xor, or)
   3603 //       3. Other forms of the same operation (intrinsics and other variants)
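        // Reporting these opcodes as associative and commutative lets the generic
        // reassociation in TargetInstrInfo shorten dependency chains, e.g.
        // (illustrative) ((a * b) * c) * d  ->  (a * b) * (c * d).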
   3604 bool AArch64InstrInfo::isAssociativeAndCommutative(
   3605     const MachineInstr &Inst) const {
   3606   switch (Inst.getOpcode()) {
   3607   case AArch64::FADDDrr:
   3608   case AArch64::FADDSrr:
   3609   case AArch64::FADDv2f32:
   3610   case AArch64::FADDv2f64:
   3611   case AArch64::FADDv4f32:
   3612   case AArch64::FMULDrr:
   3613   case AArch64::FMULSrr:
   3614   case AArch64::FMULX32:
   3615   case AArch64::FMULX64:
   3616   case AArch64::FMULXv2f32:
   3617   case AArch64::FMULXv2f64:
   3618   case AArch64::FMULXv4f32:
   3619   case AArch64::FMULv2f32:
   3620   case AArch64::FMULv2f64:
   3621   case AArch64::FMULv4f32:
   3622     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
   3623   default:
   3624     return false;
   3625   }
   3626 }
   3627 
   3628 /// Find instructions that can be turned into madd.
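        /// For example (illustrative, matching the patterns handled below):
        ///   %2 = MADDWrrr %0, %1, %wzr   ; i.e. a plain 32-bit multiply
        ///   %3 = ADDWrr %2, %4
        /// can be rewritten as
        ///   %3 = MADDWrrr %0, %1, %4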
   3629 static bool getMaddPatterns(MachineInstr &Root,
   3630                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
   3631   unsigned Opc = Root.getOpcode();
   3632   MachineBasicBlock &MBB = *Root.getParent();
   3633   bool Found = false;
   3634 
   3635   if (!isCombineInstrCandidate(Opc))
   3636     return false;
   3637   if (isCombineInstrSettingFlag(Opc)) {
   3638     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
   3639     // When NZCV is live bail out.
   3640     if (Cmp_NZCV == -1)
   3641       return false;
   3642     unsigned NewOpc = convertToNonFlagSettingOpc(Root);
   3643     // When opcode can't change bail out.
   3644     // CHECKME: do we miss any cases for opcode conversion?
   3645     if (NewOpc == Opc)
   3646       return false;
   3647     Opc = NewOpc;
   3648   }
   3649 
   3650   switch (Opc) {
   3651   default:
   3652     break;
   3653   case AArch64::ADDWrr:
   3654     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
   3655            "ADDWrr does not have register operands");
   3656     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   3657                           AArch64::WZR)) {
   3658       Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
   3659       Found = true;
   3660     }
   3661     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
   3662                           AArch64::WZR)) {
   3663       Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
   3664       Found = true;
   3665     }
   3666     break;
   3667   case AArch64::ADDXrr:
   3668     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   3669                           AArch64::XZR)) {
   3670       Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
   3671       Found = true;
   3672     }
   3673     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
   3674                           AArch64::XZR)) {
   3675       Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
   3676       Found = true;
   3677     }
   3678     break;
   3679   case AArch64::SUBWrr:
   3680     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   3681                           AArch64::WZR)) {
   3682       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
   3683       Found = true;
   3684     }
   3685     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
   3686                           AArch64::WZR)) {
   3687       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
   3688       Found = true;
   3689     }
   3690     break;
   3691   case AArch64::SUBXrr:
   3692     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   3693                           AArch64::XZR)) {
   3694       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
   3695       Found = true;
   3696     }
   3697     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
   3698                           AArch64::XZR)) {
   3699       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
   3700       Found = true;
   3701     }
   3702     break;
   3703   case AArch64::ADDWri:
   3704     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   3705                           AArch64::WZR)) {
   3706       Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
   3707       Found = true;
   3708     }
   3709     break;
   3710   case AArch64::ADDXri:
   3711     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   3712                           AArch64::XZR)) {
   3713       Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
   3714       Found = true;
   3715     }
   3716     break;
   3717   case AArch64::SUBWri:
   3718     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
   3719                           AArch64::WZR)) {
   3720       Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
   3721       Found = true;
   3722     }
   3723     break;
   3724   case AArch64::SUBXri:
   3725     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
   3726                           AArch64::XZR)) {
   3727       Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
   3728       Found = true;
   3729     }
   3730     break;
   3731   }
   3732   return Found;
   3733 }
   3734 /// Floating-Point Support
   3735 
   3736 /// Find instructions that can be combined into a fused multiply-add/sub.
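        /// For example (illustrative):
        ///   %2 = FMULSrr %0, %1
        ///   %3 = FADDSrr %2, %4
        /// can be fused into a single scalar fmadd (%3 = %0 * %1 + %4) when the
        /// target's FP options permit contraction.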
   3737 static bool getFMAPatterns(MachineInstr &Root,
   3738                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
   3739 
   3740   if (!isCombineInstrCandidateFP(Root))
   3741     return false;
   3742 
   3743   MachineBasicBlock &MBB = *Root.getParent();
   3744   bool Found = false;
   3745 
   3746   switch (Root.getOpcode()) {
   3747   default:
   3748     assert(false && "Unsupported FP instruction in combiner\n");
   3749     break;
   3750   case AArch64::FADDSrr:
   3751     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
   3752            "FADDWrr does not have register operands");
   3753     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
   3754       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
   3755       Found = true;
   3756     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3757                                   AArch64::FMULv1i32_indexed)) {
   3758       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
   3759       Found = true;
   3760     }
   3761     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
   3762       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
   3763       Found = true;
   3764     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3765                                   AArch64::FMULv1i32_indexed)) {
   3766       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
   3767       Found = true;
   3768     }
   3769     break;
   3770   case AArch64::FADDDrr:
   3771     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
   3772       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
   3773       Found = true;
   3774     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3775                                   AArch64::FMULv1i64_indexed)) {
   3776       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
   3777       Found = true;
   3778     }
   3779     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
   3780       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
   3781       Found = true;
   3782     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3783                                   AArch64::FMULv1i64_indexed)) {
   3784       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
   3785       Found = true;
   3786     }
   3787     break;
   3788   case AArch64::FADDv2f32:
   3789     if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3790                            AArch64::FMULv2i32_indexed)) {
   3791       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
   3792       Found = true;
   3793     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3794                                   AArch64::FMULv2f32)) {
   3795       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
   3796       Found = true;
   3797     }
   3798     if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3799                            AArch64::FMULv2i32_indexed)) {
   3800       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
   3801       Found = true;
   3802     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3803                                   AArch64::FMULv2f32)) {
   3804       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
   3805       Found = true;
   3806     }
   3807     break;
   3808   case AArch64::FADDv2f64:
   3809     if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3810                            AArch64::FMULv2i64_indexed)) {
   3811       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
   3812       Found = true;
   3813     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3814                                   AArch64::FMULv2f64)) {
   3815       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
   3816       Found = true;
   3817     }
   3818     if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3819                            AArch64::FMULv2i64_indexed)) {
   3820       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
   3821       Found = true;
   3822     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3823                                   AArch64::FMULv2f64)) {
   3824       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
   3825       Found = true;
   3826     }
   3827     break;
   3828   case AArch64::FADDv4f32:
   3829     if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3830                            AArch64::FMULv4i32_indexed)) {
   3831       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
   3832       Found = true;
   3833     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3834                                   AArch64::FMULv4f32)) {
   3835       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
   3836       Found = true;
   3837     }
   3838     if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3839                            AArch64::FMULv4i32_indexed)) {
   3840       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
   3841       Found = true;
   3842     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3843                                   AArch64::FMULv4f32)) {
   3844       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
   3845       Found = true;
   3846     }
   3847     break;
   3848 
   3849   case AArch64::FSUBSrr:
   3850     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
   3851       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
   3852       Found = true;
   3853     }
   3854     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
   3855       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
   3856       Found = true;
   3857     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3858                                   AArch64::FMULv1i32_indexed)) {
   3859       Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
   3860       Found = true;
   3861     }
   3862     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
   3863       Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
   3864       Found = true;
   3865     }
   3866     break;
   3867   case AArch64::FSUBDrr:
   3868     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
   3869       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
   3870       Found = true;
   3871     }
   3872     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
   3873       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
   3874       Found = true;
   3875     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3876                                   AArch64::FMULv1i64_indexed)) {
   3877       Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
   3878       Found = true;
   3879     }
   3880     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
   3881       Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
   3882       Found = true;
   3883     }
   3884     break;
   3885   case AArch64::FSUBv2f32:
   3886     if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3887                            AArch64::FMULv2i32_indexed)) {
   3888       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
   3889       Found = true;
   3890     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3891                                   AArch64::FMULv2f32)) {
   3892       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
   3893       Found = true;
   3894     }
   3895     if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3896                            AArch64::FMULv2i32_indexed)) {
   3897       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
   3898       Found = true;
   3899     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3900                                   AArch64::FMULv2f32)) {
   3901       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
   3902       Found = true;
   3903     }
   3904     break;
   3905   case AArch64::FSUBv2f64:
   3906     if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3907                            AArch64::FMULv2i64_indexed)) {
   3908       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
   3909       Found = true;
   3910     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3911                                   AArch64::FMULv2f64)) {
   3912       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
   3913       Found = true;
   3914     }
   3915     if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3916                            AArch64::FMULv2i64_indexed)) {
   3917       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
   3918       Found = true;
   3919     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3920                                   AArch64::FMULv2f64)) {
   3921       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
   3922       Found = true;
   3923     }
   3924     break;
   3925   case AArch64::FSUBv4f32:
   3926     if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3927                            AArch64::FMULv4i32_indexed)) {
   3928       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
   3929       Found = true;
   3930     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
   3931                                   AArch64::FMULv4f32)) {
   3932       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
   3933       Found = true;
   3934     }
   3935     if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3936                            AArch64::FMULv4i32_indexed)) {
   3937       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
   3938       Found = true;
   3939     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
   3940                                   AArch64::FMULv4f32)) {
   3941       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
   3942       Found = true;
   3943     }
   3944     break;
   3945   }
   3946   return Found;
   3947 }
   3948 
   3949 /// Return true when a code sequence can improve throughput. It
   3950 /// should be called only for instructions in loops.
   3951 /// \param Pattern - combiner pattern
   3952 bool AArch64InstrInfo::isThroughputPattern(
   3953     MachineCombinerPattern Pattern) const {
   3954   switch (Pattern) {
   3955   default:
   3956     break;
   3957   case MachineCombinerPattern::FMULADDS_OP1:
   3958   case MachineCombinerPattern::FMULADDS_OP2:
   3959   case MachineCombinerPattern::FMULSUBS_OP1:
   3960   case MachineCombinerPattern::FMULSUBS_OP2:
   3961   case MachineCombinerPattern::FMULADDD_OP1:
   3962   case MachineCombinerPattern::FMULADDD_OP2:
   3963   case MachineCombinerPattern::FMULSUBD_OP1:
   3964   case MachineCombinerPattern::FMULSUBD_OP2:
   3965   case MachineCombinerPattern::FNMULSUBS_OP1:
   3966   case MachineCombinerPattern::FNMULSUBD_OP1:
   3967   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
   3968   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
   3969   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
   3970   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
   3971   case MachineCombinerPattern::FMLAv2f32_OP2:
   3972   case MachineCombinerPattern::FMLAv2f32_OP1:
   3973   case MachineCombinerPattern::FMLAv2f64_OP1:
   3974   case MachineCombinerPattern::FMLAv2f64_OP2:
   3975   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
   3976   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
   3977   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
   3978   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
   3979   case MachineCombinerPattern::FMLAv4f32_OP1:
   3980   case MachineCombinerPattern::FMLAv4f32_OP2:
   3981   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
   3982   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
   3983   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
   3984   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
   3985   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
   3986   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
   3987   case MachineCombinerPattern::FMLSv2f32_OP2:
   3988   case MachineCombinerPattern::FMLSv2f64_OP2:
   3989   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
   3990   case MachineCombinerPattern::FMLSv4f32_OP2:
   3991     return true;
   3992   } // end switch (Pattern)
   3993   return false;
   3994 }
   3995 /// Return true when there is potentially a faster code sequence for an
   3996 /// instruction chain ending in \p Root. All potential patterns are listed in
   3997 /// the \p Pattern vector. Pattern should be sorted in priority order since the
   3998 /// pattern evaluator stops checking as soon as it finds a faster sequence.
   3999 
   4000 bool AArch64InstrInfo::getMachineCombinerPatterns(
   4001     MachineInstr &Root,
   4002     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
   4003   // Integer patterns
   4004   if (getMaddPatterns(Root, Patterns))
   4005     return true;
   4006   // Floating point patterns
   4007   if (getFMAPatterns(Root, Patterns))
   4008     return true;
   4009 
   4010   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
   4011 }
   4012 
   4013 enum class FMAInstKind { Default, Indexed, Accumulator };
   4014 /// genFusedMultiply - Generate fused multiply instructions.
   4015 /// This function supports both integer and floating point instructions.
   4016 /// A typical example:
   4017 ///  F|MUL I=A,B,0
   4018 ///  F|ADD R,I,C
   4019 ///  ==> F|MADD R,A,B,C
   4020 /// \param MF Containing MachineFunction
   4021 /// \param MRI Register information
   4022 /// \param TII Target information
   4023 /// \param Root is the F|ADD instruction
   4024 /// \param [out] InsInstrs is a vector of machine instructions and will
   4025 /// contain the generated madd instruction
   4026 /// \param IdxMulOpd is index of operand in Root that is the result of
   4027 /// the F|MUL. In the example above IdxMulOpd is 1.
   4028 /// \param MaddOpc the opcode of the f|madd instruction
   4029 /// \param RC Register class of operands
   4030 /// \param kind The kind of FMA instruction (addressing mode) to be generated
   4031 /// \param ReplacedAddend is the result register from the instruction
   4032 /// replacing the non-combined operand, if any.
   4033 static MachineInstr *
   4034 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
   4035                  const TargetInstrInfo *TII, MachineInstr &Root,
   4036                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
   4037                  unsigned MaddOpc, const TargetRegisterClass *RC,
   4038                  FMAInstKind kind = FMAInstKind::Default,
   4039                  const unsigned *ReplacedAddend = nullptr) {
   4040   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
   4041 
   4042   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
   4043   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
   4044   unsigned ResultReg = Root.getOperand(0).getReg();
   4045   unsigned SrcReg0 = MUL->getOperand(1).getReg();
   4046   bool Src0IsKill = MUL->getOperand(1).isKill();
   4047   unsigned SrcReg1 = MUL->getOperand(2).getReg();
   4048   bool Src1IsKill = MUL->getOperand(2).isKill();
   4049 
   4050   unsigned SrcReg2;
   4051   bool Src2IsKill;
   4052   if (ReplacedAddend) {
   4053     // If we just generated a new addend, we must be its only use.
   4054     SrcReg2 = *ReplacedAddend;
   4055     Src2IsKill = true;
   4056   } else {
   4057     SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
   4058     Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
   4059   }
   4060 
   4061   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
   4062     MRI.constrainRegClass(ResultReg, RC);
   4063   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
   4064     MRI.constrainRegClass(SrcReg0, RC);
   4065   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
   4066     MRI.constrainRegClass(SrcReg1, RC);
   4067   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
   4068     MRI.constrainRegClass(SrcReg2, RC);
   4069 
   4070   MachineInstrBuilder MIB;
   4071   if (kind == FMAInstKind::Default)
   4072     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
   4073               .addReg(SrcReg0, getKillRegState(Src0IsKill))
   4074               .addReg(SrcReg1, getKillRegState(Src1IsKill))
   4075               .addReg(SrcReg2, getKillRegState(Src2IsKill));
   4076   else if (kind == FMAInstKind::Indexed)
   4077     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
   4078               .addReg(SrcReg2, getKillRegState(Src2IsKill))
   4079               .addReg(SrcReg0, getKillRegState(Src0IsKill))
   4080               .addReg(SrcReg1, getKillRegState(Src1IsKill))
   4081               .addImm(MUL->getOperand(3).getImm());
   4082   else if (kind == FMAInstKind::Accumulator)
   4083     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
   4084               .addReg(SrcReg2, getKillRegState(Src2IsKill))
   4085               .addReg(SrcReg0, getKillRegState(Src0IsKill))
   4086               .addReg(SrcReg1, getKillRegState(Src1IsKill));
   4087   else
   4088     assert(false && "Invalid FMA instruction kind \n");
   4089   // Insert the MADD (MADD, FMADD, FMSUB, FMLA, FMLS)
   4090   InsInstrs.push_back(MIB);
   4091   return MUL;
   4092 }
   4093 
   4094 /// genMaddR - Generate madd instruction and combine mul and add using
   4095 /// an extra virtual register
   4096 /// Example - an ADD intermediate needs to be stored in a register:
   4097 ///   MUL I=A,B,0
   4098 ///   ADD R,I,Imm
   4099 ///   ==> ORR  V, ZR, Imm
   4100 ///   ==> MADD R,A,B,V
   4101 /// \param MF Containing MachineFunction
   4102 /// \param MRI Register information
   4103 /// \param TII Target information
   4104 /// \param Root is the ADD instruction
   4105 /// \param [out] InsInstrs is a vector of machine instructions and will
   4106 /// contain the generated madd instruction
   4107 /// \param IdxMulOpd is index of operand in Root that is the result of
   4108 /// the MUL. In the example above IdxMulOpd is 1.
   4109 /// \param MaddOpc the opcode of the madd instruction
   4110 /// \param VR is a virtual register that holds the value of an ADD operand
   4111 /// (V in the example above).
   4112 /// \param RC Register class of operands
   4113 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
   4114                               const TargetInstrInfo *TII, MachineInstr &Root,
   4115                               SmallVectorImpl<MachineInstr *> &InsInstrs,
   4116                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
   4117                               const TargetRegisterClass *RC) {
   4118   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
   4119 
   4120   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
   4121   unsigned ResultReg = Root.getOperand(0).getReg();
   4122   unsigned SrcReg0 = MUL->getOperand(1).getReg();
   4123   bool Src0IsKill = MUL->getOperand(1).isKill();
   4124   unsigned SrcReg1 = MUL->getOperand(2).getReg();
   4125   bool Src1IsKill = MUL->getOperand(2).isKill();
   4126 
   4127   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
   4128     MRI.constrainRegClass(ResultReg, RC);
   4129   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
   4130     MRI.constrainRegClass(SrcReg0, RC);
   4131   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
   4132     MRI.constrainRegClass(SrcReg1, RC);
   4133   if (TargetRegisterInfo::isVirtualRegister(VR))
   4134     MRI.constrainRegClass(VR, RC);
   4135 
   4136   MachineInstrBuilder MIB =
   4137       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
   4138           .addReg(SrcReg0, getKillRegState(Src0IsKill))
   4139           .addReg(SrcReg1, getKillRegState(Src1IsKill))
   4140           .addReg(VR);
   4141   // Insert the MADD
   4142   InsInstrs.push_back(MIB);
   4143   return MUL;
   4144 }
   4145 
   4146 /// When getMachineCombinerPatterns() finds potential patterns,
   4147 /// this function generates the instructions that could replace the
   4148 /// original code sequence
   4149 void AArch64InstrInfo::genAlternativeCodeSequence(
   4150     MachineInstr &Root, MachineCombinerPattern Pattern,
   4151     SmallVectorImpl<MachineInstr *> &InsInstrs,
   4152     SmallVectorImpl<MachineInstr *> &DelInstrs,
   4153     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
   4154   MachineBasicBlock &MBB = *Root.getParent();
   4155   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   4156   MachineFunction &MF = *MBB.getParent();
   4157   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   4158 
   4159   MachineInstr *MUL;
   4160   const TargetRegisterClass *RC;
   4161   unsigned Opc;
   4162   switch (Pattern) {
   4163   default:
   4164     // Reassociate instructions.
   4165     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
   4166                                                 DelInstrs, InstrIdxForVirtReg);
   4167     return;
   4168   case MachineCombinerPattern::MULADDW_OP1:
   4169   case MachineCombinerPattern::MULADDX_OP1:
   4170     // MUL I=A,B,0
   4171     // ADD R,I,C
   4172     // ==> MADD R,A,B,C
   4173     // --- Create(MADD);
   4174     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
   4175       Opc = AArch64::MADDWrrr;
   4176       RC = &AArch64::GPR32RegClass;
   4177     } else {
   4178       Opc = AArch64::MADDXrrr;
   4179       RC = &AArch64::GPR64RegClass;
   4180     }
   4181     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   4182     break;
   4183   case MachineCombinerPattern::MULADDW_OP2:
   4184   case MachineCombinerPattern::MULADDX_OP2:
   4185     // MUL I=A,B,0
   4186     // ADD R,C,I
   4187     // ==> MADD R,A,B,C
   4188     // --- Create(MADD);
   4189     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
   4190       Opc = AArch64::MADDWrrr;
   4191       RC = &AArch64::GPR32RegClass;
   4192     } else {
   4193       Opc = AArch64::MADDXrrr;
   4194       RC = &AArch64::GPR64RegClass;
   4195     }
   4196     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   4197     break;
   4198   case MachineCombinerPattern::MULADDWI_OP1:
   4199   case MachineCombinerPattern::MULADDXI_OP1: {
   4200     // MUL I=A,B,0
   4201     // ADD R,I,Imm
   4202     // ==> ORR  V, ZR, Imm
   4203     // ==> MADD R,A,B,V
   4204     // --- Create(MADD);
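            // Illustrative sketch (hypothetical registers and constant): for
            //   mul w8, w0, w1
            //   add w9, w8, #16
            // the constant is first materialized with a logical-immediate ORR from
            // WZR (the "mov w10, #16" alias), and the sequence then becomes
            //   madd w9, w0, w1, w10
            // This only fires when processLogicalImmediate() can encode the constant.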
   4205     const TargetRegisterClass *OrrRC;
   4206     unsigned BitSize, OrrOpc, ZeroReg;
   4207     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
   4208       OrrOpc = AArch64::ORRWri;
   4209       OrrRC = &AArch64::GPR32spRegClass;
   4210       BitSize = 32;
   4211       ZeroReg = AArch64::WZR;
   4212       Opc = AArch64::MADDWrrr;
   4213       RC = &AArch64::GPR32RegClass;
   4214     } else {
   4215       OrrOpc = AArch64::ORRXri;
   4216       OrrRC = &AArch64::GPR64spRegClass;
   4217       BitSize = 64;
   4218       ZeroReg = AArch64::XZR;
   4219       Opc = AArch64::MADDXrrr;
   4220       RC = &AArch64::GPR64RegClass;
   4221     }
   4222     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
   4223     uint64_t Imm = Root.getOperand(2).getImm();
   4224 
   4225     if (Root.getOperand(3).isImm()) {
   4226       unsigned Val = Root.getOperand(3).getImm();
   4227       Imm = Imm << Val;
   4228     }
   4229     uint64_t UImm = SignExtend64(Imm, BitSize);
   4230     uint64_t Encoding;
   4231     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
   4232       MachineInstrBuilder MIB1 =
   4233           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
   4234               .addReg(ZeroReg)
   4235               .addImm(Encoding);
   4236       InsInstrs.push_back(MIB1);
   4237       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   4238       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   4239     }
   4240     break;
   4241   }
   4242   case MachineCombinerPattern::MULSUBW_OP1:
   4243   case MachineCombinerPattern::MULSUBX_OP1: {
   4244     // MUL I=A,B,0
   4245     // SUB R,I, C
   4246     // ==> SUB  V, 0, C
   4247     // ==> MADD R,A,B,V // = -C + A*B
   4248     // --- Create(MADD);
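            // Illustrative sketch (hypothetical registers): for
            //   mul w8, w0, w1
            //   sub w9, w8, w2
            // the subtrahend is first negated with "sub w10, wzr, w2" (the "neg"
            // alias), and the sequence then becomes
            //   madd w9, w0, w1, w10   // = -w2 + w0*w1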
   4249     const TargetRegisterClass *SubRC;
   4250     unsigned SubOpc, ZeroReg;
   4251     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
   4252       SubOpc = AArch64::SUBWrr;
   4253       SubRC = &AArch64::GPR32spRegClass;
   4254       ZeroReg = AArch64::WZR;
   4255       Opc = AArch64::MADDWrrr;
   4256       RC = &AArch64::GPR32RegClass;
   4257     } else {
   4258       SubOpc = AArch64::SUBXrr;
   4259       SubRC = &AArch64::GPR64spRegClass;
   4260       ZeroReg = AArch64::XZR;
   4261       Opc = AArch64::MADDXrrr;
   4262       RC = &AArch64::GPR64RegClass;
   4263     }
   4264     unsigned NewVR = MRI.createVirtualRegister(SubRC);
   4265     // SUB NewVR, 0, C
   4266     MachineInstrBuilder MIB1 =
   4267         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
   4268             .addReg(ZeroReg)
   4269             .add(Root.getOperand(2));
   4270     InsInstrs.push_back(MIB1);
   4271     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   4272     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   4273     break;
   4274   }
   4275   case MachineCombinerPattern::MULSUBW_OP2:
   4276   case MachineCombinerPattern::MULSUBX_OP2:
   4277     // MUL I=A,B,0
   4278     // SUB R,C,I
   4279     // ==> MSUB R,A,B,C (computes C - A*B)
   4280     // --- Create(MSUB);
   4281     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
   4282       Opc = AArch64::MSUBWrrr;
   4283       RC = &AArch64::GPR32RegClass;
   4284     } else {
   4285       Opc = AArch64::MSUBXrrr;
   4286       RC = &AArch64::GPR64RegClass;
   4287     }
   4288     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   4289     break;
   4290   case MachineCombinerPattern::MULSUBWI_OP1:
   4291   case MachineCombinerPattern::MULSUBXI_OP1: {
   4292     // MUL I=A,B,0
   4293     // SUB R,I, Imm
   4294     // ==> ORR  V, ZR, -Imm
   4295     // ==> MADD R,A,B,V // = -Imm + A*B
   4296     // --- Create(MADD);
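            // Illustrative sketch (hypothetical values, and only taken when the
            // negated constant is itself an encodable logical immediate): for
            //   mul w8, w0, w1
            //   sub w9, w8, #16
            // the constant -16 is materialized with an ORR from WZR, and the
            // sequence then becomes
            //   madd w9, w0, w1, w10   // = -16 + w0*w1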
   4297     const TargetRegisterClass *OrrRC;
   4298     unsigned BitSize, OrrOpc, ZeroReg;
   4299     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
   4300       OrrOpc = AArch64::ORRWri;
   4301       OrrRC = &AArch64::GPR32spRegClass;
   4302       BitSize = 32;
   4303       ZeroReg = AArch64::WZR;
   4304       Opc = AArch64::MADDWrrr;
   4305       RC = &AArch64::GPR32RegClass;
   4306     } else {
   4307       OrrOpc = AArch64::ORRXri;
   4308       OrrRC = &AArch64::GPR64spRegClass;
   4309       BitSize = 64;
   4310       ZeroReg = AArch64::XZR;
   4311       Opc = AArch64::MADDXrrr;
   4312       RC = &AArch64::GPR64RegClass;
   4313     }
   4314     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
   4315     uint64_t Imm = Root.getOperand(2).getImm();
   4316     if (Root.getOperand(3).isImm()) {
   4317       unsigned Val = Root.getOperand(3).getImm();
   4318       Imm = Imm << Val;
   4319     }
   4320     uint64_t UImm = SignExtend64(-Imm, BitSize);
   4321     uint64_t Encoding;
   4322     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
   4323       MachineInstrBuilder MIB1 =
   4324           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
   4325               .addReg(ZeroReg)
   4326               .addImm(Encoding);
   4327       InsInstrs.push_back(MIB1);
   4328       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   4329       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   4330     }
   4331     break;
   4332   }
   4333   // Floating Point Support
   4334   case MachineCombinerPattern::FMULADDS_OP1:
   4335   case MachineCombinerPattern::FMULADDD_OP1:
   4336     // FMUL I=A,B,0
   4337     // FADD R,I,C
   4338     // ==> FMADD R,A,B,C
   4339     // --- Create(FMADD);
   4340     if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
   4341       Opc = AArch64::FMADDSrrr;
   4342       RC = &AArch64::FPR32RegClass;
   4343     } else {
   4344       Opc = AArch64::FMADDDrrr;
   4345       RC = &AArch64::FPR64RegClass;
   4346     }
   4347     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   4348     break;
   4349   case MachineCombinerPattern::FMULADDS_OP2:
   4350   case MachineCombinerPattern::FMULADDD_OP2:
   4351     // FMUL I=A,B,0
   4352     // FADD R,C,I
   4353     // ==> FMADD R,A,B,C
   4354     // --- Create(FMADD);
   4355     if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
   4356       Opc = AArch64::FMADDSrrr;
   4357       RC = &AArch64::FPR32RegClass;
   4358     } else {
   4359       Opc = AArch64::FMADDDrrr;
   4360       RC = &AArch64::FPR64RegClass;
   4361     }
   4362     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   4363     break;
   4364 
   4365   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
   4366     Opc = AArch64::FMLAv1i32_indexed;
   4367     RC = &AArch64::FPR32RegClass;
   4368     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4369                            FMAInstKind::Indexed);
   4370     break;
   4371   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
   4372     Opc = AArch64::FMLAv1i32_indexed;
   4373     RC = &AArch64::FPR32RegClass;
   4374     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4375                            FMAInstKind::Indexed);
   4376     break;
   4377 
   4378   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
   4379     Opc = AArch64::FMLAv1i64_indexed;
   4380     RC = &AArch64::FPR64RegClass;
   4381     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4382                            FMAInstKind::Indexed);
   4383     break;
   4384   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
   4385     Opc = AArch64::FMLAv1i64_indexed;
   4386     RC = &AArch64::FPR64RegClass;
   4387     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4388                            FMAInstKind::Indexed);
   4389     break;
   4390 
   4391   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
   4392   case MachineCombinerPattern::FMLAv2f32_OP1:
   4393     RC = &AArch64::FPR64RegClass;
   4394     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
   4395       Opc = AArch64::FMLAv2i32_indexed;
   4396       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4397                              FMAInstKind::Indexed);
   4398     } else {
   4399       Opc = AArch64::FMLAv2f32;
   4400       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4401                              FMAInstKind::Accumulator);
   4402     }
   4403     break;
   4404   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
   4405   case MachineCombinerPattern::FMLAv2f32_OP2:
   4406     RC = &AArch64::FPR64RegClass;
   4407     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
   4408       Opc = AArch64::FMLAv2i32_indexed;
   4409       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4410                              FMAInstKind::Indexed);
   4411     } else {
   4412       Opc = AArch64::FMLAv2f32;
   4413       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4414                              FMAInstKind::Accumulator);
   4415     }
   4416     break;
   4417 
   4418   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
   4419   case MachineCombinerPattern::FMLAv2f64_OP1:
   4420     RC = &AArch64::FPR128RegClass;
   4421     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
   4422       Opc = AArch64::FMLAv2i64_indexed;
   4423       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4424                              FMAInstKind::Indexed);
   4425     } else {
   4426       Opc = AArch64::FMLAv2f64;
   4427       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4428                              FMAInstKind::Accumulator);
   4429     }
   4430     break;
   4431   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
   4432   case MachineCombinerPattern::FMLAv2f64_OP2:
   4433     RC = &AArch64::FPR128RegClass;
   4434     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
   4435       Opc = AArch64::FMLAv2i64_indexed;
   4436       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4437                              FMAInstKind::Indexed);
   4438     } else {
   4439       Opc = AArch64::FMLAv2f64;
   4440       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4441                              FMAInstKind::Accumulator);
   4442     }
   4443     break;
   4444 
   4445   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
   4446   case MachineCombinerPattern::FMLAv4f32_OP1:
   4447     RC = &AArch64::FPR128RegClass;
   4448     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
   4449       Opc = AArch64::FMLAv4i32_indexed;
   4450       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4451                              FMAInstKind::Indexed);
   4452     } else {
   4453       Opc = AArch64::FMLAv4f32;
   4454       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4455                              FMAInstKind::Accumulator);
   4456     }
   4457     break;
   4458 
   4459   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
   4460   case MachineCombinerPattern::FMLAv4f32_OP2:
   4461     RC = &AArch64::FPR128RegClass;
   4462     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
   4463       Opc = AArch64::FMLAv4i32_indexed;
   4464       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4465                              FMAInstKind::Indexed);
   4466     } else {
   4467       Opc = AArch64::FMLAv4f32;
   4468       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4469                              FMAInstKind::Accumulator);
   4470     }
   4471     break;
   4472 
   4473   case MachineCombinerPattern::FMULSUBS_OP1:
   4474   case MachineCombinerPattern::FMULSUBD_OP1: {
   4475     // FMUL I=A,B,0
   4476     // FSUB R,I,C
   4477     // ==> FNMSUB R,A,B,C // = -C + A*B
   4478     // --- Create(FNMSUB);
   4479     if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
   4480       Opc = AArch64::FNMSUBSrrr;
   4481       RC = &AArch64::FPR32RegClass;
   4482     } else {
   4483       Opc = AArch64::FNMSUBDrrr;
   4484       RC = &AArch64::FPR64RegClass;
   4485     }
   4486     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   4487     break;
   4488   }
   4489 
   4490   case MachineCombinerPattern::FNMULSUBS_OP1:
   4491   case MachineCombinerPattern::FNMULSUBD_OP1: {
   4492     // FNMUL I=A,B,0
   4493     // FSUB R,I,C
   4494     // ==> FNMADD R,A,B,C // = -A*B - C
   4495     // --- Create(FNMADD);
   4496     if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
   4497       Opc = AArch64::FNMADDSrrr;
   4498       RC = &AArch64::FPR32RegClass;
   4499     } else {
   4500       Opc = AArch64::FNMADDDrrr;
   4501       RC = &AArch64::FPR64RegClass;
   4502     }
   4503     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   4504     break;
   4505   }
   4506 
   4507   case MachineCombinerPattern::FMULSUBS_OP2:
   4508   case MachineCombinerPattern::FMULSUBD_OP2: {
   4509     // FMUL I=A,B,0
   4510     // FSUB R,C,I
   4511     // ==> FMSUB R,A,B,C (computes C - A*B)
   4512     // --- Create(FMSUB);
   4513     if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
   4514       Opc = AArch64::FMSUBSrrr;
   4515       RC = &AArch64::FPR32RegClass;
   4516     } else {
   4517       Opc = AArch64::FMSUBDrrr;
   4518       RC = &AArch64::FPR64RegClass;
   4519     }
   4520     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   4521     break;
   4522   }
   4523 
   4524   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
   4525     Opc = AArch64::FMLSv1i32_indexed;
   4526     RC = &AArch64::FPR32RegClass;
   4527     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4528                            FMAInstKind::Indexed);
   4529     break;
   4530 
   4531   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
   4532     Opc = AArch64::FMLSv1i64_indexed;
   4533     RC = &AArch64::FPR64RegClass;
   4534     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4535                            FMAInstKind::Indexed);
   4536     break;
   4537 
   4538   case MachineCombinerPattern::FMLSv2f32_OP2:
   4539   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
   4540     RC = &AArch64::FPR64RegClass;
   4541     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
   4542       Opc = AArch64::FMLSv2i32_indexed;
   4543       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4544                              FMAInstKind::Indexed);
   4545     } else {
   4546       Opc = AArch64::FMLSv2f32;
   4547       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4548                              FMAInstKind::Accumulator);
   4549     }
   4550     break;
   4551 
   4552   case MachineCombinerPattern::FMLSv2f64_OP2:
   4553   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
   4554     RC = &AArch64::FPR128RegClass;
   4555     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
   4556       Opc = AArch64::FMLSv2i64_indexed;
   4557       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4558                              FMAInstKind::Indexed);
   4559     } else {
   4560       Opc = AArch64::FMLSv2f64;
   4561       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4562                              FMAInstKind::Accumulator);
   4563     }
   4564     break;
   4565 
   4566   case MachineCombinerPattern::FMLSv4f32_OP2:
   4567   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
   4568     RC = &AArch64::FPR128RegClass;
   4569     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
   4570       Opc = AArch64::FMLSv4i32_indexed;
   4571       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4572                              FMAInstKind::Indexed);
   4573     } else {
   4574       Opc = AArch64::FMLSv4f32;
   4575       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   4576                              FMAInstKind::Accumulator);
   4577     }
   4578     break;
   4579   case MachineCombinerPattern::FMLSv2f32_OP1:
   4580   case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
   4581     RC = &AArch64::FPR64RegClass;
   4582     unsigned NewVR = MRI.createVirtualRegister(RC);
   4583     MachineInstrBuilder MIB1 =
   4584         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
   4585             .add(Root.getOperand(2));
   4586     InsInstrs.push_back(MIB1);
   4587     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   4588     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
   4589       Opc = AArch64::FMLAv2i32_indexed;
   4590       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4591                              FMAInstKind::Indexed, &NewVR);
   4592     } else {
   4593       Opc = AArch64::FMLAv2f32;
   4594       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4595                              FMAInstKind::Accumulator, &NewVR);
   4596     }
   4597     break;
   4598   }
   4599   case MachineCombinerPattern::FMLSv4f32_OP1:
   4600   case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
   4601     RC = &AArch64::FPR128RegClass;
   4602     unsigned NewVR = MRI.createVirtualRegister(RC);
   4603     MachineInstrBuilder MIB1 =
   4604         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
   4605             .add(Root.getOperand(2));
   4606     InsInstrs.push_back(MIB1);
   4607     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   4608     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
   4609       Opc = AArch64::FMLAv4i32_indexed;
   4610       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4611                              FMAInstKind::Indexed, &NewVR);
   4612     } else {
   4613       Opc = AArch64::FMLAv4f32;
   4614       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4615                              FMAInstKind::Accumulator, &NewVR);
   4616     }
   4617     break;
   4618   }
   4619   case MachineCombinerPattern::FMLSv2f64_OP1:
   4620   case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
   4621     RC = &AArch64::FPR128RegClass;
   4622     unsigned NewVR = MRI.createVirtualRegister(RC);
   4623     MachineInstrBuilder MIB1 =
   4624         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
   4625             .add(Root.getOperand(2));
   4626     InsInstrs.push_back(MIB1);
   4627     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   4628     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
   4629       Opc = AArch64::FMLAv2i64_indexed;
   4630       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4631                              FMAInstKind::Indexed, &NewVR);
   4632     } else {
   4633       Opc = AArch64::FMLAv2f64;
   4634       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   4635                              FMAInstKind::Accumulator, &NewVR);
   4636     }
   4637     break;
   4638   }
   4639   } // end switch (Pattern)
   4640   // Record MUL and ADD/SUB for deletion
   4641   DelInstrs.push_back(MUL);
   4642   DelInstrs.push_back(&Root);
   4643 }
   4644 
   4645 /// Replace csincr-branch sequence by simple conditional branch
   4646 ///
   4647 /// Examples:
   4648 /// 1. \code
   4649 ///   csinc  w9, wzr, wzr, <condition code>
   4650 ///   tbnz   w9, #0, 0x44
   4651 ///    \endcode
   4652 /// to
   4653 ///    \code
   4654 ///   b.<inverted condition code>
   4655 ///    \endcode
   4656 ///
   4657 /// 2. \code
   4658 ///   csinc w9, wzr, wzr, <condition code>
   4659 ///   tbz   w9, #0, 0x44
   4660 ///    \endcode
   4661 /// to
   4662 ///    \code
   4663 ///   b.<condition code>
   4664 ///    \endcode
   4665 ///
   4666 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
   4667 /// compare's constant operand is power of 2.
   4668 ///
   4669 /// Examples:
   4670 ///    \code
   4671 ///   and  w8, w8, #0x400
   4672 ///   cbnz w8, L1
   4673 ///    \endcode
   4674 /// to
   4675 ///    \code
   4676 ///   tbnz w8, #10, L1
   4677 ///    \endcode
   4678 ///
   4679 /// \param  MI Conditional Branch
   4680 /// \return True when the simple conditional branch is generated
   4681 ///
   4682 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
   4683   bool IsNegativeBranch = false;
   4684   bool IsTestAndBranch = false;
   4685   unsigned TargetBBInMI = 0;
   4686   switch (MI.getOpcode()) {
   4687   default:
   4688     llvm_unreachable("Unknown branch instruction?");
   4689   case AArch64::Bcc:
   4690     return false;
   4691   case AArch64::CBZW:
   4692   case AArch64::CBZX:
   4693     TargetBBInMI = 1;
   4694     break;
   4695   case AArch64::CBNZW:
   4696   case AArch64::CBNZX:
   4697     TargetBBInMI = 1;
   4698     IsNegativeBranch = true;
   4699     break;
   4700   case AArch64::TBZW:
   4701   case AArch64::TBZX:
   4702     TargetBBInMI = 2;
   4703     IsTestAndBranch = true;
   4704     break;
   4705   case AArch64::TBNZW:
   4706   case AArch64::TBNZX:
   4707     TargetBBInMI = 2;
   4708     IsNegativeBranch = true;
   4709     IsTestAndBranch = true;
   4710     break;
   4711   }
   4712   // So we increment a zero register and test for bits other
   4713   // than bit 0? Conservatively bail out in case the verifier
   4714   // missed this case.
   4715   if (IsTestAndBranch && MI.getOperand(1).getImm())
   4716     return false;
   4717 
   4718   // Find Definition.
   4719   assert(MI.getParent() && "Incomplete machine instruction\n");
   4720   MachineBasicBlock *MBB = MI.getParent();
   4721   MachineFunction *MF = MBB->getParent();
   4722   MachineRegisterInfo *MRI = &MF->getRegInfo();
   4723   unsigned VReg = MI.getOperand(0).getReg();
   4724   if (!TargetRegisterInfo::isVirtualRegister(VReg))
   4725     return false;
   4726 
   4727   MachineInstr *DefMI = MRI->getVRegDef(VReg);
   4728 
   4729   // Look through COPY instructions to find definition.
   4730   while (DefMI->isCopy()) {
   4731     unsigned CopyVReg = DefMI->getOperand(1).getReg();
   4732     if (!MRI->hasOneNonDBGUse(CopyVReg))
   4733       return false;
   4734     if (!MRI->hasOneDef(CopyVReg))
   4735       return false;
   4736     DefMI = MRI->getVRegDef(CopyVReg);
   4737   }
   4738 
   4739   switch (DefMI->getOpcode()) {
   4740   default:
   4741     return false;
   4742   // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
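          // Illustrative sketch (hypothetical registers): for
          //   and  x8, x9, #0x4
          //   cbnz x8, <bb>
          // the mask has bit index Log2_64(0x4) = 2, which is < 32, so the W form
          // is used and the 64-bit input is accessed through its sub_32
          // sub-register, producing roughly
          //   tbnz w9, #2, <bb>
          // (assuming the AND result has no other uses).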
   4743   case AArch64::ANDWri:
   4744   case AArch64::ANDXri: {
   4745     if (IsTestAndBranch)
   4746       return false;
   4747     if (DefMI->getParent() != MBB)
   4748       return false;
   4749     if (!MRI->hasOneNonDBGUse(VReg))
   4750       return false;
   4751 
   4752     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
   4753     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
   4754         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
   4755     if (!isPowerOf2_64(Mask))
   4756       return false;
   4757 
   4758     MachineOperand &MO = DefMI->getOperand(1);
   4759     unsigned NewReg = MO.getReg();
   4760     if (!TargetRegisterInfo::isVirtualRegister(NewReg))
   4761       return false;
   4762 
   4763     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
   4764 
   4765     MachineBasicBlock &RefToMBB = *MBB;
   4766     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
   4767     DebugLoc DL = MI.getDebugLoc();
   4768     unsigned Imm = Log2_64(Mask);
   4769     unsigned Opc = (Imm < 32)
   4770                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
   4771                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
   4772     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
   4773                               .addReg(NewReg)
   4774                               .addImm(Imm)
   4775                               .addMBB(TBB);
   4776     // Register lives on to the new TBZ/TBNZ now.
   4777     MO.setIsKill(false);
   4778 
   4779     // For bit positions smaller than 32, we must use the 32-bit (W)
   4780     // variant in all cases, because the 64-bit variant cannot encode
   4781     // them. Therefore, if the input register is 64-bit, we need to
   4782     // take its 32-bit sub-register.
   4784     if (!Is32Bit && Imm < 32)
   4785       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
   4786     MI.eraseFromParent();
   4787     return true;
   4788   }
   4789   // Look for CSINC
   4790   case AArch64::CSINCWr:
   4791   case AArch64::CSINCXr: {
   4792     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
   4793           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
   4794         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
   4795           DefMI->getOperand(2).getReg() == AArch64::XZR))
   4796       return false;
   4797 
   4798     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
   4799       return false;
   4800 
   4801     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
   4802     // Convert only when the condition code is not modified between
   4803     // the CSINC and the branch. The CC may be used by other
   4804     // instructions in between.
   4805     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
   4806       return false;
   4807     MachineBasicBlock &RefToMBB = *MBB;
   4808     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
   4809     DebugLoc DL = MI.getDebugLoc();
   4810     if (IsNegativeBranch)
   4811       CC = AArch64CC::getInvertedCondCode(CC);
   4812     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
   4813     MI.eraseFromParent();
   4814     return true;
   4815   }
   4816   }
   4817 }
   4818 
   4819 std::pair<unsigned, unsigned>
   4820 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
   4821   const unsigned Mask = AArch64II::MO_FRAGMENT;
   4822   return std::make_pair(TF & Mask, TF & ~Mask);
   4823 }
   4824 
   4825 ArrayRef<std::pair<unsigned, const char *>>
   4826 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
   4827   using namespace AArch64II;
   4828 
   4829   static const std::pair<unsigned, const char *> TargetFlags[] = {
   4830       {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
   4831       {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
   4832       {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
   4833       {MO_HI12, "aarch64-hi12"}};
   4834   return makeArrayRef(TargetFlags);
   4835 }
   4836 
   4837 ArrayRef<std::pair<unsigned, const char *>>
   4838 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
   4839   using namespace AArch64II;
   4840 
   4841   static const std::pair<unsigned, const char *> TargetFlags[] = {
   4842       {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
   4843   return makeArrayRef(TargetFlags);
   4844 }
   4845 
   4846 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
   4847 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
   4848   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
   4849       {{MOSuppressPair, "aarch64-suppress-pair"},
   4850        {MOStridedAccess, "aarch64-strided-access"}};
   4851   return makeArrayRef(TargetFlags);
   4852 }
   4853 
   4854 /// Constants defining how certain sequences should be outlined.
   4855 /// This encompasses how an outlined function should be called, and what kind of
   4856 /// frame should be emitted for that outlined function.
   4857 ///
   4858 /// \p MachineOutlinerDefault implies that the function should be called with
   4859 /// a save and restore of LR to the stack.
   4860 ///
   4861 /// That is,
   4862 ///
   4863 /// I1     Save LR                    OUTLINED_FUNCTION:
   4864 /// I2 --> BL OUTLINED_FUNCTION       I1
   4865 /// I3     Restore LR                 I2
   4866 ///                                   I3
   4867 ///                                   RET
   4868 ///
   4869 /// * Call construction overhead: 3 (save + BL + restore)
   4870 /// * Frame construction overhead: 1 (ret)
   4871 /// * Requires stack fixups? Yes
   4872 ///
   4873 /// \p MachineOutlinerTailCall implies that the function is being created from
   4874 /// a sequence of instructions ending in a return.
   4875 ///
   4876 /// That is,
   4877 ///
   4878 /// I1                             OUTLINED_FUNCTION:
   4879 /// I2 --> B OUTLINED_FUNCTION     I1
   4880 /// RET                            I2
   4881 ///                                RET
   4882 ///
   4883 /// * Call construction overhead: 1 (B)
   4884 /// * Frame construction overhead: 0 (Return included in sequence)
   4885 /// * Requires stack fixups? No
   4886 ///
   4887 /// \p MachineOutlinerNoLRSave implies that the function should be called using
   4888 /// a BL instruction, but doesn't require LR to be saved and restored. This
   4889 /// happens when LR is known to be dead.
   4890 ///
   4891 /// That is,
   4892 ///
   4893 /// I1                                OUTLINED_FUNCTION:
   4894 /// I2 --> BL OUTLINED_FUNCTION       I1
   4895 /// I3                                I2
   4896 ///                                   I3
   4897 ///                                   RET
   4898 ///
   4899 /// * Call construction overhead: 1 (BL)
   4900 /// * Frame construction overhead: 1 (RET)
   4901 /// * Requires stack fixups? No
   4902 ///
   4903 /// \p MachineOutlinerThunk implies that the function is being created from
   4904 /// a sequence of instructions ending in a call. The outlined function is
   4905 /// called with a BL instruction, and the outlined function tail-calls the
   4906 /// original call destination.
   4907 ///
   4908 /// That is,
   4909 ///
   4910 /// I1                                OUTLINED_FUNCTION:
   4911 /// I2 --> BL OUTLINED_FUNCTION       I1
   4912 /// BL f                              I2
   4913 ///                                   B f
   4914 /// * Call construction overhead: 1 (BL)
   4915 /// * Frame construction overhead: 0
   4916 /// * Requires stack fixups? No
   4917 ///
   4918 /// \p MachineOutlinerRegSave implies that the function should be called with a
   4919 /// save and restore of LR to an available register. This allows us to avoid
   4920 /// stack fixups. Note that this outlining variant is compatible with the
   4921 /// NoLRSave case.
   4922 ///
   4923 /// That is,
   4924 ///
   4925 /// I1     Save LR                    OUTLINED_FUNCTION:
   4926 /// I2 --> BL OUTLINED_FUNCTION       I1
   4927 /// I3     Restore LR                 I2
   4928 ///                                   I3
   4929 ///                                   RET
   4930 ///
   4931 /// * Call construction overhead: 3 (save + BL + restore)
   4932 /// * Frame construction overhead: 1 (ret)
   4933 /// * Requires stack fixups? No
   4934 enum MachineOutlinerClass {
   4935   MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
   4936   MachineOutlinerTailCall, /// Only emit a branch.
   4937   MachineOutlinerNoLRSave, /// Emit a call and return.
   4938   MachineOutlinerThunk,    /// Emit a call and tail-call.
   4939   MachineOutlinerRegSave   /// Same as default, but save to a register.
   4940 };
   4941 
   4942 enum MachineOutlinerMBBFlags {
   4943   LRUnavailableSomewhere = 0x2,
   4944   HasCalls = 0x4
   4945 };
   4946 
   4947 unsigned
   4948 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
   4949   MachineFunction *MF = C.getMF();
   4950   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
   4951       MF->getSubtarget().getRegisterInfo());
   4952 
   4953   // Check if there is an available register across the sequence that we can
   4954   // use.
   4955   for (unsigned Reg : AArch64::GPR64RegClass) {
   4956     if (!ARI->isReservedReg(*MF, Reg) &&
   4957         Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
   4958         Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
   4959         Reg != AArch64::X17 && // Ditto for X17.
   4960         C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
   4961       return Reg;
   4962   }
   4963 
   4964   // No suitable register. Return 0.
   4965   return 0u;
   4966 }
   4967 
   4968 outliner::OutlinedFunction
   4969 AArch64InstrInfo::getOutliningCandidateInfo(
   4970     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
   4971   unsigned SequenceSize = std::accumulate(
   4972       RepeatedSequenceLocs[0].front(),
   4973       std::next(RepeatedSequenceLocs[0].back()),
   4974       0, [this](unsigned Sum, const MachineInstr &MI) {
   4975         return Sum + getInstSizeInBytes(MI);
   4976       });
   4977 
   4978   // Compute liveness information for each candidate.
   4979   const TargetRegisterInfo &TRI = getRegisterInfo();
   4980   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
   4981                 [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });
   4982 
   4983   // According to the AArch64 Procedure Call Standard, the following are
   4984   // undefined on entry/exit from a function call:
   4985   //
   4986   // * Registers x16, x17, (and thus w16, w17)
   4987   // * Condition codes (and thus the NZCV register)
   4988   //
   4989   // Because of this, we can't outline any sequence of instructions where
   4990   // one of these registers is live into/across it. Thus, we need to
   4991   // delete those candidates.
   4994   auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
   4995     LiveRegUnits LRU = C.LRU;
   4996     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
   4997             !LRU.available(AArch64::NZCV));
   4998   };
   4999 
   5000   // Erase every candidate that violates the restrictions above. (It could be
   5001   // true that we have viable candidates, so it's not worth bailing out in
   5002   // the case that, say, 1 out of 20 candidates violates the restrictions.)
   5003   RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
   5004                                             RepeatedSequenceLocs.end(),
   5005                                             CantGuaranteeValueAcrossCall),
   5006                              RepeatedSequenceLocs.end());
   5007 
   5008   // If the sequence is empty, we're done.
   5009   if (RepeatedSequenceLocs.empty())
   5010     return outliner::OutlinedFunction();
   5011 
   5012   // At this point, we have only "safe" candidates to outline. Figure out
   5013   // frame + call instruction information.
   5014 
   5015   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
   5016 
   5017   // Helper lambda which sets call information for every candidate.
   5018   auto SetCandidateCallInfo =
   5019       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
   5020         for (outliner::Candidate &C : RepeatedSequenceLocs)
   5021           C.setCallInfo(CallID, NumBytesForCall);
   5022       };
   5023 
   5024   unsigned FrameID = MachineOutlinerDefault;
   5025   unsigned NumBytesToCreateFrame = 4;
   5026 
   5027   // If the last instruction in any candidate is a terminator, then we should
   5028   // tail call all of the candidates.
   5029   if (RepeatedSequenceLocs[0].back()->isTerminator()) {
   5030     FrameID = MachineOutlinerTailCall;
   5031     NumBytesToCreateFrame = 0;
   5032     SetCandidateCallInfo(MachineOutlinerTailCall, 4);
   5033   }
   5034 
   5035   else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
   5036     // FIXME: Do we need to check if the code after this uses the value of LR?
   5037     FrameID = MachineOutlinerThunk;
   5038     NumBytesToCreateFrame = 0;
   5039     SetCandidateCallInfo(MachineOutlinerThunk, 4);
   5040   }
   5041 
   5042   // Make sure that LR isn't live on entry to this candidate. The only
   5043   // instructions that use LR that could possibly appear in a repeated sequence
   5044   // are calls. Therefore, we only have to check and see if LR is dead on entry
   5045   // to (or exit from) some candidate.
   5046   else if (std::all_of(RepeatedSequenceLocs.begin(),
   5047                        RepeatedSequenceLocs.end(),
   5048                        [](outliner::Candidate &C) {
   5049                          return C.LRU.available(AArch64::LR);
   5050                          })) {
   5051     FrameID = MachineOutlinerNoLRSave;
   5052     NumBytesToCreateFrame = 4;
   5053     SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
   5054   }
   5055 
   5056   // LR is live, so we need to save it. Decide whether it should be saved to
   5057   // the stack, or if it can be saved to a register.
   5058   else {
   5059     if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
   5060                     [this](outliner::Candidate &C) {
   5061                       return findRegisterToSaveLRTo(C);
   5062                     })) {
   5063       // Every candidate has an available callee-saved register for the save.
   5064       // We can save LR to a register.
   5065       FrameID = MachineOutlinerRegSave;
   5066       NumBytesToCreateFrame = 4;
   5067       SetCandidateCallInfo(MachineOutlinerRegSave, 12);
   5068     }
   5069 
   5070     else {
   5071       // At least one candidate does not have an available callee-saved
   5072       // register. We must save LR to the stack.
   5073       FrameID = MachineOutlinerDefault;
   5074       NumBytesToCreateFrame = 4;
   5075       SetCandidateCallInfo(MachineOutlinerDefault, 12);
   5076     }
   5077   }
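          // A note on the figures above (a sketch based on the fixed 4-byte AArch64
          // instruction encoding): the 12-byte call overhead in the RegSave and
          // Default cases corresponds to three instructions around each call site
          // (save LR, BL, restore LR), while the 4-byte cases need only the single
          // B or BL.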
   5078 
   5079   // Check if the range contains a call. These require a save + restore of the
   5080   // link register.
   5081   if (std::any_of(RepeatedSequenceLocs[0].front(),
   5082                   RepeatedSequenceLocs[0].back(),
   5083                   [](const MachineInstr &MI) { return MI.isCall(); }))
   5084     NumBytesToCreateFrame += 8; // Save + restore the link register.
   5085 
   5086   // Handle the last instruction separately. If this is a tail call, then the
   5087   // last instruction is a call. We don't want to save + restore in this case.
   5088   // However, it could be possible that the last instruction is a call without
   5089   // it being valid to tail call this sequence. We should consider this as well.
   5090   else if (FrameID != MachineOutlinerThunk &&
   5091            FrameID != MachineOutlinerTailCall &&
   5092            RepeatedSequenceLocs[0].back()->isCall())
   5093     NumBytesToCreateFrame += 8;
   5094 
   5095   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
   5096                                     NumBytesToCreateFrame, FrameID);
   5097 }
   5098 
   5099 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
   5100     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
   5101   const Function &F = MF.getFunction();
   5102 
   5103   // Can F be deduplicated by the linker? If it can, don't outline from it.
   5104   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
   5105     return false;
   5106 
   5107   // Don't outline from functions with section markings; the program could
   5108   // expect that all the code is in the named section.
   5109   // FIXME: Allow outlining from multiple functions with the same section
   5110   // marking.
   5111   if (F.hasSection())
   5112     return false;
   5113 
   5114   // Outlining from functions with redzones is unsafe since the outliner may
   5115   // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
   5116   // outline from it.
   5117   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   5118   if (!AFI || AFI->hasRedZone().getValueOr(true))
   5119     return false;
   5120 
   5121   // It's safe to outline from MF.
   5122   return true;
   5123 }
   5124 
   5125 unsigned
   5126 AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
   5127   unsigned Flags = 0x0;
   5128   // Check if there's a call inside this MachineBasicBlock. If there is, then
   5129   // set a flag.
   5130   if (std::any_of(MBB.begin(), MBB.end(),
   5131                   [](MachineInstr &MI) { return MI.isCall(); }))
   5132     Flags |= MachineOutlinerMBBFlags::HasCalls;
   5133 
   5134   // Check if LR is available through all of the MBB. If it's not, then set
   5135   // a flag.
   5136   assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
   5137          "Suitable Machine Function for outlining must track liveness");
   5138   LiveRegUnits LRU(getRegisterInfo());
   5139   LRU.addLiveOuts(MBB);
   5140 
   5141   std::for_each(MBB.rbegin(),
   5142                 MBB.rend(),
   5143                 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
   5144 
   5145   if (!LRU.available(AArch64::LR))
   5146     Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
   5147 
   5148   return Flags;
   5149 }
   5150 
   5151 outliner::InstrType
   5152 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
   5153                                    unsigned Flags) const {
   5154   MachineInstr &MI = *MIT;
   5155   MachineBasicBlock *MBB = MI.getParent();
   5156   MachineFunction *MF = MBB->getParent();
   5157   AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
   5158 
   5159   // Don't outline LOHs.
   5160   if (FuncInfo->getLOHRelated().count(&MI))
   5161     return outliner::InstrType::Illegal;
   5162 
   5163   // Don't allow debug values to impact outlining type.
   5164   if (MI.isDebugInstr() || MI.isIndirectDebugValue())
   5165     return outliner::InstrType::Invisible;
   5166 
   5167   // At this point, KILL instructions don't really tell us much so we can go
   5168   // ahead and skip over them.
   5169   if (MI.isKill())
   5170     return outliner::InstrType::Invisible;
   5171 
   5172   // Is this a terminator for a basic block?
   5173   if (MI.isTerminator()) {
   5174 
   5175     // Is this the end of a function?
   5176     if (MI.getParent()->succ_empty())
   5177       return outliner::InstrType::Legal;
   5178 
   5179     // It's not, so don't outline it.
   5180     return outliner::InstrType::Illegal;
   5181   }
   5182 
   5183   // Make sure none of the operands are un-outlinable.
   5184   for (const MachineOperand &MOP : MI.operands()) {
   5185     if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
   5186         MOP.isTargetIndex())
   5187       return outliner::InstrType::Illegal;
   5188 
   5189     // If it uses LR or W30 explicitly, then don't touch it.
   5190     if (MOP.isReg() && !MOP.isImplicit() &&
   5191         (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
   5192       return outliner::InstrType::Illegal;
   5193   }
   5194 
   5195   // Special cases for instructions that can always be outlined, but will fail
   5196   // the later tests, e.g., ADRPs, which are PC-relative and may use LR, but can
   5197   // always be outlined because they don't require a *specific* value to be in LR.
   5198   if (MI.getOpcode() == AArch64::ADRP)
   5199     return outliner::InstrType::Legal;
   5200 
   5201   // If MI is a call we might be able to outline it. We don't want to outline
   5202   // any calls that rely on the position of items on the stack. When we outline
   5203   // something containing a call, we have to emit a save and restore of LR in
   5204   // the outlined function. Currently, this always happens by saving LR to the
   5205   // stack. Thus, if we outline, say, half the parameters for a function call
   5206   // plus the call, then we'll break the callee's expectations for the layout
   5207   // of the stack.
   5208   //
   5209   // FIXME: Allow calls to functions which construct a stack frame, as long
   5210   // as they don't access arguments on the stack.
   5211   // FIXME: Figure out some way to analyze functions defined in other modules.
   5212   // We should be able to compute the memory usage based on the IR calling
   5213   // convention, even if we can't see the definition.
   5214   if (MI.isCall()) {
   5215     // Get the function associated with the call. Look at each operand and find
   5216     // the one that represents the callee and get its name.
   5217     const Function *Callee = nullptr;
   5218     for (const MachineOperand &MOP : MI.operands()) {
   5219       if (MOP.isGlobal()) {
   5220         Callee = dyn_cast<Function>(MOP.getGlobal());
   5221         break;
   5222       }
   5223     }
   5224 
   5225     // Never outline calls to mcount.  There isn't any rule that would require
   5226     // this, but the Linux kernel's "ftrace" feature depends on it.
   5227     if (Callee && Callee->getName() == "\01_mcount")
   5228       return outliner::InstrType::Illegal;
   5229 
   5230     // If we don't know anything about the callee, assume it depends on the
   5231     // stack layout of the caller. In that case, it's only legal to outline
   5232     // as a tail-call.  Whitelist the call instructions we know about so we
   5233     // don't get unexpected results with call pseudo-instructions.
   5234     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
   5235     if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
   5236       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
   5237 
   5238     if (!Callee)
   5239       return UnknownCallOutlineType;
   5240 
   5241     // We have a function we have information about. Check if it's something
   5242     // we can safely outline.
   5243     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
   5244 
   5245     // We don't know what's going on with the callee at all. Don't touch it.
   5246     if (!CalleeMF)
   5247       return UnknownCallOutlineType;
   5248 
   5249     // Check if we know anything about the callee saves on the function. If we
   5250     // don't, then don't touch it, since that implies that we haven't
   5251     // computed anything about its stack frame yet.
   5252     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
   5253     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
   5254         MFI.getNumObjects() > 0)
   5255       return UnknownCallOutlineType;
   5256 
   5257     // At this point, we can say that CalleeMF ought to not pass anything on the
   5258     // stack. Therefore, we can outline it.
   5259     return outliner::InstrType::Legal;
   5260   }
   5261 
   5262   // Don't outline positions.
   5263   if (MI.isPosition())
   5264     return outliner::InstrType::Illegal;
   5265 
   5266   // Don't touch the link register or W30.
   5267   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
   5268       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
   5269     return outliner::InstrType::Illegal;
   5270 
   5271   // Does this use the stack?
   5272   if (MI.modifiesRegister(AArch64::SP, &RI) ||
   5273       MI.readsRegister(AArch64::SP, &RI)) {
   5274     // True if there is no chance that any outlined candidate from this range
   5275     // could require stack fixups. That is, both
   5276     // * LR is available in the range (No save/restore around call)
   5277     // * The range doesn't include calls (No save/restore in outlined frame)
   5278     // are true.
   5279     // FIXME: This is very restrictive; the flags check the whole block,
   5280     // not just the bit we will try to outline.
   5281     bool MightNeedStackFixUp =
   5282         (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
   5283                   MachineOutlinerMBBFlags::HasCalls));
   5284 
   5285     // If this instruction is in a range where it *never* needs to be fixed
   5286     // up, then we can *always* outline it. This is true even if it's not
   5287     // possible to fix that instruction up.
   5288     //
   5289     // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
   5290     // use SP. Suppose that I1 sits within a range that definitely doesn't
   5291     // need stack fixups, while I2 sits in a range that does.
   5292     //
   5293     // First, I1 can be outlined as long as we *never* fix up the stack in
   5294     // any sequence containing it. I1 is already a safe instruction in the
   5295     // original program, so as long as we don't modify it we're good to go.
   5296     // So this leaves us with showing that outlining I2 won't break our
   5297     // program.
   5298     //
   5299     // Suppose I1 and I2 belong to equivalent candidate sequences. When we
   5300     // look at I2, we need to see if it can be fixed up. Suppose I2 (and
   5301     // thus I1) cannot be fixed up. Then I2 will be assigned a unique
   5302     // integer label; thus, I2 cannot belong to any candidate sequence (a
   5303     // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
   5304     // as well, so we're good. Thus, I1 is always safe to outline.
   5305     //
   5306     // This gives us two things: first off, it buys us some more instructions
   5307     // for our search space by deeming stack instructions illegal only when
   5308     // they can't be fixed up AND we might have to fix them up. Second off,
   5309     // This allows us to catch tricky instructions like, say,
   5310     // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
   5311     // be paired with later SUBXris, which might *not* end up being outlined.
   5312     // If we mess with the stack to save something, then an ADDXri messes with
   5313     // it *after*, then we aren't going to restore the right something from
   5314     // the stack if we don't outline the corresponding SUBXri first. ADDXris and
   5315     // SUBXris are extremely common in prologue/epilogue code, so supporting
   5316     // them in the outliner can be a pretty big win!
   5317     if (!MightNeedStackFixUp)
   5318       return outliner::InstrType::Legal;
   5319 
   5320     // Any modification of SP will break our code to save/restore LR.
   5321     // FIXME: We could handle some instructions which add a constant offset to
   5322     // SP, with a bit more work.
   5323     if (MI.modifiesRegister(AArch64::SP, &RI))
   5324       return outliner::InstrType::Illegal;
   5325 
   5326     // At this point, we have a stack instruction that we might need to fix
   5327     // up. We'll handle it if it's a load or store.
   5328     if (MI.mayLoadOrStore()) {
   5329       unsigned Base;  // Filled with the base register of MI.
   5330       int64_t Offset; // Filled with the offset of MI.
   5331       unsigned DummyWidth;
   5332 
   5333       // Does it allow us to offset the base register and is the base SP?
   5334       if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
   5335           Base != AArch64::SP)
   5336         return outliner::InstrType::Illegal;
   5337 
   5338       // Find the minimum/maximum offset for this instruction and check if
   5339       // fixing it up would be in range.
   5340       int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
   5341       unsigned Scale;               // The scale to multiply the offsets by.
   5342       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
   5343 
   5344       // TODO: We should really test what happens if an instruction overflows.
   5345       // This is tricky to test with IR tests, but when the outliner is moved
   5346       // to a MIR test, it really ought to be checked.
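              // Worked example (illustrative, assuming an LDRXui with Scale = 8 and
              // an unscaled range of [0, 4095]): an access at byte offset 32760
              // (the encodable maximum, 4095 * 8) would become 32776 after adding
              // 16, which is out of range, so it is rejected below.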
   5347       Offset += 16; // Update the offset to what it would be if we outlined.
   5348       if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
   5349         return outliner::InstrType::Illegal;
   5350 
   5351       // It's in range, so we can outline it.
   5352       return outliner::InstrType::Legal;
   5353     }
   5354 
   5355     // FIXME: Add handling for instructions like "add x0, sp, #8".
   5356 
   5357     // We can't fix it up, so don't outline it.
   5358     return outliner::InstrType::Illegal;
   5359   }
   5360 
   5361   return outliner::InstrType::Legal;
   5362 }
   5363 
   5364 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
   5365   for (MachineInstr &MI : MBB) {
   5366     unsigned Base, Width;
   5367     int64_t Offset;
   5368 
   5369     // Is this a load or store with an immediate offset with SP as the base?
   5370     if (!MI.mayLoadOrStore() ||
   5371         !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
   5372         Base != AArch64::SP)
   5373       continue;
   5374 
   5375     // It is, so we have to fix it up.
   5376     unsigned Scale;
   5377     int64_t Dummy1, Dummy2;
   5378 
   5379     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
   5380     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
   5381     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
   5382     assert(Scale != 0 && "Unexpected opcode!");
   5383 
   5384     // We've pushed the return address to the stack, so add 16 to the offset.
   5385     // This is safe, since we already checked if it would overflow when we
   5386     // checked if this instruction was legal to outline.
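            // Worked example (illustrative): for an LDRXui of the form
            // "ldr x0, [sp, #8]", Scale is 8 and the byte offset is 8, so the new
            // scaled immediate is (8 + 16) / 8 = 3, i.e. "ldr x0, [sp, #24]".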
   5387     int64_t NewImm = (Offset + 16) / Scale;
   5388     StackOffsetOperand.setImm(NewImm);
   5389   }
   5390 }
   5391 
   5392 void AArch64InstrInfo::buildOutlinedFrame(
   5393     MachineBasicBlock &MBB, MachineFunction &MF,
   5394     const outliner::OutlinedFunction &OF) const {
   5395   // For thunk outlining, rewrite the last instruction from a call to a
   5396   // tail-call.
   5397   if (OF.FrameConstructionID == MachineOutlinerThunk) {
   5398     MachineInstr *Call = &*--MBB.instr_end();
   5399     unsigned TailOpcode;
   5400     if (Call->getOpcode() == AArch64::BL) {
   5401       TailOpcode = AArch64::TCRETURNdi;
   5402     } else {
   5403       assert(Call->getOpcode() == AArch64::BLR);
   5404       TailOpcode = AArch64::TCRETURNri;
   5405     }
   5406     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
   5407                             .add(Call->getOperand(0))
   5408                             .addImm(0);
   5409     MBB.insert(MBB.end(), TC);
   5410     Call->eraseFromParent();
   5411   }
   5412 
   5413   // Is there a call in the outlined range?
   5414   auto IsNonTailCall = [](MachineInstr &MI) {
   5415     return MI.isCall() && !MI.isReturn();
   5416   };
   5417   if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
   5418     // Fix up the instructions in the range, since we're going to modify the
   5419     // stack.
   5420     assert(OF.FrameConstructionID != MachineOutlinerDefault &&
   5421            "Can only fix up stack references once");
   5422     fixupPostOutline(MBB);
   5423 
   5424     // LR has to be a live in so that we can save it.
   5425     MBB.addLiveIn(AArch64::LR);
   5426 
   5427     MachineBasicBlock::iterator It = MBB.begin();
   5428     MachineBasicBlock::iterator Et = MBB.end();
   5429 
   5430     if (OF.FrameConstructionID == MachineOutlinerTailCall ||
   5431         OF.FrameConstructionID == MachineOutlinerThunk)
   5432       Et = std::prev(MBB.end());
   5433 
   5434     // Insert a save before the outlined region
   5435     MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
   5436                                 .addReg(AArch64::SP, RegState::Define)
   5437                                 .addReg(AArch64::LR)
   5438                                 .addReg(AArch64::SP)
   5439                                 .addImm(-16);
   5440     It = MBB.insert(It, STRXpre);
   5441 
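            // Describe the LR spill to the unwinder: look up LR's DWARF register
            // number for the CFI directives emitted below.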
   5442     const TargetSubtargetInfo &STI = MF.getSubtarget();
   5443     const MCRegisterInfo *MRI = STI.getRegisterInfo();
   5444     unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
   5445 
   5446     // Add a CFI saying the stack was moved 16 B down.
   5447     int64_t StackPosEntry =
   5448         MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
   5449     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
   5450         .addCFIIndex(StackPosEntry)
   5451         .setMIFlags(MachineInstr::FrameSetup);
   5452 
   5453     // Add a CFI saying where the unwinder can now find the saved LR: 16 B
   5454     // below the CFA.
   5455     int64_t LRPosEntry =
   5456         MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
   5457     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
   5458         .addCFIIndex(LRPosEntry)
   5459         .setMIFlags(MachineInstr::FrameSetup);
   5460 
   5461     // Insert a restore before the terminator for the function.
   5462     MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
   5463                                  .addReg(AArch64::SP, RegState::Define)
   5464                                  .addReg(AArch64::LR, RegState::Define)
   5465                                  .addReg(AArch64::SP)
   5466                                  .addImm(16);
   5467     Et = MBB.insert(Et, LDRXpost);
   5468   }
   5469 
   5470   // If this is a tail call outlined function, then there's already a return.
   5471   if (OF.FrameConstructionID == MachineOutlinerTailCall ||
   5472       OF.FrameConstructionID == MachineOutlinerThunk)
   5473     return;
   5474 
   5475   // It's not a tail call, so we have to insert the return ourselves.
   5476   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
   5477                           .addReg(AArch64::LR, RegState::Undef);
   5478   MBB.insert(MBB.end(), ret);
   5479 
   5480   // Did we have to modify the stack by saving the link register?
   5481   if (OF.FrameConstructionID != MachineOutlinerDefault)
   5482     return;
   5483 
   5484   // We modified the stack.
   5485   // Walk over the basic block and fix up all the stack accesses.
   5486   fixupPostOutline(MBB);
   5487 }
   5488 
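        // Emit the call to the outlined function at the insertion point. Depending on
        // the candidate this is a plain tail call, a bare BL, or a BL wrapped in code
        // that saves and restores LR (in a spare register or on the stack).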
   5489 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
   5490     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
   5491     MachineFunction &MF, const outliner::Candidate &C) const {
   5492 
   5493   // Are we tail calling?
   5494   if (C.CallConstructionID == MachineOutlinerTailCall) {
   5495     // If yes, then we can just branch to the label.
   5496     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
   5497                             .addGlobalAddress(M.getNamedValue(MF.getName()))
   5498                             .addImm(0));
   5499     return It;
   5500   }
   5501 
   5502   // Are we saving the link register?
   5503   if (C.CallConstructionID == MachineOutlinerNoLRSave ||
   5504       C.CallConstructionID == MachineOutlinerThunk) {
   5505     // No, so just insert the call.
   5506     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
   5507                             .addGlobalAddress(M.getNamedValue(MF.getName())));
   5508     return It;
   5509   }
   5510 
   5511   // We want to return the spot where we inserted the call.
   5512   MachineBasicBlock::iterator CallPt;
   5513 
   5514   // Instructions for saving and restoring LR around the call instruction we're
   5515   // going to insert.
   5516   MachineInstr *Save;
   5517   MachineInstr *Restore;
   5518   // Can we save to a register?
   5519   if (C.CallConstructionID == MachineOutlinerRegSave) {
   5520     // FIXME: This logic should be sunk into a target-specific interface so that
   5521     // we don't have to recompute the register.
   5522     unsigned Reg = findRegisterToSaveLRTo(C);
   5523     assert(Reg != 0 && "No callee-saved register available?");
   5524 
   5525     // Save and restore LR from that register.
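            // ORR Xd, XZR, Xm is the canonical MOV between 64-bit GPRs.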
   5526     Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
   5527                .addReg(AArch64::XZR)
   5528                .addReg(AArch64::LR)
   5529                .addImm(0);
   5530     Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
   5531                   .addReg(AArch64::XZR)
   5532                   .addReg(Reg)
   5533                   .addImm(0);
   5534   } else {
   5535     // We have the default case. Save and restore from SP.
   5536     Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
   5537                .addReg(AArch64::SP, RegState::Define)
   5538                .addReg(AArch64::LR)
   5539                .addReg(AArch64::SP)
   5540                .addImm(-16);
   5541     Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
   5542                   .addReg(AArch64::SP, RegState::Define)
   5543                   .addReg(AArch64::LR, RegState::Define)
   5544                   .addReg(AArch64::SP)
   5545                   .addImm(16);
   5546   }
   5547 
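          // Emit the save, then the call, then the restore, and return an iterator
          // pointing at the call.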
   5548   It = MBB.insert(It, Save);
   5549   It++;
   5550 
   5551   // Insert the call.
   5552   It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
   5553                           .addGlobalAddress(M.getNamedValue(MF.getName())));
   5554   CallPt = It;
   5555   It++;
   5556 
   5557   It = MBB.insert(It, Restore);
   5558   return CallPt;
   5559 }
   5560 
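        // Outlining from everywhere is purely a code-size optimization, so only do it
        // by default for functions that are optimized for minimum size.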
   5561 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
   5562   MachineFunction &MF) const {
   5563   return MF.getFunction().optForMinSize();
   5564 }
   5565