//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64ConditionalCompares pass which reduces
// branching and code size by using the conditional compare instructions CCMP,
// CCMN, and FCCMP.
//
// The CFG transformations for forming conditional compares are very similar to
// if-conversion, and this pass should run immediately before the early
// if-conversion pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-ccmp"

// Absolute maximum number of instructions allowed per speculated block.
// This bypasses all other heuristics, so it should be set fairly high.
static cl::opt<unsigned> BlockInstrLimit(
    "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
    cl::desc("Maximum number of instructions per speculated block."));

// Stress testing mode - disable heuristics.
static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
                            cl::desc("Turn all knobs to 11"));

STATISTIC(NumConsidered, "Number of ccmps considered");
STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");

STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");

STATISTIC(NumConverted, "Number of ccmp instructions created");
STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");

//===----------------------------------------------------------------------===//
//                                 SSACCmpConv
//===----------------------------------------------------------------------===//
//
// The SSACCmpConv class performs ccmp-conversion on SSA form machine code
// after determining if it is possible. The class contains no heuristics;
// external code should be used to determine when ccmp-conversion is a good
// idea.
//
// CCmp-formation works on a CFG representing chained conditions, typically
// from C's short-circuit || and && operators:
//
//   From:         Head            To:         Head
//                 / |                         CmpBB
//                /  |                         / |
//               |  CmpBB                     /  |
//               |  / |                    Tail  |
//               | /  |                      |   |
//              Tail  |                      |   |
//                |   |                      |   |
//               ... ...                    ... ...
//
// The Head block is terminated by a br.cond instruction, and the CmpBB block
// contains compare + br.cond. Tail must be a successor of both.
//
// The cmp-conversion turns the compare instruction in CmpBB into a conditional
// compare, and merges CmpBB into Head, speculatively executing its
// instructions. The AArch64 conditional compare instructions have an immediate
// operand that specifies the NZCV flag values when the condition is false and
// the compare isn't executed. This makes it possible to chain compares with
// different condition codes.
//
// Example:
//
//    if (a == 5 || b == 17)
//      foo();
//
//    Head:
//       cmp  w0, #5
//       b.eq Tail
//    CmpBB:
//       cmp  w1, #17
//       b.eq Tail
//    ...
//    Tail:
//      bl _foo
//
//  Becomes:
//
//    Head:
//       cmp  w0, #5
//       ccmp w1, #17, 4, ne  ; 4 = nZcv
//       b.eq Tail
//    ...
//    Tail:
//      bl _foo
//
// The ccmp condition code is the one that would cause the Head terminator to
// branch to CmpBB.
//
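// For illustration, a && chain converts the same way but needs a different
// NZCV immediate. Here Tail is the block that skips the call:
//
//    if (a == 5 && b == 17)
//      foo();
//
//    Head:
//       cmp  w0, #5
//       b.ne Tail
//    CmpBB:
//       cmp  w1, #17
//       b.ne Tail
//    ...
//
//  Becomes:
//
//    Head:
//       cmp  w0, #5
//       ccmp w1, #17, 0, eq  ; 0 = nzcv
//       b.ne Tail
//    ...
//
// Head falls through to CmpBB on eq, so eq becomes the ccmp condition. When
// a != 5, the immediate clears all flags (nzcv), and the trailing b.ne
// branches to Tail without evaluating b == 17.
//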
// FIXME: It should also be possible to speculate a block on the critical edge
// between Head and Tail, just like if-converting a diamond.
//
// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).

namespace {
class SSACCmpConv {
  MachineFunction *MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  MachineRegisterInfo *MRI;

public:
  /// The first block containing a conditional branch, dominating everything
  /// else.
  MachineBasicBlock *Head;

  /// The block containing cmp+br.cond with a successor shared with Head.
  MachineBasicBlock *CmpBB;

  /// The common successor for Head and CmpBB.
  MachineBasicBlock *Tail;

  /// The compare instruction in CmpBB that can be converted to a ccmp.
  MachineInstr *CmpMI;

private:
  /// The branch condition in Head as determined by AnalyzeBranch.
  SmallVector<MachineOperand, 4> HeadCond;

  /// The condition code that makes Head branch to CmpBB.
  AArch64CC::CondCode HeadCmpBBCC;

  /// The branch condition in CmpBB.
  SmallVector<MachineOperand, 4> CmpBBCond;

  /// The condition code that makes CmpBB branch to Tail.
  AArch64CC::CondCode CmpBBTailCC;

  /// Check if the Tail PHIs are trivially convertible.
  bool trivialTailPHIs();

  /// Remove CmpBB from the Tail PHIs.
  void updateTailPHIs();

  /// Check if an operand defining DstReg is dead.
  bool isDeadDef(unsigned DstReg);

  /// Find the compare instruction in MBB that controls the conditional branch.
  /// Return NULL if a convertible instruction can't be found.
  MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);

  /// Return true if all non-terminator instructions in MBB can be safely
  /// speculated.
  bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);

public:
  /// runOnMachineFunction - Initialize per-function data structures.
  void runOnMachineFunction(MachineFunction &MF) {
    this->MF = &MF;
    TII = MF.getSubtarget().getInstrInfo();
    TRI = MF.getSubtarget().getRegisterInfo();
    MRI = &MF.getRegInfo();
  }

  /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
  /// internal state, and return true.
  bool canConvert(MachineBasicBlock *MBB);

  /// Cmp-convert the last block passed to canConvert(), assuming
  /// it is possible. Add any erased blocks to RemovedBlocks.
  void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);

  /// Return the expected code size delta if the conversion into a
  /// conditional compare is performed.
  int expectedCodeSizeDelta() const;
};
} // end anonymous namespace

// Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
// This means that no if-conversion is required when merging CmpBB into Head.
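// For example (illustrative), a Tail PHI of the form
//   %x = PHI %a, <Head>, %a, <CmpBB>
// is trivial because both edges supply the same register, while
//   %x = PHI %a, <Head>, %b, <CmpBB>
// would need a select after the merge and is rejected by this check.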
bool SSACCmpConv::trivialTailPHIs() {
  for (auto &I : *Tail) {
    if (!I.isPHI())
      break;
    unsigned HeadReg = 0, CmpBBReg = 0;
    // PHI operands come in (VReg, MBB) pairs.
    for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
      MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
      unsigned Reg = I.getOperand(oi).getReg();
      if (MBB == Head) {
        assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
        HeadReg = Reg;
      }
      if (MBB == CmpBB) {
        assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
        CmpBBReg = Reg;
      }
    }
    if (HeadReg != CmpBBReg)
      return false;
  }
  return true;
}

// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
// removing the CmpBB operands. The Head operands will be identical.
void SSACCmpConv::updateTailPHIs() {
  for (auto &I : *Tail) {
    if (!I.isPHI())
      break;
    // I is a PHI. It can have multiple entries for CmpBB.
    for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
      // PHI operands are (Reg, MBB) at (oi-2, oi-1).
      if (I.getOperand(oi - 1).getMBB() == CmpBB) {
        I.RemoveOperand(oi - 1);
        I.RemoveOperand(oi - 2);
      }
    }
  }
}

// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
// are still writing virtual registers without any uses.
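// For example, a source-level 'cmp w0, #5' typically still appears here as a
// SUBSWri whose integer result is an unused virtual register; that is the
// kind of dead def accepted below, and it is later rewritten to wzr/xzr.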
bool SSACCmpConv::isDeadDef(unsigned DstReg) {
  // Writes to the zero register are dead.
  if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
    return true;
  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
    return false;
  // A virtual register def without any uses will be marked dead later, and
  // eventually replaced by the zero register.
  return MRI->use_nodbg_empty(DstReg);
}

// Parse a condition code returned by AnalyzeBranch, and compute the CondCode
// corresponding to TBB.
// Return true if the condition was parsed; tbz / tbnz conditions are rejected
// because they cannot be converted into a ccmp + br.cond pair.
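// The Cond array layouts assumed here, as produced by the AArch64
// implementation of analyzeBranch, are:
//   Bcc:         { CondCode }
//   CBZ / CBNZ:  { -1, Opcode, Reg }
//   TBZ / TBNZ:  { -1, Opcode, Reg, BitNumber }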
static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
  // A normal br.cond simply has the condition code.
  if (Cond[0].getImm() != -1) {
    assert(Cond.size() == 1 && "Unknown Cond array format");
    CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    return true;
  }
  // For tbz and cbz instructions, the opcode is next.
  switch (Cond[1].getImm()) {
  default:
    // This includes tbz / tbnz branches which can't be converted to
    // ccmp + br.cond.
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    assert(Cond.size() == 3 && "Unknown Cond array format");
    CC = AArch64CC::EQ;
    return true;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    assert(Cond.size() == 3 && "Unknown Cond array format");
    CC = AArch64CC::NE;
    return true;
  }
}

MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator I = MBB->getFirstTerminator();
  if (I == MBB->end())
    return nullptr;
  // The terminator must be controlled by the flags.
  if (!I->readsRegister(AArch64::NZCV)) {
    switch (I->getOpcode()) {
    case AArch64::CBZW:
    case AArch64::CBZX:
    case AArch64::CBNZW:
    case AArch64::CBNZX:
      // These can be converted into a ccmp against #0.
      return &*I;
    }
    ++NumCmpTermRejs;
    DEBUG(dbgs() << "Flags not used by terminator: " << *I);
    return nullptr;
  }

  // Now find the instruction controlling the terminator.
  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
    --I;
    assert(!I->isTerminator() && "Spurious terminator");
    switch (I->getOpcode()) {
    // cmp is an alias for subs with a dead destination register.
    case AArch64::SUBSWri:
    case AArch64::SUBSXri:
    // cmn is an alias for adds with a dead destination register.
    case AArch64::ADDSWri:
    case AArch64::ADDSXri:
      // Check that the immediate operand is within range, ccmp wants a uimm5.
      // Rd = SUBSri Rn, imm, shift
      if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
        DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
        ++NumImmRangeRejs;
        return nullptr;
      }
    // Fall through.
    case AArch64::SUBSWrr:
    case AArch64::SUBSXrr:
    case AArch64::ADDSWrr:
    case AArch64::ADDSXrr:
      if (isDeadDef(I->getOperand(0).getReg()))
        return &*I;
      DEBUG(dbgs() << "Can't convert compare with live destination: " << *I);
      ++NumLiveDstRejs;
      return nullptr;
    case AArch64::FCMPSrr:
    case AArch64::FCMPDrr:
    case AArch64::FCMPESrr:
    case AArch64::FCMPEDrr:
      return &*I;
    }

    // Check for flag reads and clobbers.
    MIOperands::PhysRegInfo PRI =
        MIOperands(*I).analyzePhysReg(AArch64::NZCV, TRI);

    if (PRI.Read) {
      // The ccmp doesn't produce exactly the same flags as the original
      // compare, so reject the transform if there are uses of the flags
      // besides the terminators.
      DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
      ++NumMultNZCVUses;
      return nullptr;
    }

    if (PRI.Defined || PRI.Clobbered) {
      DEBUG(dbgs() << "Not convertible compare: " << *I);
      ++NumUnknNZCVDefs;
      return nullptr;
    }
  }
  DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
  return nullptr;
}
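// Illustrative examples of the checks above: 'cmp w1, #17' is a SUBSWri with
// a dead def and an immediate that fits the 5-bit ccmp range (0-31), so it is
// convertible; 'cmp w1, #42' is rejected (NumImmRangeRejs) because 42 does not
// fit; 'subs w2, w1, #17' with w2 live is rejected (NumLiveDstRejs) because
// ccmp produces no integer result.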

/// Determine if all the instructions in MBB can safely
/// be speculated. The terminators are not considered.
///
/// Only CmpMI is allowed to clobber the flags.
///
bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
                                     const MachineInstr *CmpMI) {
  // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
  // get right.
  if (!MBB->livein_empty()) {
    DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
    return false;
  }

  unsigned InstrCount = 0;

  // Check all instructions, except the terminators. It is assumed that
  // terminators never have side effects or define any used register values.
  for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
    if (I.isDebugValue())
      continue;

    if (++InstrCount > BlockInstrLimit && !Stress) {
      DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
                   << BlockInstrLimit << " instructions.\n");
      return false;
    }

    // There shouldn't normally be any phis in a single-predecessor block.
    if (I.isPHI()) {
      DEBUG(dbgs() << "Can't hoist: " << I);
      return false;
    }

    // Don't speculate loads. Note that it may be possible and desirable to
    // speculate GOT or constant pool loads that are guaranteed not to trap,
    // but we don't support that for now.
    if (I.mayLoad()) {
      DEBUG(dbgs() << "Won't speculate load: " << I);
      return false;
    }

    // We never speculate stores, so an AA pointer isn't necessary.
    bool DontMoveAcrossStore = true;
    if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
      DEBUG(dbgs() << "Can't speculate: " << I);
      return false;
    }

    // Only CmpMI is allowed to clobber the flags.
    if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
      DEBUG(dbgs() << "Clobbers flags: " << I);
      return false;
    }
  }
  return true;
}
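// For example, a chain like 'if (a == 5 || p->x == 17)' is rejected here: the
// load of p->x would have to be speculated on the a == 5 path, and loads are
// explicitly not speculated above.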

/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
/// candidate for cmp-conversion. Fill out the internal state.
///
bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
  Head = MBB;
  Tail = CmpBB = nullptr;

  if (Head->succ_size() != 2)
    return false;
  MachineBasicBlock *Succ0 = Head->succ_begin()[0];
  MachineBasicBlock *Succ1 = Head->succ_begin()[1];

  // CmpBB can only have a single predecessor. Tail is allowed many.
  if (Succ0->pred_size() != 1)
    std::swap(Succ0, Succ1);

  // Succ0 is our candidate for CmpBB.
  if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
    return false;

  CmpBB = Succ0;
  Tail = Succ1;

  if (!CmpBB->isSuccessor(Tail))
    return false;

  // The CFG topology checks out.
  DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#"
               << CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n');
  ++NumConsidered;

  // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
  //
  // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
  // defined before the CmpBB cmp clobbers the flags. Alternatively, it should
  // always be safe to sink the ccmp down to immediately before the CmpBB
  // terminators.
  if (!trivialTailPHIs()) {
    DEBUG(dbgs() << "Can't handle phis in Tail.\n");
    ++NumPhiRejs;
    return false;
  }

  if (!Tail->livein_empty()) {
    DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
    ++NumPhysRejs;
    return false;
  }

  // CmpBB should never have PHIs since Head is its only predecessor.
  // FIXME: Clean them up if it happens.
  if (!CmpBB->empty() && CmpBB->front().isPHI()) {
    DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
    ++NumPhi2Rejs;
    return false;
  }

  if (!CmpBB->livein_empty()) {
    DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
    ++NumPhysRejs;
    return false;
  }

  // The branch we're looking to eliminate must be analyzable.
  HeadCond.clear();
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
    DEBUG(dbgs() << "Head branch not analyzable.\n");
    ++NumHeadBranchRejs;
    return false;
  }

  // This is weird, probably some sort of degenerate CFG, or an edge to a
  // landing pad.
  if (!TBB || HeadCond.empty()) {
    DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n");
    ++NumHeadBranchRejs;
    return false;
  }

  if (!parseCond(HeadCond, HeadCmpBBCC)) {
    DEBUG(dbgs() << "Unsupported branch type on Head\n");
    ++NumHeadBranchRejs;
    return false;
  }

  // Make sure the branch direction is right.
  if (TBB != CmpBB) {
    assert(TBB == Tail && "Unexpected TBB");
    HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
  }

  CmpBBCond.clear();
  TBB = FBB = nullptr;
  if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
    DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
    ++NumCmpBranchRejs;
    return false;
  }

  if (!TBB || CmpBBCond.empty()) {
    DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n");
    ++NumCmpBranchRejs;
    return false;
  }

  if (!parseCond(CmpBBCond, CmpBBTailCC)) {
    DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
    ++NumCmpBranchRejs;
    return false;
  }

  if (TBB != Tail)
    CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);

  DEBUG(dbgs() << "Head->CmpBB on " << AArch64CC::getCondCodeName(HeadCmpBBCC)
               << ", CmpBB->Tail on " << AArch64CC::getCondCodeName(CmpBBTailCC)
               << '\n');

  CmpMI = findConvertibleCompare(CmpBB);
  if (!CmpMI)
    return false;

  if (!canSpeculateInstrs(CmpBB, CmpMI)) {
    ++NumSpeculateRejs;
    return false;
  }
  return true;
}

void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
  DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#"
               << Head->getNumber() << ":\n" << *CmpBB);

  // All CmpBB instructions are moved into Head, and CmpBB is deleted.
  // Update the CFG first.
  updateTailPHIs();
  Head->removeSuccessor(CmpBB, true);
  CmpBB->removeSuccessor(Tail, true);
  Head->transferSuccessorsAndUpdatePHIs(CmpBB);
  DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
  TII->RemoveBranch(*Head);

  // If the Head terminator was one of the cbz / cbnz branches with a built-in
  // compare, we need to insert an explicit compare instruction in its place.
  if (HeadCond[0].getImm() == -1) {
    ++NumCompBranches;
    unsigned Opc = 0;
    switch (HeadCond[1].getImm()) {
    case AArch64::CBZW:
    case AArch64::CBNZW:
      Opc = AArch64::SUBSWri;
      break;
    case AArch64::CBZX:
    case AArch64::CBNZX:
      Opc = AArch64::SUBSXri;
      break;
    default:
      llvm_unreachable("Cannot convert Head branch");
    }
    const MCInstrDesc &MCID = TII->get(Opc);
    // Create a dummy virtual register for the SUBS def.
    unsigned DestReg =
        MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
    // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
    BuildMI(*Head, Head->end(), TermDL, MCID)
        .addReg(DestReg, RegState::Define | RegState::Dead)
        .addOperand(HeadCond[2])
        .addImm(0)
        .addImm(0);
    // SUBS uses the GPR*sp register classes.
    MRI->constrainRegClass(HeadCond[2].getReg(),
                           TII->getRegClass(MCID, 1, TRI, *MF));
  }

  Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());

  // Now replace CmpMI with a ccmp instruction that also considers the incoming
  // flags.
  unsigned Opc = 0;
  unsigned FirstOp = 1;   // First CmpMI operand to copy.
  bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
  switch (CmpMI->getOpcode()) {
  default:
    llvm_unreachable("Unknown compare opcode");
  case AArch64::SUBSWri:    Opc = AArch64::CCMPWi; break;
  case AArch64::SUBSWrr:    Opc = AArch64::CCMPWr; break;
  case AArch64::SUBSXri:    Opc = AArch64::CCMPXi; break;
  case AArch64::SUBSXrr:    Opc = AArch64::CCMPXr; break;
  case AArch64::ADDSWri:    Opc = AArch64::CCMNWi; break;
  case AArch64::ADDSWrr:    Opc = AArch64::CCMNWr; break;
  case AArch64::ADDSXri:    Opc = AArch64::CCMNXi; break;
  case AArch64::ADDSXrr:    Opc = AArch64::CCMNXr; break;
  case AArch64::FCMPSrr:    Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
  case AArch64::FCMPDrr:    Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
  case AArch64::FCMPESrr:   Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
  case AArch64::FCMPEDrr:   Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
  case AArch64::CBZW:
  case AArch64::CBNZW:
    Opc = AArch64::CCMPWi;
    FirstOp = 0;
    isZBranch = true;
    break;
  case AArch64::CBZX:
  case AArch64::CBNZX:
    Opc = AArch64::CCMPXi;
    FirstOp = 0;
    isZBranch = true;
    break;
  }

  // The ccmp instruction should set the flags according to the comparison when
  // Head would have branched to CmpBB.
  // The NZCV immediate operand should provide flags for the case where Head
  // would have branched to Tail. These flags should cause the new Head
  // terminator to branch to tail.
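  // For example, in the || example at the top of the file CmpBBTailCC is eq,
  // so the immediate computed below has only Z set (0b0100, the "4" in
  // "ccmp w1, #17, 4, ne"): when a == 5 the ccmp condition ne fails, the
  // flags become nZcv, and the trailing b.eq branches to Tail.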
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
  const MCInstrDesc &MCID = TII->get(Opc);
  MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
                         TII->getRegClass(MCID, 0, TRI, *MF));
  if (CmpMI->getOperand(FirstOp + 1).isReg())
    MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
                           TII->getRegClass(MCID, 1, TRI, *MF));
  MachineInstrBuilder MIB =
      BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
          .addOperand(CmpMI->getOperand(FirstOp)); // Register Rn
  if (isZBranch)
    MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
  else
    MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
  MIB.addImm(NZCV).addImm(HeadCmpBBCC);

  // If CmpMI was a terminator, we need a new conditional branch to replace it.
  // This now becomes a Head terminator.
  if (isZBranch) {
    bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
                CmpMI->getOpcode() == AArch64::CBNZX;
    BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
        .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
        .addOperand(CmpMI->getOperand(1)); // Branch target.
  }
  CmpMI->eraseFromParent();
  Head->updateTerminator();

  RemovedBlocks.push_back(CmpBB);
  CmpBB->eraseFromParent();
  DEBUG(dbgs() << "Result:\n" << *Head);
  ++NumConverted;
}

int SSACCmpConv::expectedCodeSizeDelta() const {
  int delta = 0;
  // If the Head terminator was one of the cbz / cbnz branches with a built-in
  // compare, we need to insert an explicit compare instruction in its place
  // plus a branch instruction.
  if (HeadCond[0].getImm() == -1) {
    switch (HeadCond[1].getImm()) {
    case AArch64::CBZW:
    case AArch64::CBNZW:
    case AArch64::CBZX:
    case AArch64::CBNZX:
      // Therefore delta += 1
      delta = 1;
      break;
    default:
      llvm_unreachable("Cannot convert Head branch");
    }
  }
  // If the CmpBB terminator was one of the cbz / cbnz branches with a
  // built-in compare, it is turned into a compare instruction that is
  // merged into Head, so no instruction is saved.
  // Otherwise, we save the branch instruction.
  switch (CmpMI->getOpcode()) {
  default:
    --delta;
    break;
  case AArch64::CBZW:
  case AArch64::CBNZW:
  case AArch64::CBZX:
  case AArch64::CBNZX:
    break;
  }
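  // Summarizing the four cases: Bcc head + cmp in CmpBB gives delta = -1,
  // cbz/cbnz head + cmp gives 0, Bcc head + cbz/cbnz CmpBB terminator gives 0,
  // and cbz/cbnz in both positions gives +1.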
  return delta;
}

//===----------------------------------------------------------------------===//
//                       AArch64ConditionalCompares Pass
//===----------------------------------------------------------------------===//

namespace {
class AArch64ConditionalCompares : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  MCSchedModel SchedModel;
  // Whether the function being processed has the minsize (Oz) attribute.
  bool MinSize;
  MachineRegisterInfo *MRI;
  MachineDominatorTree *DomTree;
  MachineLoopInfo *Loops;
  MachineTraceMetrics *Traces;
  MachineTraceMetrics::Ensemble *MinInstr;
  SSACCmpConv CmpConv;

public:
  static char ID;
  AArch64ConditionalCompares() : MachineFunctionPass(ID) {}
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction &MF) override;
  const char *getPassName() const override {
    return "AArch64 Conditional Compares";
  }

private:
  bool tryConvert(MachineBasicBlock *);
  void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
  void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
  void invalidateTraces();
  bool shouldConvert();
};
} // end anonymous namespace

char AArch64ConditionalCompares::ID = 0;

namespace llvm {
void initializeAArch64ConditionalComparesPass(PassRegistry &);
}

INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
                      "AArch64 CCMP Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
                    "AArch64 CCMP Pass", false, false)

FunctionPass *llvm::createAArch64ConditionalCompares() {
  return new AArch64ConditionalCompares();
}

void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<MachineTraceMetrics>();
  AU.addPreserved<MachineTraceMetrics>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

/// Update the dominator tree after if-conversion erased some blocks.
void AArch64ConditionalCompares::updateDomTree(
    ArrayRef<MachineBasicBlock *> Removed) {
  // convert() removes CmpBB which was previously dominated by Head.
  // CmpBB children should be transferred to Head.
  MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
  for (MachineBasicBlock *RemovedMBB : Removed) {
    MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
    assert(Node != HeadNode && "Cannot erase the head node");
    assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
    while (Node->getNumChildren())
      DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
    DomTree->eraseNode(RemovedMBB);
  }
}

/// Update LoopInfo after if-conversion.
void
AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
  if (!Loops)
    return;
  for (MachineBasicBlock *RemovedMBB : Removed)
    Loops->removeBlock(RemovedMBB);
}

/// Invalidate MachineTraceMetrics before if-conversion.
void AArch64ConditionalCompares::invalidateTraces() {
  Traces->invalidate(CmpConv.Head);
  Traces->invalidate(CmpConv.CmpBB);
}

/// Apply cost model and heuristics to the cmp-conversion in CmpConv.
/// Return true if the conversion is a good idea.
///
bool AArch64ConditionalCompares::shouldConvert() {
  // Stress testing mode disables all cost considerations.
  if (Stress)
    return true;
  if (!MinInstr)
    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);

  // Head dominates CmpBB, so it is always included in its trace.
  MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);

  // If code size is the main concern, check the expected code size delta first.
  if (MinSize) {
    int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
    DEBUG(dbgs() << "Code size delta:  " << CodeSizeDelta << '\n');
    // If we are minimizing the code size, do the conversion whatever
    // the cost is.
    if (CodeSizeDelta < 0)
      return true;
    if (CodeSizeDelta > 0) {
      DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
      return false;
    }
    // CodeSizeDelta == 0, continue with the regular heuristics
  }

  // Heuristic: The compare conversion delays the execution of the branch
  // instruction because we must wait for the inputs to the second compare as
  // well. The branch has no dependent instructions, but delaying it increases
  // the cost of a misprediction.
  //
  // Set a limit on the delay we will accept.
  unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
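  // For example, with a hypothetical mispredict penalty of 16 cycles, the
  // limit is 12 cycles of extra depth on the CmpBB terminator.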

  // Instruction depths can be computed for all trace instructions above CmpBB.
  unsigned HeadDepth =
      Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
  unsigned CmpBBDepth =
      Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
  DEBUG(dbgs() << "Head depth:  " << HeadDepth
               << "\nCmpBB depth: " << CmpBBDepth << '\n');
  if (CmpBBDepth > HeadDepth + DelayLimit) {
    DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
                 << " cycles.\n");
    return false;
  }

  // Check the resource depth at the bottom of CmpBB - these instructions will
  // be speculated.
  unsigned ResDepth = Trace.getResourceDepth(true);
  DEBUG(dbgs() << "Resources:   " << ResDepth << '\n');

  // Heuristic: The speculatively executed instructions must all be able to
  // merge into the Head block. The Head critical path should dominate the
  // resource cost of the speculated instructions.
  if (ResDepth > HeadDepth) {
    DEBUG(dbgs() << "Too many instructions to speculate.\n");
    return false;
  }
  return true;
}

bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
  bool Changed = false;
  while (CmpConv.canConvert(MBB) && shouldConvert()) {
    invalidateTraces();
    SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
    CmpConv.convert(RemovedBlocks);
    Changed = true;
    updateDomTree(RemovedBlocks);
    updateLoops(RemovedBlocks);
  }
  return Changed;
}

bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
               << "********** Function: " << MF.getName() << '\n');
  if (skipFunction(*MF.getFunction()))
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  SchedModel = MF.getSubtarget().getSchedModel();
  MRI = &MF.getRegInfo();
  DomTree = &getAnalysis<MachineDominatorTree>();
  Loops = getAnalysisIfAvailable<MachineLoopInfo>();
  Traces = &getAnalysis<MachineTraceMetrics>();
  MinInstr = nullptr;
  MinSize = MF.getFunction()->optForMinSize();

  bool Changed = false;
  CmpConv.runOnMachineFunction(MF);

  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
  // cmp-conversions from the same head block.
  // Note that updateDomTree() modifies the children of the DomTree node
  // currently being visited. The df_iterator supports that; it doesn't look at
  // child_begin() / child_end() until after a node has been visited.
  for (auto *I : depth_first(DomTree))
    if (tryConvert(I->getBlock()))
      Changed = true;

  return Changed;
}