      1 //===- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 /// \file
     10 ///
     11 /// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
     12 /// flag bits.
     13 ///
     14 /// We have to do this by carefully analyzing and rewriting the usage of the
     15 /// copied EFLAGS register because there is no general way to rematerialize the
     16 /// entire EFLAGS register safely and efficiently. Using `popf` both forces
     17 /// dynamic stack adjustment and can create correctness issues due to IF, TF,
     18 /// and other non-status flags being overwritten. Sequences involving SAHF
     19 /// don't work on all x86 processors and are often quite slow compared to
     20 /// directly testing a single status flag preserved in its own GPR.
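        ///
        /// As a rough illustration (pseudo-assembly, not the exact MIR this pass
        /// emits), a saved-and-restored EFLAGS value feeding a conditional branch:
        ///
        ///     %flags = COPY $eflags     ; save the flags
        ///     ...                       ; code that may clobber EFLAGS
        ///     $eflags = COPY %flags     ; restore the flags
        ///     ja target                 ; branch on CF == 0 and ZF == 0
        ///
        /// is rewritten to capture just the condition that is needed while it is
        /// still live and to recreate a usable flag from that value:
        ///
        ///     %cond = seta              ; materialize the condition into a GR8
        ///     ...
        ///     test %cond, %cond         ; ZF is set exactly when %cond is zero
        ///     jne target                ; taken exactly when the original ja was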
     21 ///
     22 //===----------------------------------------------------------------------===//
     23 
     24 #include "X86.h"
     25 #include "X86InstrBuilder.h"
     26 #include "X86InstrInfo.h"
     27 #include "X86Subtarget.h"
     28 #include "llvm/ADT/ArrayRef.h"
     29 #include "llvm/ADT/DenseMap.h"
     30 #include "llvm/ADT/PostOrderIterator.h"
     31 #include "llvm/ADT/STLExtras.h"
     32 #include "llvm/ADT/ScopeExit.h"
     33 #include "llvm/ADT/SmallPtrSet.h"
     34 #include "llvm/ADT/SmallSet.h"
     35 #include "llvm/ADT/SmallVector.h"
     36 #include "llvm/ADT/SparseBitVector.h"
     37 #include "llvm/ADT/Statistic.h"
     38 #include "llvm/CodeGen/MachineBasicBlock.h"
     39 #include "llvm/CodeGen/MachineConstantPool.h"
     40 #include "llvm/CodeGen/MachineDominators.h"
     41 #include "llvm/CodeGen/MachineFunction.h"
     42 #include "llvm/CodeGen/MachineFunctionPass.h"
     43 #include "llvm/CodeGen/MachineInstr.h"
     44 #include "llvm/CodeGen/MachineInstrBuilder.h"
     45 #include "llvm/CodeGen/MachineModuleInfo.h"
     46 #include "llvm/CodeGen/MachineOperand.h"
     47 #include "llvm/CodeGen/MachineRegisterInfo.h"
     48 #include "llvm/CodeGen/MachineSSAUpdater.h"
     49 #include "llvm/CodeGen/TargetInstrInfo.h"
     50 #include "llvm/CodeGen/TargetRegisterInfo.h"
     51 #include "llvm/CodeGen/TargetSchedule.h"
     52 #include "llvm/CodeGen/TargetSubtargetInfo.h"
     53 #include "llvm/IR/DebugLoc.h"
     54 #include "llvm/MC/MCSchedule.h"
     55 #include "llvm/Pass.h"
     56 #include "llvm/Support/CommandLine.h"
     57 #include "llvm/Support/Debug.h"
     58 #include "llvm/Support/raw_ostream.h"
     59 #include <algorithm>
     60 #include <cassert>
     61 #include <iterator>
     62 #include <utility>
     63 
     64 using namespace llvm;
     65 
     66 #define PASS_KEY "x86-flags-copy-lowering"
     67 #define DEBUG_TYPE PASS_KEY
     68 
     69 STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
     70 STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
     71 STATISTIC(NumTestsInserted, "Number of test instructions inserted");
     72 STATISTIC(NumAddsInserted, "Number of add instructions inserted");
     73 
     74 namespace llvm {
     75 
     76 void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
     77 
     78 } // end namespace llvm
     79 
     80 namespace {
     81 
     82 // Convenient array type for storing registers associated with each condition.
     83 using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
     84 
     85 class X86FlagsCopyLoweringPass : public MachineFunctionPass {
     86 public:
     87   X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {
     88     initializeX86FlagsCopyLoweringPassPass(*PassRegistry::getPassRegistry());
     89   }
     90 
     91   StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
     92   bool runOnMachineFunction(MachineFunction &MF) override;
     93   void getAnalysisUsage(AnalysisUsage &AU) const override;
     94 
     95   /// Pass identification, replacement for typeid.
     96   static char ID;
     97 
     98 private:
     99   MachineRegisterInfo *MRI;
    100   const X86Subtarget *Subtarget;
    101   const X86InstrInfo *TII;
    102   const TargetRegisterInfo *TRI;
    103   const TargetRegisterClass *PromoteRC;
    104   MachineDominatorTree *MDT;
    105 
    106   CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
    107                                   MachineBasicBlock::iterator CopyDefI);
    108 
    109   unsigned promoteCondToReg(MachineBasicBlock &MBB,
    110                             MachineBasicBlock::iterator TestPos,
    111                             DebugLoc TestLoc, X86::CondCode Cond);
    112   std::pair<unsigned, bool>
    113   getCondOrInverseInReg(MachineBasicBlock &TestMBB,
    114                         MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
    115                         X86::CondCode Cond, CondRegArray &CondRegs);
    116   void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
    117                   DebugLoc Loc, unsigned Reg);
    118 
    119   void rewriteArithmetic(MachineBasicBlock &TestMBB,
    120                          MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
    121                          MachineInstr &MI, MachineOperand &FlagUse,
    122                          CondRegArray &CondRegs);
    123   void rewriteCMov(MachineBasicBlock &TestMBB,
    124                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
    125                    MachineInstr &CMovI, MachineOperand &FlagUse,
    126                    CondRegArray &CondRegs);
    127   void rewriteCondJmp(MachineBasicBlock &TestMBB,
    128                       MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
    129                       MachineInstr &JmpI, CondRegArray &CondRegs);
    130   void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
    131                    MachineInstr &CopyDefI);
    132   void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
    133                                MachineBasicBlock::iterator TestPos,
    134                                DebugLoc TestLoc, MachineInstr &SetBI,
    135                                MachineOperand &FlagUse, CondRegArray &CondRegs);
    136   void rewriteSetCC(MachineBasicBlock &TestMBB,
    137                     MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
    138                     MachineInstr &SetCCI, MachineOperand &FlagUse,
    139                     CondRegArray &CondRegs);
    140 };
    141 
    142 } // end anonymous namespace
    143 
    144 INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
    145                       "X86 EFLAGS copy lowering", false, false)
    146 INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
    147                     "X86 EFLAGS copy lowering", false, false)
    148 
    149 FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
    150   return new X86FlagsCopyLoweringPass();
    151 }
    152 
    153 char X86FlagsCopyLoweringPass::ID = 0;
    154 
    155 void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
    156   AU.addRequired<MachineDominatorTree>();
    157   MachineFunctionPass::getAnalysisUsage(AU);
    158 }
    159 
    160 namespace {
    161 /// An enumeration of the arithmetic instruction mnemonics which have
    162 /// interesting flag semantics.
    163 ///
    164 /// We can map instruction opcodes into these mnemonics to make it easy to
    165 /// dispatch with specific functionality.
    166 enum class FlagArithMnemonic {
    167   ADC,
    168   ADCX,
    169   ADOX,
    170   RCL,
    171   RCR,
    172   SBB,
    173 };
    174 } // namespace
    175 
    176 static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
    177   switch (Opcode) {
    178   default:
    179     report_fatal_error("No support for lowering a copy into EFLAGS when used "
    180                        "by this instruction!");
    181 
    182 #define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX)                              \
    183   case X86::MNEMONIC##8##SUFFIX:                                               \
    184   case X86::MNEMONIC##16##SUFFIX:                                              \
    185   case X86::MNEMONIC##32##SUFFIX:                                              \
    186   case X86::MNEMONIC##64##SUFFIX:
    187 
    188 #define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC)                                    \
    189   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr)                                        \
    190   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV)                                    \
    191   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm)                                        \
    192   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr)                                        \
    193   case X86::MNEMONIC##8ri:                                                     \
    194   case X86::MNEMONIC##16ri8:                                                   \
    195   case X86::MNEMONIC##32ri8:                                                   \
    196   case X86::MNEMONIC##64ri8:                                                   \
    197   case X86::MNEMONIC##16ri:                                                    \
    198   case X86::MNEMONIC##32ri:                                                    \
    199   case X86::MNEMONIC##64ri32:                                                  \
    200   case X86::MNEMONIC##8mi:                                                     \
    201   case X86::MNEMONIC##16mi8:                                                   \
    202   case X86::MNEMONIC##32mi8:                                                   \
    203   case X86::MNEMONIC##64mi8:                                                   \
    204   case X86::MNEMONIC##16mi:                                                    \
    205   case X86::MNEMONIC##32mi:                                                    \
    206   case X86::MNEMONIC##64mi32:                                                  \
    207   case X86::MNEMONIC##8i8:                                                     \
    208   case X86::MNEMONIC##16i16:                                                   \
    209   case X86::MNEMONIC##32i32:                                                   \
    210   case X86::MNEMONIC##64i32:
    211 
    212     LLVM_EXPAND_ADC_SBB_INSTR(ADC)
    213     return FlagArithMnemonic::ADC;
    214 
    215     LLVM_EXPAND_ADC_SBB_INSTR(SBB)
    216     return FlagArithMnemonic::SBB;
    217 
    218 #undef LLVM_EXPAND_ADC_SBB_INSTR
    219 
    220     LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
    221     LLVM_EXPAND_INSTR_SIZES(RCL, r1)
    222     LLVM_EXPAND_INSTR_SIZES(RCL, ri)
    223     return FlagArithMnemonic::RCL;
    224 
    225     LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
    226     LLVM_EXPAND_INSTR_SIZES(RCR, r1)
    227     LLVM_EXPAND_INSTR_SIZES(RCR, ri)
    228     return FlagArithMnemonic::RCR;
    229 
    230 #undef LLVM_EXPAND_INSTR_SIZES
    231 
    232   case X86::ADCX32rr:
    233   case X86::ADCX64rr:
    234   case X86::ADCX32rm:
    235   case X86::ADCX64rm:
    236     return FlagArithMnemonic::ADCX;
    237 
    238   case X86::ADOX32rr:
    239   case X86::ADOX64rr:
    240   case X86::ADOX32rm:
    241   case X86::ADOX64rm:
    242     return FlagArithMnemonic::ADOX;
    243   }
    244 }
    245 
    246 static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
    247                                      MachineInstr &SplitI,
    248                                      const X86InstrInfo &TII) {
    249   MachineFunction &MF = *MBB.getParent();
    250 
    251   assert(SplitI.getParent() == &MBB &&
    252          "Split instruction must be in the split block!");
    253   assert(SplitI.isBranch() &&
    254          "Only designed to split a tail of branch instructions!");
    255   assert(X86::getCondFromBranchOpc(SplitI.getOpcode()) != X86::COND_INVALID &&
    256          "Must split on an actual jCC instruction!");
    257 
    258   // Dig out the previous instruction to the split point.
    259   MachineInstr &PrevI = *std::prev(SplitI.getIterator());
    260   assert(PrevI.isBranch() && "Must split after a branch!");
    261   assert(X86::getCondFromBranchOpc(PrevI.getOpcode()) != X86::COND_INVALID &&
    262          "Must split after an actual jCC instruction!");
    263   assert(!std::prev(PrevI.getIterator())->isTerminator() &&
    264          "Must only have this one terminator prior to the split!");
    265 
    266   // Grab the one successor edge that will stay in `MBB`.
    267   MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();
    268 
    269   // Analyze the original block to see if we are actually splitting an edge
    270   // into two edges. This can happen when we have multiple conditional jumps to
    271   // the same successor.
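          // As an illustrative, hypothetical example: if MBB ends with
          //   JE %bb.1; JL %bb.1; JMP %bb.2
          // and we split before the JL, both the remaining head and the new tail
          // block branch to %bb.1, so the single MBB -> %bb.1 edge becomes two edges
          // and any PHI in %bb.1 needs an extra incoming entry.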
    272   bool IsEdgeSplit =
    273       std::any_of(SplitI.getIterator(), MBB.instr_end(),
    274                   [&](MachineInstr &MI) {
    275                     assert(MI.isTerminator() &&
    276                            "Should only have spliced terminators!");
    277                     return llvm::any_of(
    278                         MI.operands(), [&](MachineOperand &MOp) {
    279                           return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
    280                         });
    281                   }) ||
    282       MBB.getFallThrough() == &UnsplitSucc;
    283 
    284   MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
    285 
    286   // Insert the new block immediately after the current one. Any existing
    287   // fallthrough will be sunk into this new block anyway.
    288   MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
    289 
    290   // Splice the tail of instructions into the new block.
    291   NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());
    292 
    293   // Copy the necessary successors (and their probability info) into the new
    294   // block.
    295   for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
    296     if (IsEdgeSplit || *SI != &UnsplitSucc)
    297       NewMBB.copySuccessor(&MBB, SI);
    298   // Normalize the probabilities if we didn't end up splitting the edge.
    299   if (!IsEdgeSplit)
    300     NewMBB.normalizeSuccProbs();
    301 
    302   // Now replace all of the moved successors in the original block with the new
    303   // block. This will merge their probabilities.
    304   for (MachineBasicBlock *Succ : NewMBB.successors())
    305     if (Succ != &UnsplitSucc)
    306       MBB.replaceSuccessor(Succ, &NewMBB);
    307 
    308   // We should always end up replacing at least one successor.
    309   assert(MBB.isSuccessor(&NewMBB) &&
    310          "Failed to make the new block a successor!");
    311 
    312   // Now update all the PHIs.
    313   for (MachineBasicBlock *Succ : NewMBB.successors()) {
    314     for (MachineInstr &MI : *Succ) {
    315       if (!MI.isPHI())
    316         break;
    317 
    318       for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
    319            OpIdx += 2) {
    320         MachineOperand &OpV = MI.getOperand(OpIdx);
    321         MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
    322         assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
    323         if (OpMBB.getMBB() != &MBB)
    324           continue;
    325 
    326         // Replace the operand for unsplit successors
    327         if (!IsEdgeSplit || Succ != &UnsplitSucc) {
    328           OpMBB.setMBB(&NewMBB);
    329 
    330           // We have to continue scanning as there may be multiple entries in
    331           // the PHI.
    332           continue;
    333         }
    334 
    335         // When we have split the edge, append a new incoming entry for NewMBB.
    336         MI.addOperand(MF, OpV);
    337         MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
    338         break;
    339       }
    340     }
    341   }
    342 
    343   return NewMBB;
    344 }
    345 
    346 bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
    347   LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
    348                     << " **********\n");
    349 
    350   Subtarget = &MF.getSubtarget<X86Subtarget>();
    351   MRI = &MF.getRegInfo();
    352   TII = Subtarget->getInstrInfo();
    353   TRI = Subtarget->getRegisterInfo();
    354   MDT = &getAnalysis<MachineDominatorTree>();
    355   PromoteRC = &X86::GR8RegClass;
    356 
    357   if (MF.begin() == MF.end())
    358     // Nothing to do for a degenerate empty function...
    359     return false;
    360 
    361   // Collect the copies in RPO so that when there are chains where a copy is in
    362   // turn copied again we visit the first one first. This ensures we can find
    363   // viable locations for testing the original EFLAGS that dominate all the
    364   // uses across complex CFGs.
    365   SmallVector<MachineInstr *, 4> Copies;
    366   ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
    367   for (MachineBasicBlock *MBB : RPOT)
    368     for (MachineInstr &MI : *MBB)
    369       if (MI.getOpcode() == TargetOpcode::COPY &&
    370           MI.getOperand(0).getReg() == X86::EFLAGS)
    371         Copies.push_back(&MI);
    372 
    373   for (MachineInstr *CopyI : Copies) {
    374     MachineBasicBlock &MBB = *CopyI->getParent();
    375 
    376     MachineOperand &VOp = CopyI->getOperand(1);
    377     assert(VOp.isReg() &&
    378            "The input to the copy for EFLAGS should always be a register!");
    379     MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
    380     if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
    381       // FIXME: The most likely candidates here are PHI nodes. We could in theory
    382       // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
    383       // enough that it is probably better to change every other part of LLVM
    384       // to avoid creating them. The issue is that once we have PHIs we won't
    385       // know which original EFLAGS value we need to capture with our setCCs
    386       // below. The end result will be computing a complete set of setCCs that
    387       // we *might* want, computing them in every place where we copy *out* of
    388       // EFLAGS and then doing SSA formation on all of them to insert necessary
    389       // PHI nodes and consume those here. Then hoping that somehow we DCE the
    390       // unnecessary ones. This DCE seems very unlikely to be successful and so
    391       // we will almost certainly end up with a glut of dead setCC
    392       // instructions. Until we have a motivating test case and fail to avoid
    393       // it by changing other parts of LLVM's lowering, we refuse to handle
    394       // this complex case here.
    395       LLVM_DEBUG(
    396           dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
    397           CopyDefI.dump());
    398       report_fatal_error(
    399           "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
    400     }
    401 
    402     auto Cleanup = make_scope_exit([&] {
    403       // All uses of the EFLAGS copy are now rewritten; erase the copy into
    404       // EFLAGS and, if it is now dead, the defining copy out of EFLAGS too.
    405       CopyI->eraseFromParent();
    406       if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
    407         CopyDefI.eraseFromParent();
    408       ++NumCopiesEliminated;
    409     });
    410 
    411     MachineOperand &DOp = CopyI->getOperand(0);
    412     assert(DOp.isDef() && "Expected register def!");
    413     assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
    414     if (DOp.isDead())
    415       continue;
    416 
    417     MachineBasicBlock *TestMBB = CopyDefI.getParent();
    418     auto TestPos = CopyDefI.getIterator();
    419     DebugLoc TestLoc = CopyDefI.getDebugLoc();
    420 
    421     LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
    422 
    423     // Walk up across live-in EFLAGS to find where they were actually def'ed.
    424     //
    425     // This copy's def may just be part of a region of blocks covered by
    426     // a single def of EFLAGS and we want to find the top of that region where
    427     // possible.
    428     //
    429     // This is essentially a search for a *candidate* reaching definition
    430     // location. We don't need to ever find the actual reaching definition here,
    431     // but we want to walk up the dominator tree to find the highest point which
    432     // would be viable for such a definition.
    433     auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
    434                                 MachineBasicBlock::iterator End) {
    435       // Scan backwards as we expect these to be relatively short and often find
    436       // a clobber near the end.
    437       return llvm::any_of(
    438           llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
    439             // Flag any instruction (other than the copy we are
    440             // currently rewriting) that defs EFLAGS.
    441             return &MI != CopyI && MI.findRegisterDefOperand(X86::EFLAGS);
    442           });
    443     };
    444     auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
    445                                     MachineBasicBlock *EndMBB) {
    446       assert(MDT->dominates(BeginMBB, EndMBB) &&
    447              "Only support paths down the dominator tree!");
    448       SmallPtrSet<MachineBasicBlock *, 4> Visited;
    449       SmallVector<MachineBasicBlock *, 4> Worklist;
    450       // We terminate at the beginning. No need to scan it.
    451       Visited.insert(BeginMBB);
    452       Worklist.push_back(EndMBB);
    453       do {
    454         auto *MBB = Worklist.pop_back_val();
    455         for (auto *PredMBB : MBB->predecessors()) {
    456           if (!Visited.insert(PredMBB).second)
    457             continue;
    458           if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
    459             return true;
    460           // Enqueue this block to walk its predecessors.
    461           Worklist.push_back(PredMBB);
    462         }
    463       } while (!Worklist.empty());
    464       // No clobber found along a path from the begin to end.
    465       return false;
    466     };
    467     while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
    468            !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
    469       // Find the nearest common dominator of the predecessors, as
    470       // that will be the best candidate to hoist into.
    471       MachineBasicBlock *HoistMBB =
    472           std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
    473                           *TestMBB->pred_begin(),
    474                           [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
    475                             return MDT->findNearestCommonDominator(LHS, RHS);
    476                           });
    477 
    478       // Now we need to scan all predecessors that may be reached along paths to
    479       // the hoist block. A clobber anywhere in any of these blocks will block the
    480       // hoist. Note that this even handles loops because we require *no* clobbers.
    481       if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
    482         break;
    483 
    484       // We also need the terminators to not sneakily clobber flags.
    485       if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
    486                            HoistMBB->instr_end()))
    487         break;
    488 
    489       // We found a viable location, hoist our test position to it.
    490       TestMBB = HoistMBB;
    491       TestPos = TestMBB->getFirstTerminator()->getIterator();
    492       // Clear the debug location as it would just be confusing after hoisting.
    493       TestLoc = DebugLoc();
    494     }
    495     LLVM_DEBUG({
    496       auto DefIt = llvm::find_if(
    497           llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
    498           [&](MachineInstr &MI) {
    499             return MI.findRegisterDefOperand(X86::EFLAGS);
    500           });
    501       if (DefIt.base() != TestMBB->instr_begin()) {
    502         dbgs() << "  Using EFLAGS defined by: ";
    503         DefIt->dump();
    504       } else {
    505         dbgs() << "  Using live-in flags for BB:\n";
    506         TestMBB->dump();
    507       }
    508     });
    509 
    510     // While rewriting uses, we buffer jumps and rewrite them in a second pass
    511     // because doing so will perturb the CFG that we are walking to find the
    512     // uses in the first place.
    513     SmallVector<MachineInstr *, 4> JmpIs;
    514 
    515     // Gather the condition flags that have already been preserved in
    516     // registers. We do this from scratch each time as we expect there to be
    517     // very few of them and we expect to not revisit the same copy definition
    518     // many times. If either of those changes sufficiently, we could build a map
    519     // of these up front instead.
    520     CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);
    521 
    522     // Collect the basic blocks we need to scan. Typically this will just be
    523     // a single basic block but we may have to scan multiple blocks if the
    524     // EFLAGS copy lives into successors.
    525     SmallVector<MachineBasicBlock *, 2> Blocks;
    526     SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
    527     Blocks.push_back(&MBB);
    528 
    529     do {
    530       MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
    531 
    532       // Track if/when we find a kill of the flags in this block.
    533       bool FlagsKilled = false;
    534 
    535       // In most cases, we walk from the beginning to the end of the block. But
    536       // when the block is the same block as the copy is from, we will visit it
    537       // twice. The first time we start from the copy and go to the end. The
    538       // second time we start from the beginning and go to the copy. This lets
    539       // us handle copies inside of cycles.
    540       // FIXME: This loop is *super* confusing. This is at least in part
    541       // a symptom of all of this routine needing to be refactored into
    542       // documentable components. Once done, there may be a better way to write
    543       // this loop.
    544       for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
    545                           ? std::next(CopyI->getIterator())
    546                           : UseMBB.instr_begin(),
    547                 MIE = UseMBB.instr_end();
    548            MII != MIE;) {
    549         MachineInstr &MI = *MII++;
    550         // If we are in the original copy block and encounter either the copy
    551         // def or the copy itself, break so that we don't re-process any part of
    552         // the block or process the instructions in the range that was copied
    553         // over.
    554         if (&MI == CopyI || &MI == &CopyDefI) {
    555           assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
    556                  "Should only encounter these on the second pass over the "
    557                  "original block.");
    558           break;
    559         }
    560 
    561         MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
    562         if (!FlagUse) {
    563           if (MI.findRegisterDefOperand(X86::EFLAGS)) {
    564             // If EFLAGS are defined, it's as-if they were killed. We can stop
    565             // scanning here.
    566             //
    567             // NB!!! Many instructions only modify some flags. LLVM currently
    568             // models this as clobbering all flags, but if that ever changes
    569             // this will need to be carefully updated to handle that more
    570             // complex logic.
    571             FlagsKilled = true;
    572             break;
    573           }
    574           continue;
    575         }
    576 
    577         LLVM_DEBUG(dbgs() << "  Rewriting use: "; MI.dump());
    578 
    579         // Check the kill flag before we rewrite as that may change it.
    580         if (FlagUse->isKill())
    581           FlagsKilled = true;
    582 
    583         // Once we encounter a branch, the rest of the instructions must also be
    584         // branches. We can't rewrite in place here, so we handle them below.
    585         //
    586         // Note that we don't have to handle tail calls here, even conditional
    587         // tail calls, as those are not introduced into the X86 MI until post-RA
    588         // branch folding or block placement. As a consequence, we get to deal
    589         // with the simpler formulation of conditional branches followed by tail
    590         // calls.
    591         if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
    592           auto JmpIt = MI.getIterator();
    593           do {
    594             JmpIs.push_back(&*JmpIt);
    595             ++JmpIt;
    596           } while (JmpIt != UseMBB.instr_end() &&
    597                    X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
    598                        X86::COND_INVALID);
    599           break;
    600         }
    601 
    602         // Otherwise we can just rewrite in-place.
    603         if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
    604           rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
    605         } else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
    606                    X86::COND_INVALID) {
    607           rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
    608         } else if (MI.getOpcode() == TargetOpcode::COPY) {
    609           rewriteCopy(MI, *FlagUse, CopyDefI);
    610         } else {
    611           // We assume all other instructions that use flags also def them.
    612           assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
    613                  "Expected a def of EFLAGS for this instruction!");
    614 
    615           // NB!!! Several arithmetic instructions only *partially* update
    616           // flags. Theoretically, we could generate MI code sequences that
    617           // would rely on this fact and observe different flags independently.
    618           // But currently LLVM models all of these instructions as clobbering
    619           // all the flags in an undef way. We rely on that to simplify the
    620           // logic.
    621           FlagsKilled = true;
    622 
    623           switch (MI.getOpcode()) {
    624           case X86::SETB_C8r:
    625           case X86::SETB_C16r:
    626           case X86::SETB_C32r:
    627           case X86::SETB_C64r:
    628             // Use custom lowering for arithmetic that is merely extending the
    629             // carry flag. We model this as the SETB_C* pseudo instructions.
    630             rewriteSetCarryExtended(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
    631                                     CondRegs);
    632             break;
    633 
    634           default:
    635             // Generically handle remaining uses as arithmetic instructions.
    636             rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
    637                               CondRegs);
    638             break;
    639           }
    640           break;
    641         }
    642 
    643         // If this was the last use of the flags, we're done.
    644         if (FlagsKilled)
    645           break;
    646       }
    647 
    648       // If the flags were killed, we're done with this block.
    649       if (FlagsKilled)
    650         continue;
    651 
    652       // Otherwise we need to scan successors for ones where the flags are
    653       // live-in and queue those up for processing.
    654       for (MachineBasicBlock *SuccMBB : UseMBB.successors())
    655         if (SuccMBB->isLiveIn(X86::EFLAGS) &&
    656             VisitedBlocks.insert(SuccMBB).second) {
    657           // We currently don't do any PHI insertion and so we require that the
    658           // test basic block dominates all of the use basic blocks. Further, we
    659           // can't have a path from the test block back to itself, as that would
    660           // create a cycle requiring a PHI to break it.
    661           //
    662           // We could in theory do PHI insertion here if it becomes useful by
    663           // just taking undef values in along every edge that we don't trace
    664           // this EFLAGS copy along. This isn't as bad as fully general PHI
    665           // insertion, but still seems like a great deal of complexity.
    666           //
    667           // Because it is theoretically possible that some earlier MI pass or
    668           // other lowering transformation could induce this to happen, we do
    669           // a hard check even in non-debug builds here.
    670           if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
    671             LLVM_DEBUG({
    672               dbgs()
    673                   << "ERROR: Encountered use that is not dominated by our test "
    674                      "basic block! Rewriting this would require inserting PHI "
    675                      "nodes to track the flag state across the CFG.\n\nTest "
    676                      "block:\n";
    677               TestMBB->dump();
    678               dbgs() << "Use block:\n";
    679               SuccMBB->dump();
    680             });
    681             report_fatal_error(
    682                 "Cannot lower EFLAGS copy when original copy def "
    683                 "does not dominate all uses.");
    684           }
    685 
    686           Blocks.push_back(SuccMBB);
    687         }
    688     } while (!Blocks.empty());
    689 
    690     // Now rewrite the jumps that use the flags. These we handle specially
    691     // because if there are multiple jumps in a single basic block we'll have
    692     // to do surgery on the CFG.
    693     MachineBasicBlock *LastJmpMBB = nullptr;
    694     for (MachineInstr *JmpI : JmpIs) {
    695       // Past the first jump within a basic block we need to split the blocks
    696       // apart.
    697       if (JmpI->getParent() == LastJmpMBB)
    698         splitBlock(*JmpI->getParent(), *JmpI, *TII);
    699       else
    700         LastJmpMBB = JmpI->getParent();
    701 
    702       rewriteCondJmp(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
    703     }
    704 
    705     // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
    706     // the copy's def operand is itself a kill.
    707   }
    708 
    709 #ifndef NDEBUG
    710   for (MachineBasicBlock &MBB : MF)
    711     for (MachineInstr &MI : MBB)
    712       if (MI.getOpcode() == TargetOpcode::COPY &&
    713           (MI.getOperand(0).getReg() == X86::EFLAGS ||
    714            MI.getOperand(1).getReg() == X86::EFLAGS)) {
    715         LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
    716                    MI.dump());
    717         llvm_unreachable("Unlowered EFLAGS copy!");
    718       }
    719 #endif
    720 
    721   return true;
    722 }
    723 
    724 /// Collect any conditions that have already been set in registers so that we
    725 /// can re-use them rather than adding duplicates.
    726 CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
    727     MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
    728   CondRegArray CondRegs = {};
    729 
    730   // Scan backwards across the range of instructions with live EFLAGS.
    731   for (MachineInstr &MI :
    732        llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
    733     X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode());
    734     if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() &&
    735         TRI->isVirtualRegister(MI.getOperand(0).getReg())) {
    736       assert(MI.getOperand(0).isDef() &&
    737              "A non-storing SETcc should always define a register!");
    738       CondRegs[Cond] = MI.getOperand(0).getReg();
    739     }
    740 
    741     // Stop scanning when we see the first definition of EFLAGS, as prior to
    742     // this we would potentially capture the wrong flag state.
    743     if (MI.findRegisterDefOperand(X86::EFLAGS))
    744       break;
    745   }
    746   return CondRegs;
    747 }
    748 
    749 unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
    750     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    751     DebugLoc TestLoc, X86::CondCode Cond) {
    752   unsigned Reg = MRI->createVirtualRegister(PromoteRC);
    753   auto SetI = BuildMI(TestMBB, TestPos, TestLoc,
    754                       TII->get(X86::getSETFromCond(Cond)), Reg);
    755   (void)SetI;
    756   LLVM_DEBUG(dbgs() << "    save cond: "; SetI->dump());
    757   ++NumSetCCsInserted;
    758   return Reg;
    759 }
    760 
    761 std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
    762     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    763     DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
    764   unsigned &CondReg = CondRegs[Cond];
    765   unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
    766   if (!CondReg && !InvCondReg)
    767     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
    768 
    769   if (CondReg)
    770     return {CondReg, false};
    771   else
    772     return {InvCondReg, true};
    773 }
    774 
    775 void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
    776                                           MachineBasicBlock::iterator Pos,
    777                                           DebugLoc Loc, unsigned Reg) {
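          // Note: the saved condition register is expected to hold 0 or 1, and a
          // TEST of a register against itself sets ZF exactly when the register is
          // zero, so callers rewrite users to COND_NE (or COND_E for an inverted
          // condition) against the flags produced here.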
    778   auto TestI =
    779       BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
    780   (void)TestI;
    781   LLVM_DEBUG(dbgs() << "    test cond: "; TestI->dump());
    782   ++NumTestsInserted;
    783 }
    784 
    785 void X86FlagsCopyLoweringPass::rewriteArithmetic(
    786     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    787     DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse,
    788     CondRegArray &CondRegs) {
    789   // Arithmetic is either reading CF or OF. Figure out which condition we need
    790   // to preserve in a register.
    791   X86::CondCode Cond;
    792 
    793   // The addend that, when added to the saved flag value, recreates CF or OF.
    794   int Addend;
    795 
    796   switch (getMnemonicFromOpcode(MI.getOpcode())) {
    797   case FlagArithMnemonic::ADC:
    798   case FlagArithMnemonic::ADCX:
    799   case FlagArithMnemonic::RCL:
    800   case FlagArithMnemonic::RCR:
    801   case FlagArithMnemonic::SBB:
    802     Cond = X86::COND_B; // CF == 1
    803     // Set up an addend such that adding 1 to it produces a carry, since there
    804     // is no higher bit available in the 8-bit register.
    805     Addend = 255;
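            // For example, a saved CF value of 1 gives 1 + 255 = 256, which does not
            // fit in 8 bits and so sets CF again, while a saved value of 0 gives 255
            // and leaves CF clear.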
    806     break;
    807 
    808   case FlagArithMnemonic::ADOX:
    809     Cond = X86::COND_O; // OF == 1
    810     // Set up an addend such that adding 1 to it flips it from positive to
    811     // negative and thus overflows in the signed domain.
    812     Addend = 127;
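            // For example, a saved OF value of 1 gives 1 + 127 = 128 = -128 in 8-bit
            // two's complement, so adding two positives yields a negative and OF is
            // set, while a saved value of 0 gives 127 and leaves OF clear.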
    813     break;
    814   }
    815 
    816   // Now get a register that contains the value of the flag input to the
    817   // arithmetic. We require exactly this flag to simplify the arithmetic
    818   // required to materialize it back into the flag.
    819   unsigned &CondReg = CondRegs[Cond];
    820   if (!CondReg)
    821     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
    822 
    823   MachineBasicBlock &MBB = *MI.getParent();
    824 
    825   // Insert an instruction that will set the flag back to the desired value.
    826   unsigned TmpReg = MRI->createVirtualRegister(PromoteRC);
    827   auto AddI =
    828       BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri))
    829           .addDef(TmpReg, RegState::Dead)
    830           .addReg(CondReg)
    831           .addImm(Addend);
    832   (void)AddI;
    833   LLVM_DEBUG(dbgs() << "    add cond: "; AddI->dump());
    834   ++NumAddsInserted;
    835   FlagUse.setIsKill(true);
    836 }
    837 
    838 void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
    839                                            MachineBasicBlock::iterator TestPos,
    840                                            DebugLoc TestLoc,
    841                                            MachineInstr &CMovI,
    842                                            MachineOperand &FlagUse,
    843                                            CondRegArray &CondRegs) {
    844   // First get the register containing this specific condition.
    845   X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode());
    846   unsigned CondReg;
    847   bool Inverted;
    848   std::tie(CondReg, Inverted) =
    849       getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);
    850 
    851   MachineBasicBlock &MBB = *CMovI.getParent();
    852 
    853   // Insert a direct test of the saved register.
    854   insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);
    855 
    856   // Rewrite the CMov to use the !ZF flag from the test (but match register
    857   // size and memory operand), and then kill its use of the flags afterward.
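          // For example (illustrative): a CMOVE32rr that originally selected on ZF
          // becomes a CMOVNE32rr selecting on the TEST above, or a CMOVE32rr when
          // only the inverse (SETNE) value had been saved.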
    858   auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg());
    859   CMovI.setDesc(TII->get(X86::getCMovFromCond(
    860       Inverted ? X86::COND_E : X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8,
    861       !CMovI.memoperands_empty())));
    862   FlagUse.setIsKill(true);
    863   LLVM_DEBUG(dbgs() << "    fixed cmov: "; CMovI.dump());
    864 }
    865 
    866 void X86FlagsCopyLoweringPass::rewriteCondJmp(
    867     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    868     DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
    869   // First get the register containing this specific condition.
    870   X86::CondCode Cond = X86::getCondFromBranchOpc(JmpI.getOpcode());
    871   unsigned CondReg;
    872   bool Inverted;
    873   std::tie(CondReg, Inverted) =
    874       getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);
    875 
    876   MachineBasicBlock &JmpMBB = *JmpI.getParent();
    877 
    878   // Insert a direct test of the saved register.
    879   insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg);
    880 
    881   // Rewrite the jump to use the !ZF flag from the test, and kill its use of
    882   // flags afterward.
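          // For example (illustrative): an original JA becomes a JNE over the TEST
          // above, or a JE if only the inverse (SETBE) value had been saved.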
    883   JmpI.setDesc(TII->get(
    884       X86::GetCondBranchFromCond(Inverted ? X86::COND_E : X86::COND_NE)));
    885   const int ImplicitEFLAGSOpIdx = 1;
    886   JmpI.getOperand(ImplicitEFLAGSOpIdx).setIsKill(true);
    887   LLVM_DEBUG(dbgs() << "    fixed jCC: "; JmpI.dump());
    888 }
    889 
    890 void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
    891                                            MachineOperand &FlagUse,
    892                                            MachineInstr &CopyDefI) {
    893   // Just replace this copy with the original copy def.
    894   MRI->replaceRegWith(MI.getOperand(0).getReg(),
    895                       CopyDefI.getOperand(0).getReg());
    896   MI.eraseFromParent();
    897 }
    898 
    899 void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
    900     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    901     DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
    902     CondRegArray &CondRegs) {
    903   // This routine is only used to handle pseudos for setting a register to zero
    904   // or all ones based on CF. It is essentially SETB sign-extended from 1 bit to
    905   // the register width and is modeled with the SETB_C* pseudos. They need special
    906   // handling as they aren't normal SETcc instructions and are lowered to an
    907   // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
    908   // they are only provided in reg-defining forms. A complicating factor is that
    909   // they can define many different register widths.
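          //
          // A rough sketch of the replacement sequence built below (illustrative
          // pseudo-code; the real code also widens registers as needed):
          //   %cf   = SETB            ; 0 or 1, captured at the test position
          //   %zero = MOV32r0         ; materialize zero
          //   %res  = SUB %zero, %cf  ; 0 - 1 = all ones, 0 - 0 = zero
          // so %res reproduces the all-zeros/all-ones mask SETB_C* would have given.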
    910   assert(SetBI.getOperand(0).isReg() &&
    911          "Cannot have a non-register defined operand to this variant of SETB!");
    912 
    913   // Little helper to do the common final step of replacing the register def'ed
    914   // by this SETB instruction with a new register and removing the SETB
    915   // instruction.
    916   auto RewriteToReg = [&](unsigned Reg) {
    917     MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
    918     SetBI.eraseFromParent();
    919   };
    920 
    921   // Grab the register class used for this particular instruction.
    922   auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
    923 
    924   MachineBasicBlock &MBB = *SetBI.getParent();
    925   auto SetPos = SetBI.getIterator();
    926   auto SetLoc = SetBI.getDebugLoc();
    927 
    928   auto AdjustReg = [&](unsigned Reg) {
    929     auto &OrigRC = *MRI->getRegClass(Reg);
    930     if (&OrigRC == &SetBRC)
    931       return Reg;
    932 
    933     unsigned NewReg;
    934 
    935     int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
    936     int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
    937     assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
    938     assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
    939     int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
    940                        X86::NoSubRegister, X86::sub_32bit};
    941 
    942     // If the original size is smaller than the target *and* is smaller than 4
    943     // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
    944     // to maximize the chance of being able to CSE that operation and to avoid
    945     // the partial dependency stalls that extending to only 2 bytes would incur.
    946     if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
    947       NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
    948       BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
    949           .addReg(Reg);
    950       if (&SetBRC == &X86::GR32RegClass)
    951         return NewReg;
    952       Reg = NewReg;
    953       OrigRegSize = 4;
    954     }
    955 
    956     NewReg = MRI->createVirtualRegister(&SetBRC);
    957     if (OrigRegSize < TargetRegSize) {
    958       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
    959               NewReg)
    960           .addImm(0)
    961           .addReg(Reg)
    962           .addImm(SubRegIdx[OrigRegSize]);
    963     } else if (OrigRegSize > TargetRegSize) {
    964       if (TargetRegSize == 1 && !Subtarget->is64Bit()) {
    965         // Need to constrain the register class.
    966         MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass);
    967       }
    968 
    969       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY),
    970               NewReg)
    971           .addReg(Reg, 0, SubRegIdx[TargetRegSize]);
    972     } else {
    973       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
    974           .addReg(Reg);
    975     }
    976     return NewReg;
    977   };
    978 
    979   unsigned &CondReg = CondRegs[X86::COND_B];
    980   if (!CondReg)
    981     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
    982 
    983   // Adjust the condition to have the desired register width by zero-extending
    984   // as needed.
    985   // FIXME: We should use a better API to avoid the local reference and using a
    986   // different variable here.
    987   unsigned ExtCondReg = AdjustReg(CondReg);
    988 
    989   // Now we need to turn this into a bitmask. We do this by subtracting it from
    990   // zero.
    991   unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
    992   BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
    993   ZeroReg = AdjustReg(ZeroReg);
    994 
    995   unsigned Sub;
    996   switch (SetBI.getOpcode()) {
    997   case X86::SETB_C8r:
    998     Sub = X86::SUB8rr;
    999     break;
   1000 
   1001   case X86::SETB_C16r:
   1002     Sub = X86::SUB16rr;
   1003     break;
   1004 
   1005   case X86::SETB_C32r:
   1006     Sub = X86::SUB32rr;
   1007     break;
   1008 
   1009   case X86::SETB_C64r:
   1010     Sub = X86::SUB64rr;
   1011     break;
   1012 
   1013   default:
   1014     llvm_unreachable("Invalid SETB_C* opcode!");
   1015   }
   1016   unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
   1017   BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
   1018       .addReg(ZeroReg)
   1019       .addReg(ExtCondReg);
   1020   return RewriteToReg(ResultReg);
   1021 }
   1022 
   1023 void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
   1024                                             MachineBasicBlock::iterator TestPos,
   1025                                             DebugLoc TestLoc,
   1026                                             MachineInstr &SetCCI,
   1027                                             MachineOperand &FlagUse,
   1028                                             CondRegArray &CondRegs) {
   1029   X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode());
   1030   // Note that we can't usefully rewrite this to the inverse without complex
   1031   // analysis of the users of the setCC. Largely we rely on any avoidable
   1032   // duplicates having already been avoided before we get here.
   1033   unsigned &CondReg = CondRegs[Cond];
   1034   if (!CondReg)
   1035     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
   1036 
   1037   // Rewriting a register def is trivial: we just replace the register and
   1038   // remove the setcc.
   1039   if (!SetCCI.mayStore()) {
   1040     assert(SetCCI.getOperand(0).isReg() &&
   1041            "Cannot have a non-register defined operand to SETcc!");
   1042     MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg);
   1043     SetCCI.eraseFromParent();
   1044     return;
   1045   }
   1046 
   1047   // Otherwise, we need to emit a store.
   1048   auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(),
   1049                      SetCCI.getDebugLoc(), TII->get(X86::MOV8mr));
   1050   // Copy the address operands.
   1051   for (int i = 0; i < X86::AddrNumOperands; ++i)
   1052     MIB.add(SetCCI.getOperand(i));
   1053 
   1054   MIB.addReg(CondReg);
   1055 
   1056   MIB->setMemRefs(SetCCI.memoperands_begin(), SetCCI.memoperands_end());
   1057 
   1058   SetCCI.eraseFromParent();
   1059   return;
   1060 }
   1061