      1 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass munges the code in the input function to better prepare it for
      11 // SelectionDAG-based code generation. This works around limitations in its
     12 // basic-block-at-a-time approach. It should eventually be removed.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "llvm/CodeGen/Passes.h"
     17 #include "llvm/ADT/DenseMap.h"
     18 #include "llvm/ADT/SmallSet.h"
     19 #include "llvm/ADT/Statistic.h"
     20 #include "llvm/Analysis/InstructionSimplify.h"
     21 #include "llvm/Analysis/TargetLibraryInfo.h"
     22 #include "llvm/Analysis/TargetTransformInfo.h"
     23 #include "llvm/IR/CallSite.h"
     24 #include "llvm/IR/Constants.h"
     25 #include "llvm/IR/DataLayout.h"
     26 #include "llvm/IR/DerivedTypes.h"
     27 #include "llvm/IR/Dominators.h"
     28 #include "llvm/IR/Function.h"
     29 #include "llvm/IR/GetElementPtrTypeIterator.h"
     30 #include "llvm/IR/IRBuilder.h"
     31 #include "llvm/IR/InlineAsm.h"
     32 #include "llvm/IR/Instructions.h"
     33 #include "llvm/IR/IntrinsicInst.h"
     34 #include "llvm/IR/MDBuilder.h"
     35 #include "llvm/IR/PatternMatch.h"
     36 #include "llvm/IR/Statepoint.h"
     37 #include "llvm/IR/ValueHandle.h"
     38 #include "llvm/IR/ValueMap.h"
     39 #include "llvm/Pass.h"
     40 #include "llvm/Support/CommandLine.h"
     41 #include "llvm/Support/Debug.h"
     42 #include "llvm/Support/raw_ostream.h"
     43 #include "llvm/Target/TargetLowering.h"
     44 #include "llvm/Target/TargetSubtargetInfo.h"
     45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     46 #include "llvm/Transforms/Utils/BuildLibCalls.h"
     47 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
     48 #include "llvm/Transforms/Utils/Local.h"
     49 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
     50 using namespace llvm;
     51 using namespace llvm::PatternMatch;
     52 
     53 #define DEBUG_TYPE "codegenprepare"
     54 
     55 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
     56 STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
     57 STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
     58 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
     59                       "sunken Cmps");
     60 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
     61                        "of sunken Casts");
     62 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
     63                           "computations were sunk");
     64 STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
     65 STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
     66 STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
     67 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
     68 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
     69 STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
     70 STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
     71 
     72 static cl::opt<bool> DisableBranchOpts(
     73   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
     74   cl::desc("Disable branch optimizations in CodeGenPrepare"));
     75 
     76 static cl::opt<bool>
     77     DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
     78                   cl::desc("Disable GC optimizations in CodeGenPrepare"));
     79 
     80 static cl::opt<bool> DisableSelectToBranch(
     81   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
     82   cl::desc("Disable select to branch conversion."));
     83 
     84 static cl::opt<bool> AddrSinkUsingGEPs(
     85   "addr-sink-using-gep", cl::Hidden, cl::init(false),
     86   cl::desc("Address sinking in CGP using GEPs."));
     87 
     88 static cl::opt<bool> EnableAndCmpSinking(
     89    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
      90    cl::desc("Enable sinking and/cmp into branches."));
     91 
     92 static cl::opt<bool> DisableStoreExtract(
     93     "disable-cgp-store-extract", cl::Hidden, cl::init(false),
     94     cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
     95 
     96 static cl::opt<bool> StressStoreExtract(
     97     "stress-cgp-store-extract", cl::Hidden, cl::init(false),
     98     cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
     99 
    100 static cl::opt<bool> DisableExtLdPromotion(
    101     "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    102     cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
    103              "CodeGenPrepare"));
    104 
    105 static cl::opt<bool> StressExtLdPromotion(
    106     "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    107     cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
    108              "optimization in CodeGenPrepare"));
    109 
    110 namespace {
    111 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
    112 struct TypeIsSExt {
    113   Type *Ty;
    114   bool IsSExt;
    115   TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
    116 };
    117 typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
    118 class TypePromotionTransaction;
    119 
    120   class CodeGenPrepare : public FunctionPass {
     121     /// TLI - Keep a pointer to a TargetLowering to consult for determining
    122     /// transformation profitability.
    123     const TargetMachine *TM;
    124     const TargetLowering *TLI;
    125     const TargetTransformInfo *TTI;
    126     const TargetLibraryInfo *TLInfo;
    127 
    128     /// CurInstIterator - As we scan instructions optimizing them, this is the
    129     /// next instruction to optimize.  Xforms that can invalidate this should
    130     /// update it.
    131     BasicBlock::iterator CurInstIterator;
    132 
    133     /// Keeps track of non-local addresses that have been sunk into a block.
    134     /// This allows us to avoid inserting duplicate code for blocks with
    135     /// multiple load/stores of the same address.
    136     ValueMap<Value*, Value*> SunkAddrs;
    137 
    138     /// Keeps track of all truncates inserted for the current function.
    139     SetOfInstrs InsertedTruncsSet;
     140     /// Keeps track of the original type of each instruction before its
     141     /// promotion for the current function.
    142     InstrToOrigTy PromotedInsts;
    143 
     144     /// ModifiedDT - True if the CFG has been modified in any way.
    145     bool ModifiedDT;
    146 
    147     /// OptSize - True if optimizing for size.
    148     bool OptSize;
    149 
    150   public:
    151     static char ID; // Pass identification, replacement for typeid
    152     explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
    153         : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) {
    154         initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
    155       }
    156     bool runOnFunction(Function &F) override;
    157 
    158     const char *getPassName() const override { return "CodeGen Prepare"; }
    159 
    160     void getAnalysisUsage(AnalysisUsage &AU) const override {
    161       AU.addPreserved<DominatorTreeWrapperPass>();
    162       AU.addRequired<TargetLibraryInfoWrapperPass>();
    163       AU.addRequired<TargetTransformInfoWrapperPass>();
    164     }
    165 
    166   private:
    167     bool EliminateFallThrough(Function &F);
    168     bool EliminateMostlyEmptyBlocks(Function &F);
    169     bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    170     void EliminateMostlyEmptyBlock(BasicBlock *BB);
    171     bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
    172     bool OptimizeInst(Instruction *I, bool& ModifiedDT);
    173     bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
    174     bool OptimizeInlineAsmInst(CallInst *CS);
    175     bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
    176     bool MoveExtToFormExtLoad(Instruction *&I);
    177     bool OptimizeExtUses(Instruction *I);
    178     bool OptimizeSelectInst(SelectInst *SI);
    179     bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
    180     bool OptimizeExtractElementInst(Instruction *Inst);
    181     bool DupRetToEnableTailCallOpts(BasicBlock *BB);
    182     bool PlaceDbgValues(Function &F);
    183     bool sinkAndCmp(Function &F);
    184     bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
    185                         Instruction *&Inst,
    186                         const SmallVectorImpl<Instruction *> &Exts,
    187                         unsigned CreatedInstCost);
    188     bool splitBranchCondition(Function &F);
    189     bool simplifyOffsetableRelocate(Instruction &I);
    190   };
    191 }
    192 
    193 char CodeGenPrepare::ID = 0;
    194 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
    195                    "Optimize for code generation", false, false)
    196 
    197 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
    198   return new CodeGenPrepare(TM);
    199 }
    200 
    201 bool CodeGenPrepare::runOnFunction(Function &F) {
    202   if (skipOptnoneFunction(F))
    203     return false;
    204 
    205   bool EverMadeChange = false;
    206   // Clear per function information.
    207   InsertedTruncsSet.clear();
    208   PromotedInsts.clear();
    209 
    210   ModifiedDT = false;
    211   if (TM)
    212     TLI = TM->getSubtargetImpl(F)->getTargetLowering();
    213   TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    214   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    215   OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
    216 
    217   /// This optimization identifies DIV instructions that can be
    218   /// profitably bypassed and carried out with a shorter, faster divide.
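  /// As an illustrative sketch (not code from this pass), on a target that
  /// reports 64-bit division as slow with a 32-bit bypass width, a 'udiv i64'
  /// whose operands turn out at run time to fit in 32 bits is executed on a
  /// guarded fast path as a 32-bit divide and zero-extended back to i64.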
    219   if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
    220     const DenseMap<unsigned int, unsigned int> &BypassWidths =
    221        TLI->getBypassSlowDivWidths();
    222     for (Function::iterator I = F.begin(); I != F.end(); I++)
    223       EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
    224   }
    225 
    226   // Eliminate blocks that contain only PHI nodes and an
    227   // unconditional branch.
    228   EverMadeChange |= EliminateMostlyEmptyBlocks(F);
    229 
     230   // If llvm.dbg.value is far away from the value, then iSel may not be able
     231   // to handle it properly. iSel will drop llvm.dbg.value if it cannot
     232   // find a node corresponding to the value.
    233   EverMadeChange |= PlaceDbgValues(F);
    234 
    235   // If there is a mask, compare against zero, and branch that can be combined
    236   // into a single target instruction, push the mask and compare into branch
    237   // users. Do this before OptimizeBlock -> OptimizeInst ->
    238   // OptimizeCmpExpression, which perturbs the pattern being searched for.
    239   if (!DisableBranchOpts) {
    240     EverMadeChange |= sinkAndCmp(F);
    241     EverMadeChange |= splitBranchCondition(F);
    242   }
    243 
    244   bool MadeChange = true;
    245   while (MadeChange) {
    246     MadeChange = false;
    247     for (Function::iterator I = F.begin(); I != F.end(); ) {
    248       BasicBlock *BB = I++;
    249       bool ModifiedDTOnIteration = false;
    250       MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
    251 
    252       // Restart BB iteration if the dominator tree of the Function was changed
    253       if (ModifiedDTOnIteration)
    254         break;
    255     }
    256     EverMadeChange |= MadeChange;
    257   }
    258 
    259   SunkAddrs.clear();
    260 
    261   if (!DisableBranchOpts) {
    262     MadeChange = false;
    263     SmallPtrSet<BasicBlock*, 8> WorkList;
    264     for (BasicBlock &BB : F) {
    265       SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
    266       MadeChange |= ConstantFoldTerminator(&BB, true);
    267       if (!MadeChange) continue;
    268 
    269       for (SmallVectorImpl<BasicBlock*>::iterator
    270              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
    271         if (pred_begin(*II) == pred_end(*II))
    272           WorkList.insert(*II);
    273     }
    274 
    275     // Delete the dead blocks and any of their dead successors.
    276     MadeChange |= !WorkList.empty();
    277     while (!WorkList.empty()) {
    278       BasicBlock *BB = *WorkList.begin();
    279       WorkList.erase(BB);
    280       SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
    281 
    282       DeleteDeadBlock(BB);
    283 
    284       for (SmallVectorImpl<BasicBlock*>::iterator
    285              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
    286         if (pred_begin(*II) == pred_end(*II))
    287           WorkList.insert(*II);
    288     }
    289 
    290     // Merge pairs of basic blocks with unconditional branches, connected by
    291     // a single edge.
    292     if (EverMadeChange || MadeChange)
    293       MadeChange |= EliminateFallThrough(F);
    294 
    295     EverMadeChange |= MadeChange;
    296   }
    297 
    298   if (!DisableGCOpts) {
    299     SmallVector<Instruction *, 2> Statepoints;
    300     for (BasicBlock &BB : F)
    301       for (Instruction &I : BB)
    302         if (isStatepoint(I))
    303           Statepoints.push_back(&I);
    304     for (auto &I : Statepoints)
    305       EverMadeChange |= simplifyOffsetableRelocate(*I);
    306   }
    307 
    308   return EverMadeChange;
    309 }
    310 
    311 /// EliminateFallThrough - Merge basic blocks which are connected
    312 /// by a single edge, where one of the basic blocks has a single successor
    313 /// pointing to the other basic block, which has a single predecessor.
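/// A small sketch with hypothetical block names:
///   bb1:                  ; ends in 'br label %bb2' and is bb2's only predecessor
///     ...
///   bb2:                  ; single predecessor bb1
///     ...
/// The two blocks are merged into one and the fall-through branch disappears.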
    314 bool CodeGenPrepare::EliminateFallThrough(Function &F) {
    315   bool Changed = false;
    316   // Scan all of the blocks in the function, except for the entry block.
    317   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
    318     BasicBlock *BB = I++;
    319     // If the destination block has a single pred, then this is a trivial
    320     // edge, just collapse it.
    321     BasicBlock *SinglePred = BB->getSinglePredecessor();
    322 
    323     // Don't merge if BB's address is taken.
    324     if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
    325 
    326     BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
    327     if (Term && !Term->isConditional()) {
    328       Changed = true;
    329       DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
    330       // Remember if SinglePred was the entry block of the function.
    331       // If so, we will need to move BB back to the entry position.
    332       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
    333       MergeBasicBlockIntoOnlyPred(BB, nullptr);
    334 
    335       if (isEntry && BB != &BB->getParent()->getEntryBlock())
    336         BB->moveBefore(&BB->getParent()->getEntryBlock());
    337 
    338       // We have erased a block. Update the iterator.
    339       I = BB;
    340     }
    341   }
    342   return Changed;
    343 }
    344 
    345 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
    346 /// debug info directives, and an unconditional branch.  Passes before isel
    347 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
    348 /// isel.  Start by eliminating these blocks so we can split them the way we
    349 /// want them.
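/// A hypothetical example of such a block, typically created by an earlier
/// edge split:
///   bb.split:
///     %p = phi i32 [ %x, %a ], [ %y, %b ]
///     br label %dest
/// If %p is only used by phi nodes in %dest, bb.split can be removed and its
/// incoming edges retargeted directly to %dest.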
    350 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
    351   bool MadeChange = false;
    352   // Note that this intentionally skips the entry block.
    353   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
    354     BasicBlock *BB = I++;
    355 
    356     // If this block doesn't end with an uncond branch, ignore it.
    357     BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    358     if (!BI || !BI->isUnconditional())
    359       continue;
    360 
    361     // If the instruction before the branch (skipping debug info) isn't a phi
    362     // node, then other stuff is happening here.
    363     BasicBlock::iterator BBI = BI;
    364     if (BBI != BB->begin()) {
    365       --BBI;
    366       while (isa<DbgInfoIntrinsic>(BBI)) {
    367         if (BBI == BB->begin())
    368           break;
    369         --BBI;
    370       }
    371       if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
    372         continue;
    373     }
    374 
    375     // Do not break infinite loops.
    376     BasicBlock *DestBB = BI->getSuccessor(0);
    377     if (DestBB == BB)
    378       continue;
    379 
    380     if (!CanMergeBlocks(BB, DestBB))
    381       continue;
    382 
    383     EliminateMostlyEmptyBlock(BB);
    384     MadeChange = true;
    385   }
    386   return MadeChange;
    387 }
    388 
    389 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
    390 /// single uncond branch between them, and BB contains no other non-phi
    391 /// instructions.
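/// Sketch of a rejected case (hypothetical names): if a block %p is a
/// predecessor of both BB and DestBB, a phi in DestBB may receive one value
/// from %p directly and a different one routed through BB; merging would give
/// that phi conflicting incoming values for %p, so we must return false.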
    392 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
    393                                     const BasicBlock *DestBB) const {
    394   // We only want to eliminate blocks whose phi nodes are used by phi nodes in
     395   // the successor.  If there are more complex conditions (e.g. preheaders),
    396   // don't mess around with them.
    397   BasicBlock::const_iterator BBI = BB->begin();
    398   while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    399     for (const User *U : PN->users()) {
    400       const Instruction *UI = cast<Instruction>(U);
    401       if (UI->getParent() != DestBB || !isa<PHINode>(UI))
    402         return false;
     403       // If the user is inside the DestBB block and it is a PHINode, then check
     404       // the incoming value. If the incoming value is not from BB then this is
     405       // a complex condition (e.g. preheaders) we want to avoid here.
    406       if (UI->getParent() == DestBB) {
    407         if (const PHINode *UPN = dyn_cast<PHINode>(UI))
    408           for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
    409             Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
    410             if (Insn && Insn->getParent() == BB &&
    411                 Insn->getParent() != UPN->getIncomingBlock(I))
    412               return false;
    413           }
    414       }
    415     }
    416   }
    417 
    418   // If BB and DestBB contain any common predecessors, then the phi nodes in BB
    419   // and DestBB may have conflicting incoming values for the block.  If so, we
    420   // can't merge the block.
    421   const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
    422   if (!DestBBPN) return true;  // no conflict.
    423 
    424   // Collect the preds of BB.
    425   SmallPtrSet<const BasicBlock*, 16> BBPreds;
    426   if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    427     // It is faster to get preds from a PHI than with pred_iterator.
    428     for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
    429       BBPreds.insert(BBPN->getIncomingBlock(i));
    430   } else {
    431     BBPreds.insert(pred_begin(BB), pred_end(BB));
    432   }
    433 
    434   // Walk the preds of DestBB.
    435   for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    436     BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    437     if (BBPreds.count(Pred)) {   // Common predecessor?
    438       BBI = DestBB->begin();
    439       while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    440         const Value *V1 = PN->getIncomingValueForBlock(Pred);
    441         const Value *V2 = PN->getIncomingValueForBlock(BB);
    442 
    443         // If V2 is a phi node in BB, look up what the mapped value will be.
    444         if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
    445           if (V2PN->getParent() == BB)
    446             V2 = V2PN->getIncomingValueForBlock(Pred);
    447 
    448         // If there is a conflict, bail out.
    449         if (V1 != V2) return false;
    450       }
    451     }
    452   }
    453 
    454   return true;
    455 }
    456 
    457 
     458 /// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
    459 /// an unconditional branch in it.
    460 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
    461   BranchInst *BI = cast<BranchInst>(BB->getTerminator());
    462   BasicBlock *DestBB = BI->getSuccessor(0);
    463 
    464   DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
    465 
    466   // If the destination block has a single pred, then this is a trivial edge,
    467   // just collapse it.
    468   if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    469     if (SinglePred != DestBB) {
    470       // Remember if SinglePred was the entry block of the function.  If so, we
    471       // will need to move BB back to the entry position.
    472       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
    473       MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
    474 
    475       if (isEntry && BB != &BB->getParent()->getEntryBlock())
    476         BB->moveBefore(&BB->getParent()->getEntryBlock());
    477 
    478       DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
    479       return;
    480     }
    481   }
    482 
    483   // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
    484   // to handle the new incoming edges it is about to have.
    485   PHINode *PN;
    486   for (BasicBlock::iterator BBI = DestBB->begin();
    487        (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    488     // Remove the incoming value for BB, and remember it.
    489     Value *InVal = PN->removeIncomingValue(BB, false);
    490 
    491     // Two options: either the InVal is a phi node defined in BB or it is some
    492     // value that dominates BB.
    493     PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    494     if (InValPhi && InValPhi->getParent() == BB) {
    495       // Add all of the input values of the input PHI as inputs of this phi.
    496       for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
    497         PN->addIncoming(InValPhi->getIncomingValue(i),
    498                         InValPhi->getIncomingBlock(i));
    499     } else {
    500       // Otherwise, add one instance of the dominating value for each edge that
    501       // we will be adding.
    502       if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    503         for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
    504           PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
    505       } else {
    506         for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
    507           PN->addIncoming(InVal, *PI);
    508       }
    509     }
    510   }
    511 
    512   // The PHIs are now updated, change everything that refers to BB to use
    513   // DestBB and remove BB.
    514   BB->replaceAllUsesWith(DestBB);
    515   BB->eraseFromParent();
    516   ++NumBlocksElim;
    517 
    518   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
    519 }
    520 
    521 // Computes a map of base pointer relocation instructions to corresponding
    522 // derived pointer relocation instructions given a vector of all relocate calls
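// Illustrative sketch of the keying (statepoint indices are hypothetical):
//   %b = relocate(%tok, i32 4, i32 4)    ; base index == derived index
//   %d = relocate(%tok, i32 4, i32 5)    ; derived pointer of the same base
// %b is keyed under (4, 4) as a base relocate; %d is keyed under (4, 5) and
// ends up in RelocateInstMap as a derived relocate of %b.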
    523 static void computeBaseDerivedRelocateMap(
    524     const SmallVectorImpl<User *> &AllRelocateCalls,
    525     DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
    526         RelocateInstMap) {
    527   // Collect information in two maps: one primarily for locating the base object
    528   // while filling the second map; the second map is the final structure holding
    529   // a mapping between Base and corresponding Derived relocate calls
    530   DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
    531   for (auto &U : AllRelocateCalls) {
    532     GCRelocateOperands ThisRelocate(U);
    533     IntrinsicInst *I = cast<IntrinsicInst>(U);
    534     auto K = std::make_pair(ThisRelocate.basePtrIndex(),
    535                             ThisRelocate.derivedPtrIndex());
    536     RelocateIdxMap.insert(std::make_pair(K, I));
    537   }
    538   for (auto &Item : RelocateIdxMap) {
    539     std::pair<unsigned, unsigned> Key = Item.first;
    540     if (Key.first == Key.second)
    541       // Base relocation: nothing to insert
    542       continue;
    543 
    544     IntrinsicInst *I = Item.second;
    545     auto BaseKey = std::make_pair(Key.first, Key.first);
    546 
    547     // We're iterating over RelocateIdxMap so we cannot modify it.
    548     auto MaybeBase = RelocateIdxMap.find(BaseKey);
    549     if (MaybeBase == RelocateIdxMap.end())
    550       // TODO: We might want to insert a new base object relocate and gep off
    551       // that, if there are enough derived object relocates.
    552       continue;
    553 
    554     RelocateInstMap[MaybeBase->second].push_back(I);
    555   }
    556 }
    557 
    558 // Accepts a GEP and extracts the operands into a vector provided they're all
    559 // small integer constants
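// For example (hypothetical IR), 'gep %base, i32 0, i32 15' yields {0, 15},
// while a GEP with a variable index, or a constant index larger than 20, is
// rejected.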
    560 static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
    561                                           SmallVectorImpl<Value *> &OffsetV) {
    562   for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
    563     // Only accept small constant integer operands
    564     auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
    565     if (!Op || Op->getZExtValue() > 20)
    566       return false;
    567   }
    568 
    569   for (unsigned i = 1; i < GEP->getNumOperands(); i++)
    570     OffsetV.push_back(GEP->getOperand(i));
    571   return true;
    572 }
    573 
    574 // Takes a RelocatedBase (base pointer relocation instruction) and Targets to
     575 // replace, computes a replacement, and applies it.
    576 static bool
    577 simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
    578                           const SmallVectorImpl<IntrinsicInst *> &Targets) {
    579   bool MadeChange = false;
    580   for (auto &ToReplace : Targets) {
    581     GCRelocateOperands MasterRelocate(RelocatedBase);
    582     GCRelocateOperands ThisRelocate(ToReplace);
    583 
    584     assert(ThisRelocate.basePtrIndex() == MasterRelocate.basePtrIndex() &&
    585            "Not relocating a derived object of the original base object");
    586     if (ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) {
    587       // A duplicate relocate call. TODO: coalesce duplicates.
    588       continue;
    589     }
    590 
    591     Value *Base = ThisRelocate.basePtr();
    592     auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
    593     if (!Derived || Derived->getPointerOperand() != Base)
    594       continue;
    595 
    596     SmallVector<Value *, 2> OffsetV;
    597     if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
    598       continue;
    599 
    600     // Create a Builder and replace the target callsite with a gep
    601     IRBuilder<> Builder(ToReplace);
    602     Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
    603     Value *Replacement = Builder.CreateGEP(
    604         Derived->getSourceElementType(), RelocatedBase, makeArrayRef(OffsetV));
    605     Instruction *ReplacementInst = cast<Instruction>(Replacement);
    606     ReplacementInst->removeFromParent();
    607     ReplacementInst->insertAfter(RelocatedBase);
    608     Replacement->takeName(ToReplace);
    609     ToReplace->replaceAllUsesWith(Replacement);
    610     ToReplace->eraseFromParent();
    611 
    612     MadeChange = true;
    613   }
    614   return MadeChange;
    615 }
    616 
    617 // Turns this:
    618 //
    619 // %base = ...
    620 // %ptr = gep %base + 15
    621 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
    622 // %base' = relocate(%tok, i32 4, i32 4)
    623 // %ptr' = relocate(%tok, i32 4, i32 5)
    624 // %val = load %ptr'
    625 //
    626 // into this:
    627 //
    628 // %base = ...
    629 // %ptr = gep %base + 15
    630 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
    631 // %base' = gc.relocate(%tok, i32 4, i32 4)
    632 // %ptr' = gep %base' + 15
    633 // %val = load %ptr'
    634 bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
    635   bool MadeChange = false;
    636   SmallVector<User *, 2> AllRelocateCalls;
    637 
    638   for (auto *U : I.users())
    639     if (isGCRelocate(dyn_cast<Instruction>(U)))
    640       // Collect all the relocate calls associated with a statepoint
    641       AllRelocateCalls.push_back(U);
    642 
     643   // We need at least one base pointer relocation + one derived pointer
    644   // relocation to mangle
    645   if (AllRelocateCalls.size() < 2)
    646     return false;
    647 
    648   // RelocateInstMap is a mapping from the base relocate instruction to the
    649   // corresponding derived relocate instructions
    650   DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
    651   computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
    652   if (RelocateInstMap.empty())
    653     return false;
    654 
    655   for (auto &Item : RelocateInstMap)
    656     // Item.first is the RelocatedBase to offset against
    657     // Item.second is the vector of Targets to replace
    658     MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
    659   return MadeChange;
    660 }
    661 
    662 /// SinkCast - Sink the specified cast instruction into its user blocks
    663 static bool SinkCast(CastInst *CI) {
    664   BasicBlock *DefBB = CI->getParent();
    665 
    666   /// InsertedCasts - Only insert a cast in each block once.
    667   DenseMap<BasicBlock*, CastInst*> InsertedCasts;
    668 
    669   bool MadeChange = false;
    670   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
    671        UI != E; ) {
    672     Use &TheUse = UI.getUse();
    673     Instruction *User = cast<Instruction>(*UI);
    674 
    675     // Figure out which BB this cast is used in.  For PHI's this is the
    676     // appropriate predecessor block.
    677     BasicBlock *UserBB = User->getParent();
    678     if (PHINode *PN = dyn_cast<PHINode>(User)) {
    679       UserBB = PN->getIncomingBlock(TheUse);
    680     }
    681 
    682     // Preincrement use iterator so we don't invalidate it.
    683     ++UI;
    684 
    685     // If this user is in the same block as the cast, don't change the cast.
    686     if (UserBB == DefBB) continue;
    687 
    688     // If we have already inserted a cast into this block, use it.
    689     CastInst *&InsertedCast = InsertedCasts[UserBB];
    690 
    691     if (!InsertedCast) {
    692       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
    693       InsertedCast =
    694         CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
    695                          InsertPt);
    696     }
    697 
    698     // Replace a use of the cast with a use of the new cast.
    699     TheUse = InsertedCast;
    700     MadeChange = true;
    701     ++NumCastUses;
    702   }
    703 
    704   // If we removed all uses, nuke the cast.
    705   if (CI->use_empty()) {
    706     CI->eraseFromParent();
    707     MadeChange = true;
    708   }
    709 
    710   return MadeChange;
    711 }
    712 
    713 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
    714 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
    715 /// sink it into user blocks to reduce the number of virtual
    716 /// registers that must be created and coalesced.
    717 ///
    718 /// Return true if any changes are made.
    719 ///
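/// A minimal sketch (hypothetical types and names):
///   entry:
///     %c = bitcast i32* %p to i8*
///     br i1 %cond, label %bb1, label %bb2
/// If %c is only used in %bb1 and %bb2, a copy of the bitcast is placed in
/// each of those blocks and the original is erased once it has no uses left.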
    720 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
    721   // If this is a noop copy,
    722   EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
    723   EVT DstVT = TLI.getValueType(CI->getType());
    724 
     725   // If this is an fp<->int conversion, it is not a noop copy.
    726   if (SrcVT.isInteger() != DstVT.isInteger())
    727     return false;
    728 
    729   // If this is an extension, it will be a zero or sign extension, which
    730   // isn't a noop.
    731   if (SrcVT.bitsLT(DstVT)) return false;
    732 
    733   // If these values will be promoted, find out what they will be promoted
    734   // to.  This helps us consider truncates on PPC as noop copies when they
    735   // are.
    736   if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
    737       TargetLowering::TypePromoteInteger)
    738     SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
    739   if (TLI.getTypeAction(CI->getContext(), DstVT) ==
    740       TargetLowering::TypePromoteInteger)
    741     DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
    742 
    743   // If, after promotion, these are the same types, this is a noop copy.
    744   if (SrcVT != DstVT)
    745     return false;
    746 
    747   return SinkCast(CI);
    748 }
    749 
    750 /// CombineUAddWithOverflow - try to combine CI into a call to the
    751 /// llvm.uadd.with.overflow intrinsic if possible.
    752 ///
    753 /// Return true if any changes were made.
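/// A sketch of the rewrite on hypothetical IR:
///   %add = add i32 %a, %b
///   %cmp = icmp ult i32 %add, %a        ; unsigned overflow check
/// becomes
///   %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
///   %add = extractvalue { i32, i1 } %res, 0
///   %cmp = extractvalue { i32, i1 } %res, 1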
    754 static bool CombineUAddWithOverflow(CmpInst *CI) {
    755   Value *A, *B;
    756   Instruction *AddI;
    757   if (!match(CI,
    758              m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
    759     return false;
    760 
    761   Type *Ty = AddI->getType();
    762   if (!isa<IntegerType>(Ty))
    763     return false;
    764 
     765   // We don't want to move around uses of condition values this late, so we
    766   // check if it is legal to create the call to the intrinsic in the basic
    767   // block containing the icmp:
    768 
    769   if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
    770     return false;
    771 
    772 #ifndef NDEBUG
    773   // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
    774   // for now:
    775   if (AddI->hasOneUse())
    776     assert(*AddI->user_begin() == CI && "expected!");
    777 #endif
    778 
    779   Module *M = CI->getParent()->getParent()->getParent();
    780   Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
    781 
    782   auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
    783 
    784   auto *UAddWithOverflow =
    785       CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
    786   auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
    787   auto *Overflow =
    788       ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
    789 
    790   CI->replaceAllUsesWith(Overflow);
    791   AddI->replaceAllUsesWith(UAdd);
    792   CI->eraseFromParent();
    793   AddI->eraseFromParent();
    794   return true;
    795 }
    796 
    797 /// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
    798 /// the number of virtual registers that must be created and coalesced.  This is
    799 /// a clear win except on targets with multiple condition code registers
    800 ///  (PowerPC), where it might lose; some adjustment may be wanted there.
    801 ///
    802 /// Return true if any changes are made.
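/// A small sketch (hypothetical blocks): a cmp defined in %entry but only used
/// in %bb1 and %bb2 gets one copy per user block, e.g.
///   bb1:
///     %c.sunk = icmp eq i32 %x, 0
///     br i1 %c.sunk, label %t, label %f
/// so the flag-producing instruction sits next to its consumer and the i1 value
/// does not have to be kept live across blocks.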
    803 static bool SinkCmpExpression(CmpInst *CI) {
    804   BasicBlock *DefBB = CI->getParent();
    805 
    806   /// InsertedCmp - Only insert a cmp in each block once.
    807   DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
    808 
    809   bool MadeChange = false;
    810   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
    811        UI != E; ) {
    812     Use &TheUse = UI.getUse();
    813     Instruction *User = cast<Instruction>(*UI);
    814 
    815     // Preincrement use iterator so we don't invalidate it.
    816     ++UI;
    817 
    818     // Don't bother for PHI nodes.
    819     if (isa<PHINode>(User))
    820       continue;
    821 
    822     // Figure out which BB this cmp is used in.
    823     BasicBlock *UserBB = User->getParent();
    824 
    825     // If this user is in the same block as the cmp, don't change the cmp.
    826     if (UserBB == DefBB) continue;
    827 
    828     // If we have already inserted a cmp into this block, use it.
    829     CmpInst *&InsertedCmp = InsertedCmps[UserBB];
    830 
    831     if (!InsertedCmp) {
    832       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
    833       InsertedCmp =
    834         CmpInst::Create(CI->getOpcode(),
    835                         CI->getPredicate(),  CI->getOperand(0),
    836                         CI->getOperand(1), "", InsertPt);
    837     }
    838 
    839     // Replace a use of the cmp with a use of the new cmp.
    840     TheUse = InsertedCmp;
    841     MadeChange = true;
    842     ++NumCmpUses;
    843   }
    844 
    845   // If we removed all uses, nuke the cmp.
    846   if (CI->use_empty()) {
    847     CI->eraseFromParent();
    848     MadeChange = true;
    849   }
    850 
    851   return MadeChange;
    852 }
    853 
    854 static bool OptimizeCmpExpression(CmpInst *CI) {
    855   if (SinkCmpExpression(CI))
    856     return true;
    857 
    858   if (CombineUAddWithOverflow(CI))
    859     return true;
    860 
    861   return false;
    862 }
    863 
     864 /// isExtractBitsCandidateUse - Check if the candidate uses could
     865 /// be combined with the shift instruction. A candidate is either:
     866 /// 1. A truncate instruction, or
     867 /// 2. An 'and' instruction whose immediate is a mask of the low bits:
     868 ///    imm & (imm+1) == 0
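/// For example, imm = 0x00ff is accepted since 0x00ff & 0x0100 == 0, while
/// imm = 0x00f0 is rejected since 0x00f0 & 0x00f1 == 0x00f0 != 0.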
    869 static bool isExtractBitsCandidateUse(Instruction *User) {
    870   if (!isa<TruncInst>(User)) {
    871     if (User->getOpcode() != Instruction::And ||
    872         !isa<ConstantInt>(User->getOperand(1)))
    873       return false;
    874 
    875     const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
    876 
    877     if ((Cimm & (Cimm + 1)).getBoolValue())
    878       return false;
    879   }
    880   return true;
    881 }
    882 
     883 /// SinkShiftAndTruncate - sink both the shift and truncate instructions
     884 /// into the basic blocks of the truncate's users.
    885 static bool
    886 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
    887                      DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
    888                      const TargetLowering &TLI) {
    889   BasicBlock *UserBB = User->getParent();
    890   DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
    891   TruncInst *TruncI = dyn_cast<TruncInst>(User);
    892   bool MadeChange = false;
    893 
    894   for (Value::user_iterator TruncUI = TruncI->user_begin(),
    895                             TruncE = TruncI->user_end();
    896        TruncUI != TruncE;) {
    897 
    898     Use &TruncTheUse = TruncUI.getUse();
    899     Instruction *TruncUser = cast<Instruction>(*TruncUI);
    900     // Preincrement use iterator so we don't invalidate it.
    901 
    902     ++TruncUI;
    903 
    904     int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
    905     if (!ISDOpcode)
    906       continue;
    907 
    908     // If the use is actually a legal node, there will not be an
    909     // implicit truncate.
    910     // FIXME: always querying the result type is just an
    911     // approximation; some nodes' legality is determined by the
    912     // operand or other means. There's no good way to find out though.
    913     if (TLI.isOperationLegalOrCustom(
    914             ISDOpcode, TLI.getValueType(TruncUser->getType(), true)))
    915       continue;
    916 
    917     // Don't bother for PHI nodes.
    918     if (isa<PHINode>(TruncUser))
    919       continue;
    920 
    921     BasicBlock *TruncUserBB = TruncUser->getParent();
    922 
    923     if (UserBB == TruncUserBB)
    924       continue;
    925 
    926     BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
    927     CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
    928 
    929     if (!InsertedShift && !InsertedTrunc) {
    930       BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
    931       // Sink the shift
    932       if (ShiftI->getOpcode() == Instruction::AShr)
    933         InsertedShift =
    934             BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
    935       else
    936         InsertedShift =
    937             BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
    938 
    939       // Sink the trunc
    940       BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
    941       TruncInsertPt++;
    942 
    943       InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
    944                                        TruncI->getType(), "", TruncInsertPt);
    945 
    946       MadeChange = true;
    947 
    948       TruncTheUse = InsertedTrunc;
    949     }
    950   }
    951   return MadeChange;
    952 }
    953 
     954 /// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
     955 /// the uses could potentially be combined with this shift instruction to
     956 /// generate a BitExtract instruction. It is only applied if the architecture
     957 /// supports BitExtract instructions. Here is an example:
    958 /// BB1:
    959 ///   %x.extract.shift = lshr i64 %arg1, 32
    960 /// BB2:
    961 ///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
    962 /// ==>
    963 ///
    964 /// BB2:
    965 ///   %x.extract.shift.1 = lshr i64 %arg1, 32
    966 ///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
    967 ///
     968 /// CodeGen will recognize the pattern in BB2 and generate BitExtract
    969 /// instruction.
    970 /// Return true if any changes are made.
    971 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
    972                                 const TargetLowering &TLI) {
    973   BasicBlock *DefBB = ShiftI->getParent();
    974 
    975   /// Only insert instructions in each block once.
    976   DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
    977 
    978   bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
    979 
    980   bool MadeChange = false;
    981   for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
    982        UI != E;) {
    983     Use &TheUse = UI.getUse();
    984     Instruction *User = cast<Instruction>(*UI);
    985     // Preincrement use iterator so we don't invalidate it.
    986     ++UI;
    987 
    988     // Don't bother for PHI nodes.
    989     if (isa<PHINode>(User))
    990       continue;
    991 
    992     if (!isExtractBitsCandidateUse(User))
    993       continue;
    994 
    995     BasicBlock *UserBB = User->getParent();
    996 
     997       // If the shift and truncate instructions are in the same BB, the use of
     998       // the truncate (TruncUse) may still introduce another truncate if its type
     999       // is not legal. In this case, we would like to sink both the shift and
    1000       // truncate instructions to the BB of TruncUse.
   1001       // instruction to the BB of TruncUse.
   1002       // for example:
   1003       // BB1:
   1004       // i64 shift.result = lshr i64 opnd, imm
   1005       // trunc.result = trunc shift.result to i16
   1006       //
   1007       // BB2:
   1008       //   ----> We will have an implicit truncate here if the architecture does
   1009       //   not have i16 compare.
   1010       // cmp i16 trunc.result, opnd2
   1011       //
   1012       if (isa<TruncInst>(User) && shiftIsLegal
    1013           // If the type of the truncate is legal, no truncate will be
   1014           // introduced in other basic blocks.
   1015           && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
   1016         MadeChange =
   1017             SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
   1018 
   1019       continue;
   1020     }
   1021     // If we have already inserted a shift into this block, use it.
   1022     BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
   1023 
   1024     if (!InsertedShift) {
   1025       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
   1026 
   1027       if (ShiftI->getOpcode() == Instruction::AShr)
   1028         InsertedShift =
   1029             BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
   1030       else
   1031         InsertedShift =
   1032             BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
   1033 
   1034       MadeChange = true;
   1035     }
   1036 
   1037     // Replace a use of the shift with a use of the new shift.
   1038     TheUse = InsertedShift;
   1039   }
   1040 
   1041   // If we removed all uses, nuke the shift.
   1042   if (ShiftI->use_empty())
   1043     ShiftI->eraseFromParent();
   1044 
   1045   return MadeChange;
   1046 }
   1047 
   1048 //  ScalarizeMaskedLoad() translates masked load intrinsic, like
   1049 // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
   1050 //                               <16 x i1> %mask, <16 x i32> %passthru)
    1051 // to a chain of basic blocks that load the elements one by one if
    1052 // the appropriate mask bit is set
   1053 //
   1054 //  %1 = bitcast i8* %addr to i32*
   1055 //  %2 = extractelement <16 x i1> %mask, i32 0
   1056 //  %3 = icmp eq i1 %2, true
   1057 //  br i1 %3, label %cond.load, label %else
   1058 //
   1059 //cond.load:                                        ; preds = %0
   1060 //  %4 = getelementptr i32* %1, i32 0
   1061 //  %5 = load i32* %4
   1062 //  %6 = insertelement <16 x i32> undef, i32 %5, i32 0
   1063 //  br label %else
   1064 //
   1065 //else:                                             ; preds = %0, %cond.load
   1066 //  %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
   1067 //  %7 = extractelement <16 x i1> %mask, i32 1
   1068 //  %8 = icmp eq i1 %7, true
   1069 //  br i1 %8, label %cond.load1, label %else2
   1070 //
   1071 //cond.load1:                                       ; preds = %else
   1072 //  %9 = getelementptr i32* %1, i32 1
   1073 //  %10 = load i32* %9
   1074 //  %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
   1075 //  br label %else2
   1076 //
   1077 //else2:                                            ; preds = %else, %cond.load1
   1078 //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
   1079 //  %12 = extractelement <16 x i1> %mask, i32 2
   1080 //  %13 = icmp eq i1 %12, true
   1081 //  br i1 %13, label %cond.load4, label %else5
   1082 //
   1083 static void ScalarizeMaskedLoad(CallInst *CI) {
   1084   Value *Ptr  = CI->getArgOperand(0);
   1085   Value *Src0 = CI->getArgOperand(3);
   1086   Value *Mask = CI->getArgOperand(2);
    1087   VectorType *VecType = dyn_cast<VectorType>(CI->getType());
    1088   assert(VecType && "Unexpected return type of masked load intrinsic");
    1089   Type *EltTy = VecType->getElementType();
    1090 
   1091 
   1092   IRBuilder<> Builder(CI->getContext());
   1093   Instruction *InsertPt = CI;
   1094   BasicBlock *IfBlock = CI->getParent();
   1095   BasicBlock *CondBlock = nullptr;
   1096   BasicBlock *PrevIfBlock = CI->getParent();
   1097   Builder.SetInsertPoint(InsertPt);
   1098 
   1099   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
   1100 
    1101   // Bitcast %addr from i8* to EltTy*
   1102   Type *NewPtrType =
   1103     EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
   1104   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
   1105   Value *UndefVal = UndefValue::get(VecType);
   1106 
   1107   // The result vector
   1108   Value *VResult = UndefVal;
   1109 
   1110   PHINode *Phi = nullptr;
   1111   Value *PrevPhi = UndefVal;
   1112 
   1113   unsigned VectorWidth = VecType->getNumElements();
   1114   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
   1115 
   1116     // Fill the "else" block, created in the previous iteration
   1117     //
   1118     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
   1119     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
   1120     //  %to_load = icmp eq i1 %mask_1, true
   1121     //  br i1 %to_load, label %cond.load, label %else
   1122     //
   1123     if (Idx > 0) {
   1124       Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
   1125       Phi->addIncoming(VResult, CondBlock);
   1126       Phi->addIncoming(PrevPhi, PrevIfBlock);
   1127       PrevPhi = Phi;
   1128       VResult = Phi;
   1129     }
   1130 
   1131     Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
   1132     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
   1133                                     ConstantInt::get(Predicate->getType(), 1));
   1134 
   1135     // Create "cond" block
   1136     //
   1137     //  %EltAddr = getelementptr i32* %1, i32 0
   1138     //  %Elt = load i32* %EltAddr
   1139     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
   1140     //
   1141     CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
   1142     Builder.SetInsertPoint(InsertPt);
   1143 
   1144     Value *Gep =
   1145         Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
   1146     LoadInst* Load = Builder.CreateLoad(Gep, false);
   1147     VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
   1148 
   1149     // Create "else" block, fill it in the next iteration
   1150     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
   1151     Builder.SetInsertPoint(InsertPt);
   1152     Instruction *OldBr = IfBlock->getTerminator();
   1153     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
   1154     OldBr->eraseFromParent();
   1155     PrevIfBlock = IfBlock;
   1156     IfBlock = NewIfBlock;
   1157   }
   1158 
   1159   Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
   1160   Phi->addIncoming(VResult, CondBlock);
   1161   Phi->addIncoming(PrevPhi, PrevIfBlock);
   1162   Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
   1163   CI->replaceAllUsesWith(NewI);
   1164   CI->eraseFromParent();
   1165 }
   1166 
   1167 //  ScalarizeMaskedStore() translates masked store intrinsic, like
   1168 // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
   1169 //                               <16 x i1> %mask)
    1170 // to a chain of basic blocks that store the elements one by one if
    1171 // the appropriate mask bit is set
   1172 //
   1173 //   %1 = bitcast i8* %addr to i32*
   1174 //   %2 = extractelement <16 x i1> %mask, i32 0
   1175 //   %3 = icmp eq i1 %2, true
   1176 //   br i1 %3, label %cond.store, label %else
   1177 //
   1178 // cond.store:                                       ; preds = %0
   1179 //   %4 = extractelement <16 x i32> %val, i32 0
   1180 //   %5 = getelementptr i32* %1, i32 0
   1181 //   store i32 %4, i32* %5
   1182 //   br label %else
   1183 //
   1184 // else:                                             ; preds = %0, %cond.store
   1185 //   %6 = extractelement <16 x i1> %mask, i32 1
   1186 //   %7 = icmp eq i1 %6, true
   1187 //   br i1 %7, label %cond.store1, label %else2
   1188 //
   1189 // cond.store1:                                      ; preds = %else
   1190 //   %8 = extractelement <16 x i32> %val, i32 1
   1191 //   %9 = getelementptr i32* %1, i32 1
   1192 //   store i32 %8, i32* %9
   1193 //   br label %else2
   1194 //   . . .
   1195 static void ScalarizeMaskedStore(CallInst *CI) {
   1196   Value *Ptr  = CI->getArgOperand(1);
   1197   Value *Src = CI->getArgOperand(0);
   1198   Value *Mask = CI->getArgOperand(3);
   1199 
    1200   VectorType *VecType = dyn_cast<VectorType>(Src->getType());
    1201   assert(VecType && "Unexpected data type in masked store intrinsic");
    1202   Type *EltTy = VecType->getElementType();
    1203 
   1204 
   1205   IRBuilder<> Builder(CI->getContext());
   1206   Instruction *InsertPt = CI;
   1207   BasicBlock *IfBlock = CI->getParent();
   1208   Builder.SetInsertPoint(InsertPt);
   1209   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
   1210 
    1211   // Bitcast %addr from i8* to EltTy*
   1212   Type *NewPtrType =
   1213     EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
   1214   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
   1215 
   1216   unsigned VectorWidth = VecType->getNumElements();
   1217   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
   1218 
   1219     // Fill the "else" block, created in the previous iteration
   1220     //
   1221     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
   1222     //  %to_store = icmp eq i1 %mask_1, true
    1223     //  br i1 %to_store, label %cond.store, label %else
   1224     //
   1225     Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
   1226     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
   1227                                     ConstantInt::get(Predicate->getType(), 1));
   1228 
   1229     // Create "cond" block
   1230     //
   1231     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
   1232     //  %EltAddr = getelementptr i32* %1, i32 0
    1233     //  store i32 %OneElt, i32* %EltAddr
   1234     //
   1235     BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
   1236     Builder.SetInsertPoint(InsertPt);
   1237 
   1238     Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
   1239     Value *Gep =
   1240         Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
   1241     Builder.CreateStore(OneElt, Gep);
   1242 
   1243     // Create "else" block, fill it in the next iteration
   1244     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
   1245     Builder.SetInsertPoint(InsertPt);
   1246     Instruction *OldBr = IfBlock->getTerminator();
   1247     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
   1248     OldBr->eraseFromParent();
   1249     IfBlock = NewIfBlock;
   1250   }
   1251   CI->eraseFromParent();
   1252 }
   1253 
   1254 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
   1255   BasicBlock *BB = CI->getParent();
   1256 
   1257   // Lower inline assembly if we can.
    1258   // If we found an inline asm expression, and if the target knows how to
   1259   // lower it to normal LLVM code, do so now.
   1260   if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
   1261     if (TLI->ExpandInlineAsm(CI)) {
   1262       // Avoid invalidating the iterator.
   1263       CurInstIterator = BB->begin();
   1264       // Avoid processing instructions out of order, which could cause
   1265       // reuse before a value is defined.
   1266       SunkAddrs.clear();
   1267       return true;
   1268     }
   1269     // Sink address computing for memory operands into the block.
   1270     if (OptimizeInlineAsmInst(CI))
   1271       return true;
   1272   }
   1273 
   1274   const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
   1275 
   1276   // Align the pointer arguments to this call if the target thinks it's a good
   1277   // idea
   1278   unsigned MinSize, PrefAlign;
   1279   if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
   1280     for (auto &Arg : CI->arg_operands()) {
   1281       // We want to align both objects whose address is used directly and
   1282       // objects whose address is used in casts and GEPs, though it only makes
   1283       // sense for GEPs if the offset is a multiple of the desired alignment and
   1284       // if size - offset meets the size threshold.
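               // For example (illustrative): with PrefAlign = 16 and MinSize = 32,
               // a constant GEP at offset 16 into a 64-byte alloca qualifies
               // (16 is a multiple of 16 and 64 >= 32 + 16), while a GEP at
               // offset 8 is skipped because the offset is not a multiple of the
               // preferred alignment.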
   1285       if (!Arg->getType()->isPointerTy())
   1286         continue;
   1287       APInt Offset(TD->getPointerSizeInBits(
   1288                      cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
   1289       Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
   1290       uint64_t Offset2 = Offset.getLimitedValue();
   1291       if ((Offset2 & (PrefAlign-1)) != 0)
   1292         continue;
   1293       AllocaInst *AI;
   1294       if ((AI = dyn_cast<AllocaInst>(Val)) &&
   1295           AI->getAlignment() < PrefAlign &&
   1296           TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
   1297         AI->setAlignment(PrefAlign);
   1298       // Global variables can only be aligned if they are defined in this
   1299       // object (i.e. they are uniquely initialized in this object), and
   1300       // over-aligning global variables that have an explicit section is
   1301       // forbidden.
   1302       GlobalVariable *GV;
   1303       if ((GV = dyn_cast<GlobalVariable>(Val)) &&
   1304           GV->hasUniqueInitializer() &&
   1305           !GV->hasSection() &&
   1306           GV->getAlignment() < PrefAlign &&
   1307           TD->getTypeAllocSize(
   1308             GV->getType()->getElementType()) >= MinSize + Offset2)
   1309         GV->setAlignment(PrefAlign);
   1310     }
   1311     // If this is a memcpy (or similar) then we may be able to improve the
   1312     // alignment
   1313     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
   1314       unsigned Align = getKnownAlignment(MI->getDest(), *TD);
   1315       if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
   1316         Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD));
   1317       if (Align > MI->getAlignment())
   1318         MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
   1319     }
   1320   }
   1321 
   1322   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   1323   if (II) {
   1324     switch (II->getIntrinsicID()) {
   1325     default: break;
   1326     case Intrinsic::objectsize: {
   1327       // Lower all uses of llvm.objectsize.*
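               // The object size is unknown at this point, so the call folds to a
               // constant: 0 when the 'min' argument is 1, and -1 ("unknown")
               // otherwise, as computed in RetVal below.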
   1328       bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
   1329       Type *ReturnTy = CI->getType();
   1330       Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
   1331 
   1332       // Substituting this can cause recursive simplifications, which can
   1333       // invalidate our iterator.  Use a WeakVH to hold onto it in case this
   1334       // happens.
   1335       WeakVH IterHandle(CurInstIterator);
   1336 
   1337       replaceAndRecursivelySimplify(CI, RetVal,
   1338                                     TLInfo, nullptr);
   1339 
   1340       // If the iterator instruction was recursively deleted, start over at the
   1341       // start of the block.
   1342       if (IterHandle != CurInstIterator) {
   1343         CurInstIterator = BB->begin();
   1344         SunkAddrs.clear();
   1345       }
   1346       return true;
   1347     }
   1348     case Intrinsic::masked_load: {
   1349       // Scalarize unsupported vector masked load
   1350       if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) {
   1351         ScalarizeMaskedLoad(CI);
   1352         ModifiedDT = true;
   1353         return true;
   1354       }
   1355       return false;
   1356     }
   1357     case Intrinsic::masked_store: {
   1358       if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) {
   1359         ScalarizeMaskedStore(CI);
   1360         ModifiedDT = true;
   1361         return true;
   1362       }
   1363       return false;
   1364     }
   1365     }
   1366 
   1367     if (TLI) {
   1368       SmallVector<Value*, 2> PtrOps;
   1369       Type *AccessTy;
   1370       if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
   1371         while (!PtrOps.empty())
   1372           if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
   1373             return true;
   1374     }
   1375   }
   1376 
   1377   // From here on out we're working with named functions.
   1378   if (!CI->getCalledFunction()) return false;
   1379 
   1380   // Lower all default uses of _chk calls.  This is very similar
   1381   // to what InstCombineCalls does, but here we are only lowering calls
   1382   // to fortified library functions (e.g. __memcpy_chk) that have the default
   1383   // "don't know" as the objectsize.  Anything else should be left alone.
   1384   FortifiedLibCallSimplifier Simplifier(TLInfo, true);
   1385   if (Value *V = Simplifier.optimizeCall(CI)) {
   1386     CI->replaceAllUsesWith(V);
   1387     CI->eraseFromParent();
   1388     return true;
   1389   }
   1390   return false;
   1391 }
   1392 
   1393 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
   1394 /// instructions to the predecessor to enable tail call optimizations. The
   1395 /// case it is currently looking for is:
   1396 /// @code
   1397 /// bb0:
   1398 ///   %tmp0 = tail call i32 @f0()
   1399 ///   br label %return
   1400 /// bb1:
   1401 ///   %tmp1 = tail call i32 @f1()
   1402 ///   br label %return
   1403 /// bb2:
   1404 ///   %tmp2 = tail call i32 @f2()
   1405 ///   br label %return
   1406 /// return:
   1407 ///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
   1408 ///   ret i32 %retval
   1409 /// @endcode
   1410 ///
   1411 /// =>
   1412 ///
   1413 /// @code
   1414 /// bb0:
   1415 ///   %tmp0 = tail call i32 @f0()
   1416 ///   ret i32 %tmp0
   1417 /// bb1:
   1418 ///   %tmp1 = tail call i32 @f1()
   1419 ///   ret i32 %tmp1
   1420 /// bb2:
   1421 ///   %tmp2 = tail call i32 @f2()
   1422 ///   ret i32 %tmp2
   1423 /// @endcode
   1424 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
   1425   if (!TLI)
   1426     return false;
   1427 
   1428   ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
   1429   if (!RI)
   1430     return false;
   1431 
   1432   PHINode *PN = nullptr;
   1433   BitCastInst *BCI = nullptr;
   1434   Value *V = RI->getReturnValue();
   1435   if (V) {
   1436     BCI = dyn_cast<BitCastInst>(V);
   1437     if (BCI)
   1438       V = BCI->getOperand(0);
   1439 
   1440     PN = dyn_cast<PHINode>(V);
   1441     if (!PN)
   1442       return false;
   1443   }
   1444 
   1445   if (PN && PN->getParent() != BB)
   1446     return false;
   1447 
   1448   // It's not safe to eliminate the sign / zero extension of the return value.
   1449   // See llvm::isInTailCallPosition().
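           // (The caller would have to extend the call's result before returning,
           // so such a call would not really be in tail position.)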
   1450   const Function *F = BB->getParent();
   1451   AttributeSet CallerAttrs = F->getAttributes();
   1452   if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
   1453       CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
   1454     return false;
   1455 
   1456   // Make sure there are no instructions between the PHI and return, or that the
   1457   // return is the first instruction in the block.
   1458   if (PN) {
   1459     BasicBlock::iterator BI = BB->begin();
   1460     do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
   1461     if (&*BI == BCI)
   1462       // Also skip over the bitcast.
   1463       ++BI;
   1464     if (&*BI != RI)
   1465       return false;
   1466   } else {
   1467     BasicBlock::iterator BI = BB->begin();
   1468     while (isa<DbgInfoIntrinsic>(BI)) ++BI;
   1469     if (&*BI != RI)
   1470       return false;
   1471   }
   1472 
   1473   /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
   1474   /// call.
   1475   SmallVector<CallInst*, 4> TailCalls;
   1476   if (PN) {
   1477     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
   1478       CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
   1479       // Make sure the phi value is indeed produced by the tail call.
   1480       if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
   1481           TLI->mayBeEmittedAsTailCall(CI))
   1482         TailCalls.push_back(CI);
   1483     }
   1484   } else {
   1485     SmallPtrSet<BasicBlock*, 4> VisitedBBs;
   1486     for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
   1487       if (!VisitedBBs.insert(*PI).second)
   1488         continue;
   1489 
   1490       BasicBlock::InstListType &InstList = (*PI)->getInstList();
   1491       BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
   1492       BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
   1493       do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
   1494       if (RI == RE)
   1495         continue;
   1496 
   1497       CallInst *CI = dyn_cast<CallInst>(&*RI);
   1498       if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
   1499         TailCalls.push_back(CI);
   1500     }
   1501   }
   1502 
   1503   bool Changed = false;
   1504   for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
   1505     CallInst *CI = TailCalls[i];
   1506     CallSite CS(CI);
   1507 
   1508     // Conservatively require the attributes of the call to match those of the
   1509     // return. Ignore noalias because it doesn't affect the call sequence.
   1510     AttributeSet CalleeAttrs = CS.getAttributes();
   1511     if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
   1512           removeAttribute(Attribute::NoAlias) !=
   1513         AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
   1514           removeAttribute(Attribute::NoAlias))
   1515       continue;
   1516 
   1517     // Make sure the call instruction is followed by an unconditional branch to
   1518     // the return block.
   1519     BasicBlock *CallBB = CI->getParent();
   1520     BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
   1521     if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
   1522       continue;
   1523 
   1524     // Duplicate the return into CallBB.
   1525     (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
   1526     ModifiedDT = Changed = true;
   1527     ++NumRetsDup;
   1528   }
   1529 
   1530   // If we eliminated all predecessors of the block, delete the block now.
   1531   if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
   1532     BB->eraseFromParent();
   1533 
   1534   return Changed;
   1535 }
   1536 
   1537 //===----------------------------------------------------------------------===//
   1538 // Memory Optimization
   1539 //===----------------------------------------------------------------------===//
   1540 
   1541 namespace {
   1542 
   1543 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
   1544 /// which holds actual Value*'s for register values.
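         /// For example (illustrative), an address of the form GV + 12 + %base + 4*%idx
         /// would be described by BaseGV = GV, BaseOffs = 12, BaseReg = %base,
         /// Scale = 4 and ScaledReg = %idx.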
   1545 struct ExtAddrMode : public TargetLowering::AddrMode {
   1546   Value *BaseReg;
   1547   Value *ScaledReg;
   1548   ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
   1549   void print(raw_ostream &OS) const;
   1550   void dump() const;
   1551 
   1552   bool operator==(const ExtAddrMode& O) const {
   1553     return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
   1554            (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
   1555            (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
   1556   }
   1557 };
   1558 
   1559 #ifndef NDEBUG
   1560 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
   1561   AM.print(OS);
   1562   return OS;
   1563 }
   1564 #endif
   1565 
   1566 void ExtAddrMode::print(raw_ostream &OS) const {
   1567   bool NeedPlus = false;
   1568   OS << "[";
   1569   if (BaseGV) {
   1570     OS << (NeedPlus ? " + " : "")
   1571        << "GV:";
   1572     BaseGV->printAsOperand(OS, /*PrintType=*/false);
   1573     NeedPlus = true;
   1574   }
   1575 
   1576   if (BaseOffs) {
   1577     OS << (NeedPlus ? " + " : "")
   1578        << BaseOffs;
   1579     NeedPlus = true;
   1580   }
   1581 
   1582   if (BaseReg) {
   1583     OS << (NeedPlus ? " + " : "")
   1584        << "Base:";
   1585     BaseReg->printAsOperand(OS, /*PrintType=*/false);
   1586     NeedPlus = true;
   1587   }
   1588   if (Scale) {
   1589     OS << (NeedPlus ? " + " : "")
   1590        << Scale << "*";
   1591     ScaledReg->printAsOperand(OS, /*PrintType=*/false);
   1592   }
   1593 
   1594   OS << ']';
   1595 }
   1596 
   1597 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   1598 void ExtAddrMode::dump() const {
   1599   print(dbgs());
   1600   dbgs() << '\n';
   1601 }
   1602 #endif
   1603 
   1604 /// \brief This class provides transaction based operation on the IR.
   1605 /// Every change made through this class is recorded in the internal state and
   1606 /// can be undone (rollback) until commit is called.
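         ///
         /// Typical usage (illustrative sketch; Inst, NewTy and Profitable are
         /// placeholders):
         /// @code
         ///   TypePromotionTransaction TPT;
         ///   TypePromotionTransaction::ConstRestorationPt Pt =
         ///       TPT.getRestorationPoint();
         ///   TPT.mutateType(Inst, NewTy);   // speculative change
         ///   if (Profitable)
         ///     TPT.commit();                // keep the change
         ///   else
         ///     TPT.rollback(Pt);            // undo everything made after Pt
         /// @endcode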
   1607 class TypePromotionTransaction {
   1608 
   1609   /// \brief This represents the common interface of the individual transaction.
   1610   /// Each class implements the logic for doing one specific modification on
   1611   /// the IR via the TypePromotionTransaction.
   1612   class TypePromotionAction {
   1613   protected:
   1614     /// The Instruction modified.
   1615     Instruction *Inst;
   1616 
   1617   public:
   1618     /// \brief Constructor of the action.
   1619     /// The constructor performs the related action on the IR.
   1620     TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
   1621 
   1622     virtual ~TypePromotionAction() {}
   1623 
   1624     /// \brief Undo the modification done by this action.
   1625     /// When this method is called, the IR must be in the same state as it was
   1626     /// before this action was applied.
   1627     /// \pre Undoing the action works if and only if the IR is in the exact same
   1628     /// state as it was directly after this action was applied.
   1629     virtual void undo() = 0;
   1630 
    1631     /// \brief Commit every change made by this action.
    1632     /// When the effects of the action on the IR are to be kept, it is important
    1633     /// to call this function; otherwise hidden bookkeeping may be kept forever.
   1634     virtual void commit() {
   1635       // Nothing to be done, this action is not doing anything.
   1636     }
   1637   };
   1638 
   1639   /// \brief Utility to remember the position of an instruction.
   1640   class InsertionHandler {
   1641     /// Position of an instruction.
   1642     /// Either an instruction:
   1643     /// - Is the first in a basic block: BB is used.
    1644     /// - Has a previous instruction: PrevInst is used.
   1645     union {
   1646       Instruction *PrevInst;
   1647       BasicBlock *BB;
   1648     } Point;
   1649     /// Remember whether or not the instruction had a previous instruction.
   1650     bool HasPrevInstruction;
   1651 
   1652   public:
   1653     /// \brief Record the position of \p Inst.
   1654     InsertionHandler(Instruction *Inst) {
   1655       BasicBlock::iterator It = Inst;
   1656       HasPrevInstruction = (It != (Inst->getParent()->begin()));
   1657       if (HasPrevInstruction)
   1658         Point.PrevInst = --It;
   1659       else
   1660         Point.BB = Inst->getParent();
   1661     }
   1662 
   1663     /// \brief Insert \p Inst at the recorded position.
   1664     void insert(Instruction *Inst) {
   1665       if (HasPrevInstruction) {
   1666         if (Inst->getParent())
   1667           Inst->removeFromParent();
   1668         Inst->insertAfter(Point.PrevInst);
   1669       } else {
   1670         Instruction *Position = Point.BB->getFirstInsertionPt();
   1671         if (Inst->getParent())
   1672           Inst->moveBefore(Position);
   1673         else
   1674           Inst->insertBefore(Position);
   1675       }
   1676     }
   1677   };
   1678 
   1679   /// \brief Move an instruction before another.
   1680   class InstructionMoveBefore : public TypePromotionAction {
   1681     /// Original position of the instruction.
   1682     InsertionHandler Position;
   1683 
   1684   public:
   1685     /// \brief Move \p Inst before \p Before.
   1686     InstructionMoveBefore(Instruction *Inst, Instruction *Before)
   1687         : TypePromotionAction(Inst), Position(Inst) {
   1688       DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
   1689       Inst->moveBefore(Before);
   1690     }
   1691 
   1692     /// \brief Move the instruction back to its original position.
   1693     void undo() override {
   1694       DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
   1695       Position.insert(Inst);
   1696     }
   1697   };
   1698 
   1699   /// \brief Set the operand of an instruction with a new value.
   1700   class OperandSetter : public TypePromotionAction {
   1701     /// Original operand of the instruction.
   1702     Value *Origin;
   1703     /// Index of the modified instruction.
   1704     unsigned Idx;
   1705 
   1706   public:
   1707     /// \brief Set \p Idx operand of \p Inst with \p NewVal.
   1708     OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
   1709         : TypePromotionAction(Inst), Idx(Idx) {
   1710       DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
   1711                    << "for:" << *Inst << "\n"
   1712                    << "with:" << *NewVal << "\n");
   1713       Origin = Inst->getOperand(Idx);
   1714       Inst->setOperand(Idx, NewVal);
   1715     }
   1716 
   1717     /// \brief Restore the original value of the instruction.
   1718     void undo() override {
   1719       DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
   1720                    << "for: " << *Inst << "\n"
   1721                    << "with: " << *Origin << "\n");
   1722       Inst->setOperand(Idx, Origin);
   1723     }
   1724   };
   1725 
   1726   /// \brief Hide the operands of an instruction.
    1727   /// Behave as if this instruction were not using any of its operands.
   1728   class OperandsHider : public TypePromotionAction {
   1729     /// The list of original operands.
   1730     SmallVector<Value *, 4> OriginalValues;
   1731 
   1732   public:
   1733     /// \brief Remove \p Inst from the uses of the operands of \p Inst.
   1734     OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
   1735       DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
   1736       unsigned NumOpnds = Inst->getNumOperands();
   1737       OriginalValues.reserve(NumOpnds);
   1738       for (unsigned It = 0; It < NumOpnds; ++It) {
   1739         // Save the current operand.
   1740         Value *Val = Inst->getOperand(It);
   1741         OriginalValues.push_back(Val);
   1742         // Set a dummy one.
    1743         // We could use OperandSetter here, but that would imply an overhead
   1744         // that we are not willing to pay.
   1745         Inst->setOperand(It, UndefValue::get(Val->getType()));
   1746       }
   1747     }
   1748 
   1749     /// \brief Restore the original list of uses.
   1750     void undo() override {
   1751       DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
   1752       for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
   1753         Inst->setOperand(It, OriginalValues[It]);
   1754     }
   1755   };
   1756 
   1757   /// \brief Build a truncate instruction.
   1758   class TruncBuilder : public TypePromotionAction {
   1759     Value *Val;
   1760   public:
   1761     /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
   1762     /// result.
   1763     /// trunc Opnd to Ty.
   1764     TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
   1765       IRBuilder<> Builder(Opnd);
   1766       Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
   1767       DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
   1768     }
   1769 
   1770     /// \brief Get the built value.
   1771     Value *getBuiltValue() { return Val; }
   1772 
   1773     /// \brief Remove the built instruction.
   1774     void undo() override {
   1775       DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
   1776       if (Instruction *IVal = dyn_cast<Instruction>(Val))
   1777         IVal->eraseFromParent();
   1778     }
   1779   };
   1780 
   1781   /// \brief Build a sign extension instruction.
   1782   class SExtBuilder : public TypePromotionAction {
   1783     Value *Val;
   1784   public:
   1785     /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
   1786     /// result.
   1787     /// sext Opnd to Ty.
   1788     SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
   1789         : TypePromotionAction(InsertPt) {
   1790       IRBuilder<> Builder(InsertPt);
   1791       Val = Builder.CreateSExt(Opnd, Ty, "promoted");
   1792       DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
   1793     }
   1794 
   1795     /// \brief Get the built value.
   1796     Value *getBuiltValue() { return Val; }
   1797 
   1798     /// \brief Remove the built instruction.
   1799     void undo() override {
   1800       DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
   1801       if (Instruction *IVal = dyn_cast<Instruction>(Val))
   1802         IVal->eraseFromParent();
   1803     }
   1804   };
   1805 
   1806   /// \brief Build a zero extension instruction.
   1807   class ZExtBuilder : public TypePromotionAction {
   1808     Value *Val;
   1809   public:
   1810     /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
   1811     /// result.
   1812     /// zext Opnd to Ty.
   1813     ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
   1814         : TypePromotionAction(InsertPt) {
   1815       IRBuilder<> Builder(InsertPt);
   1816       Val = Builder.CreateZExt(Opnd, Ty, "promoted");
   1817       DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
   1818     }
   1819 
   1820     /// \brief Get the built value.
   1821     Value *getBuiltValue() { return Val; }
   1822 
   1823     /// \brief Remove the built instruction.
   1824     void undo() override {
   1825       DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
   1826       if (Instruction *IVal = dyn_cast<Instruction>(Val))
   1827         IVal->eraseFromParent();
   1828     }
   1829   };
   1830 
   1831   /// \brief Mutate an instruction to another type.
   1832   class TypeMutator : public TypePromotionAction {
   1833     /// Record the original type.
   1834     Type *OrigTy;
   1835 
   1836   public:
   1837     /// \brief Mutate the type of \p Inst into \p NewTy.
   1838     TypeMutator(Instruction *Inst, Type *NewTy)
   1839         : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
   1840       DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
   1841                    << "\n");
   1842       Inst->mutateType(NewTy);
   1843     }
   1844 
   1845     /// \brief Mutate the instruction back to its original type.
   1846     void undo() override {
   1847       DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
   1848                    << "\n");
   1849       Inst->mutateType(OrigTy);
   1850     }
   1851   };
   1852 
   1853   /// \brief Replace the uses of an instruction by another instruction.
   1854   class UsesReplacer : public TypePromotionAction {
   1855     /// Helper structure to keep track of the replaced uses.
   1856     struct InstructionAndIdx {
   1857       /// The instruction using the instruction.
   1858       Instruction *Inst;
   1859       /// The index where this instruction is used for Inst.
   1860       unsigned Idx;
   1861       InstructionAndIdx(Instruction *Inst, unsigned Idx)
   1862           : Inst(Inst), Idx(Idx) {}
   1863     };
   1864 
   1865     /// Keep track of the original uses (pair Instruction, Index).
   1866     SmallVector<InstructionAndIdx, 4> OriginalUses;
   1867     typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
   1868 
   1869   public:
    1870     /// \brief Replace all the uses of \p Inst with \p New.
   1871     UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
   1872       DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
   1873                    << "\n");
   1874       // Record the original uses.
   1875       for (Use &U : Inst->uses()) {
   1876         Instruction *UserI = cast<Instruction>(U.getUser());
   1877         OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
   1878       }
   1879       // Now, we can replace the uses.
   1880       Inst->replaceAllUsesWith(New);
   1881     }
   1882 
    1883     /// \brief Reassign the original uses of Inst back to Inst.
   1884     void undo() override {
   1885       DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
   1886       for (use_iterator UseIt = OriginalUses.begin(),
   1887                         EndIt = OriginalUses.end();
   1888            UseIt != EndIt; ++UseIt) {
   1889         UseIt->Inst->setOperand(UseIt->Idx, Inst);
   1890       }
   1891     }
   1892   };
   1893 
   1894   /// \brief Remove an instruction from the IR.
   1895   class InstructionRemover : public TypePromotionAction {
   1896     /// Original position of the instruction.
   1897     InsertionHandler Inserter;
    1898     /// Helper structure to hide all the links to the instruction. In other
    1899     /// words, this makes it look as if the instruction had been removed.
   1900     OperandsHider Hider;
   1901     /// Keep track of the uses replaced, if any.
   1902     UsesReplacer *Replacer;
   1903 
   1904   public:
    1905     /// \brief Remove all references to \p Inst and optionally replace all its
    1906     /// uses with \p New.
   1907     /// \pre If !Inst->use_empty(), then New != nullptr
   1908     InstructionRemover(Instruction *Inst, Value *New = nullptr)
   1909         : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
   1910           Replacer(nullptr) {
   1911       if (New)
   1912         Replacer = new UsesReplacer(Inst, New);
   1913       DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
   1914       Inst->removeFromParent();
   1915     }
   1916 
   1917     ~InstructionRemover() override { delete Replacer; }
   1918 
   1919     /// \brief Really remove the instruction.
   1920     void commit() override { delete Inst; }
   1921 
    1922     /// \brief Resurrect the instruction and reassign it to its original uses if
    1923     /// a new value was provided when building this action.
   1924     void undo() override {
   1925       DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
   1926       Inserter.insert(Inst);
   1927       if (Replacer)
   1928         Replacer->undo();
   1929       Hider.undo();
   1930     }
   1931   };
   1932 
   1933 public:
   1934   /// Restoration point.
   1935   /// The restoration point is a pointer to an action instead of an iterator
   1936   /// because the iterator may be invalidated but not the pointer.
   1937   typedef const TypePromotionAction *ConstRestorationPt;
    1938   /// Commit all the changes made in this transaction.
   1939   void commit();
   1940   /// Undo all the changes made after the given point.
   1941   void rollback(ConstRestorationPt Point);
   1942   /// Get the current restoration point.
   1943   ConstRestorationPt getRestorationPoint() const;
   1944 
   1945   /// \name API for IR modification with state keeping to support rollback.
   1946   /// @{
   1947   /// Same as Instruction::setOperand.
   1948   void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
   1949   /// Same as Instruction::eraseFromParent.
   1950   void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
   1951   /// Same as Value::replaceAllUsesWith.
   1952   void replaceAllUsesWith(Instruction *Inst, Value *New);
   1953   /// Same as Value::mutateType.
   1954   void mutateType(Instruction *Inst, Type *NewTy);
   1955   /// Same as IRBuilder::createTrunc.
   1956   Value *createTrunc(Instruction *Opnd, Type *Ty);
   1957   /// Same as IRBuilder::createSExt.
   1958   Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
   1959   /// Same as IRBuilder::createZExt.
   1960   Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
   1961   /// Same as Instruction::moveBefore.
   1962   void moveBefore(Instruction *Inst, Instruction *Before);
   1963   /// @}
   1964 
   1965 private:
   1966   /// The ordered list of actions made so far.
   1967   SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
   1968   typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
   1969 };
   1970 
   1971 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
   1972                                           Value *NewVal) {
   1973   Actions.push_back(
   1974       make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
   1975 }
   1976 
   1977 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
   1978                                                 Value *NewVal) {
   1979   Actions.push_back(
   1980       make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
   1981 }
   1982 
   1983 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
   1984                                                   Value *New) {
   1985   Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
   1986 }
   1987 
   1988 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
   1989   Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
   1990 }
   1991 
   1992 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
   1993                                              Type *Ty) {
   1994   std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
   1995   Value *Val = Ptr->getBuiltValue();
   1996   Actions.push_back(std::move(Ptr));
   1997   return Val;
   1998 }
   1999 
   2000 Value *TypePromotionTransaction::createSExt(Instruction *Inst,
   2001                                             Value *Opnd, Type *Ty) {
   2002   std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
   2003   Value *Val = Ptr->getBuiltValue();
   2004   Actions.push_back(std::move(Ptr));
   2005   return Val;
   2006 }
   2007 
   2008 Value *TypePromotionTransaction::createZExt(Instruction *Inst,
   2009                                             Value *Opnd, Type *Ty) {
   2010   std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
   2011   Value *Val = Ptr->getBuiltValue();
   2012   Actions.push_back(std::move(Ptr));
   2013   return Val;
   2014 }
   2015 
   2016 void TypePromotionTransaction::moveBefore(Instruction *Inst,
   2017                                           Instruction *Before) {
   2018   Actions.push_back(
   2019       make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
   2020 }
   2021 
   2022 TypePromotionTransaction::ConstRestorationPt
   2023 TypePromotionTransaction::getRestorationPoint() const {
   2024   return !Actions.empty() ? Actions.back().get() : nullptr;
   2025 }
   2026 
   2027 void TypePromotionTransaction::commit() {
   2028   for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
   2029        ++It)
   2030     (*It)->commit();
   2031   Actions.clear();
   2032 }
   2033 
   2034 void TypePromotionTransaction::rollback(
   2035     TypePromotionTransaction::ConstRestorationPt Point) {
   2036   while (!Actions.empty() && Point != Actions.back().get()) {
   2037     std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
   2038     Curr->undo();
   2039   }
   2040 }
   2041 
   2042 /// \brief A helper class for matching addressing modes.
   2043 ///
   2044 /// This encapsulates the logic for matching the target-legal addressing modes.
   2045 class AddressingModeMatcher {
   2046   SmallVectorImpl<Instruction*> &AddrModeInsts;
   2047   const TargetMachine &TM;
   2048   const TargetLowering &TLI;
   2049 
   2050   /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
   2051   /// the memory instruction that we're computing this address for.
   2052   Type *AccessTy;
   2053   Instruction *MemoryInst;
   2054 
   2055   /// AddrMode - This is the addressing mode that we're building up.  This is
   2056   /// part of the return value of this addressing mode matching stuff.
   2057   ExtAddrMode &AddrMode;
   2058 
    2059   /// The truncate instructions inserted by other CodeGenPrepare optimizations.
   2060   const SetOfInstrs &InsertedTruncs;
   2061   /// A map from the instructions to their type before promotion.
   2062   InstrToOrigTy &PromotedInsts;
   2063   /// The ongoing transaction where every action should be registered.
   2064   TypePromotionTransaction &TPT;
   2065 
   2066   /// IgnoreProfitability - This is set to true when we should not do
   2067   /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
   2068   /// always returns true.
   2069   bool IgnoreProfitability;
   2070 
   2071   AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
   2072                         const TargetMachine &TM, Type *AT, Instruction *MI,
   2073                         ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs,
   2074                         InstrToOrigTy &PromotedInsts,
   2075                         TypePromotionTransaction &TPT)
   2076       : AddrModeInsts(AMI), TM(TM),
   2077         TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
   2078                  ->getTargetLowering()),
   2079         AccessTy(AT), MemoryInst(MI), AddrMode(AM),
   2080         InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
   2081     IgnoreProfitability = false;
   2082   }
   2083 public:
   2084 
    2085   /// Match - Find the maximal addressing mode that a load/store of V can fold,
    2086   /// given an access type of AccessTy.  This returns a list of involved
    2087   /// instructions in AddrModeInsts.
    2088   /// \p InsertedTruncs The truncate instructions inserted by other
    2089   /// CodeGenPrepare optimizations.
    2090   /// \p PromotedInsts maps the instructions to their type before promotion.
    2091   /// \p TPT The ongoing transaction where every action should be registered.
   2093   static ExtAddrMode Match(Value *V, Type *AccessTy,
   2094                            Instruction *MemoryInst,
   2095                            SmallVectorImpl<Instruction*> &AddrModeInsts,
   2096                            const TargetMachine &TM,
   2097                            const SetOfInstrs &InsertedTruncs,
   2098                            InstrToOrigTy &PromotedInsts,
   2099                            TypePromotionTransaction &TPT) {
   2100     ExtAddrMode Result;
   2101 
   2102     bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy,
   2103                                          MemoryInst, Result, InsertedTruncs,
   2104                                          PromotedInsts, TPT).MatchAddr(V, 0);
   2105     (void)Success; assert(Success && "Couldn't select *anything*?");
   2106     return Result;
   2107   }
   2108 private:
   2109   bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
   2110   bool MatchAddr(Value *V, unsigned Depth);
   2111   bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
   2112                           bool *MovedAway = nullptr);
   2113   bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
   2114                                             ExtAddrMode &AMBefore,
   2115                                             ExtAddrMode &AMAfter);
   2116   bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
   2117   bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost,
   2118                              Value *PromotedOperand) const;
   2119 };
   2120 
   2121 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
   2122 /// Return true and update AddrMode if this addr mode is legal for the target,
   2123 /// false if not.
   2124 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
   2125                                              unsigned Depth) {
   2126   // If Scale is 1, then this is the same as adding ScaleReg to the addressing
   2127   // mode.  Just process that directly.
   2128   if (Scale == 1)
   2129     return MatchAddr(ScaleReg, Depth);
   2130 
   2131   // If the scale is 0, it takes nothing to add this.
   2132   if (Scale == 0)
   2133     return true;
   2134 
   2135   // If we already have a scale of this value, we can add to it, otherwise, we
   2136   // need an available scale field.
   2137   if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
   2138     return false;
   2139 
   2140   ExtAddrMode TestAddrMode = AddrMode;
   2141 
   2142   // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
   2143   // [A+B + A*7] -> [B+A*8].
   2144   TestAddrMode.Scale += Scale;
   2145   TestAddrMode.ScaledReg = ScaleReg;
   2146 
   2147   // If the new address isn't legal, bail out.
   2148   if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
   2149     return false;
   2150 
   2151   // It was legal, so commit it.
   2152   AddrMode = TestAddrMode;
   2153 
   2154   // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
   2155   // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
   2156   // X*Scale + C*Scale to addr mode.
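           // For example (illustrative): if ScaleReg is "add i64 %x, 4" and the
           // resulting scale is 2, the matcher retries with ScaledReg = %x and
           // BaseOffs increased by 4*2 = 8.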
   2157   ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
   2158   if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
   2159       match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
   2160     TestAddrMode.ScaledReg = AddLHS;
   2161     TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
   2162 
   2163     // If this addressing mode is legal, commit it and remember that we folded
   2164     // this instruction.
   2165     if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
   2166       AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
   2167       AddrMode = TestAddrMode;
   2168       return true;
   2169     }
   2170   }
   2171 
   2172   // Otherwise, not (x+c)*scale, just return what we have.
   2173   return true;
   2174 }
   2175 
   2176 /// MightBeFoldableInst - This is a little filter, which returns true if an
   2177 /// addressing computation involving I might be folded into a load/store
   2178 /// accessing it.  This doesn't need to be perfect, but needs to accept at least
   2179 /// the set of instructions that MatchOperationAddr can.
   2180 static bool MightBeFoldableInst(Instruction *I) {
   2181   switch (I->getOpcode()) {
   2182   case Instruction::BitCast:
   2183   case Instruction::AddrSpaceCast:
   2184     // Don't touch identity bitcasts.
   2185     if (I->getType() == I->getOperand(0)->getType())
   2186       return false;
   2187     return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
   2188   case Instruction::PtrToInt:
   2189     // PtrToInt is always a noop, as we know that the int type is pointer sized.
   2190     return true;
   2191   case Instruction::IntToPtr:
   2192     // We know the input is intptr_t, so this is foldable.
   2193     return true;
   2194   case Instruction::Add:
   2195     return true;
   2196   case Instruction::Mul:
   2197   case Instruction::Shl:
   2198     // Can only handle X*C and X << C.
   2199     return isa<ConstantInt>(I->getOperand(1));
   2200   case Instruction::GetElementPtr:
   2201     return true;
   2202   default:
   2203     return false;
   2204   }
   2205 }
   2206 
   2207 /// \brief Check whether or not \p Val is a legal instruction for \p TLI.
   2208 /// \note \p Val is assumed to be the product of some type promotion.
   2209 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed
   2210 /// to be legal, as the non-promoted value would have had the same state.
   2211 static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
   2212   Instruction *PromotedInst = dyn_cast<Instruction>(Val);
   2213   if (!PromotedInst)
   2214     return false;
   2215   int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
   2216   // If the ISDOpcode is undefined, it was undefined before the promotion.
   2217   if (!ISDOpcode)
   2218     return true;
   2219   // Otherwise, check if the promoted instruction is legal or not.
   2220   return TLI.isOperationLegalOrCustom(
   2221       ISDOpcode, TLI.getValueType(PromotedInst->getType()));
   2222 }
   2223 
    2224 /// \brief Helper class to perform type promotion.
   2225 class TypePromotionHelper {
   2226   /// \brief Utility function to check whether or not a sign or zero extension
   2227   /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
   2228   /// either using the operands of \p Inst or promoting \p Inst.
   2229   /// The type of the extension is defined by \p IsSExt.
   2230   /// In other words, check if:
   2231   /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
   2232   /// #1 Promotion applies:
   2233   /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
   2234   /// #2 Operand reuses:
   2235   /// ext opnd1 to ConsideredExtType.
   2236   /// \p PromotedInsts maps the instructions to their type before promotion.
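           /// For example (illustrative): "sext i32 (add nsw i32 %a, %b) to i64" falls
           /// under #1, since the nsw add can be redone on sign-extended operands;
           /// given %x = sext i16 %y to i64, "sext i32 (trunc i64 %x to i32) to i64"
           /// falls under #2 and can reuse %x directly, because the trunc only drops
           /// bits that the original sext produced.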
   2237   static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
   2238                             const InstrToOrigTy &PromotedInsts, bool IsSExt);
   2239 
   2240   /// \brief Utility function to determine if \p OpIdx should be promoted when
   2241   /// promoting \p Inst.
   2242   static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
   2243     if (isa<SelectInst>(Inst) && OpIdx == 0)
   2244       return false;
   2245     return true;
   2246   }
   2247 
   2248   /// \brief Utility function to promote the operand of \p Ext when this
   2249   /// operand is a promotable trunc or sext or zext.
   2250   /// \p PromotedInsts maps the instructions to their type before promotion.
   2251   /// \p CreatedInstsCost[out] contains the cost of all instructions
   2252   /// created to promote the operand of Ext.
   2253   /// Newly added extensions are inserted in \p Exts.
   2254   /// Newly added truncates are inserted in \p Truncs.
   2255   /// Should never be called directly.
   2256   /// \return The promoted value which is used instead of Ext.
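           /// For example (illustrative): "sext i16 (zext i8 %a to i16) to i32"
           /// becomes "zext i8 %a to i32".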
   2257   static Value *promoteOperandForTruncAndAnyExt(
   2258       Instruction *Ext, TypePromotionTransaction &TPT,
   2259       InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
   2260       SmallVectorImpl<Instruction *> *Exts,
   2261       SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
   2262 
   2263   /// \brief Utility function to promote the operand of \p Ext when this
   2264   /// operand is promotable and is not a supported trunc or sext.
   2265   /// \p PromotedInsts maps the instructions to their type before promotion.
   2266   /// \p CreatedInstsCost[out] contains the cost of all the instructions
   2267   /// created to promote the operand of Ext.
   2268   /// Newly added extensions are inserted in \p Exts.
   2269   /// Newly added truncates are inserted in \p Truncs.
   2270   /// Should never be called directly.
   2271   /// \return The promoted value which is used instead of Ext.
   2272   static Value *promoteOperandForOther(Instruction *Ext,
   2273                                        TypePromotionTransaction &TPT,
   2274                                        InstrToOrigTy &PromotedInsts,
   2275                                        unsigned &CreatedInstsCost,
   2276                                        SmallVectorImpl<Instruction *> *Exts,
   2277                                        SmallVectorImpl<Instruction *> *Truncs,
   2278                                        const TargetLowering &TLI, bool IsSExt);
   2279 
   2280   /// \see promoteOperandForOther.
   2281   static Value *signExtendOperandForOther(
   2282       Instruction *Ext, TypePromotionTransaction &TPT,
   2283       InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
   2284       SmallVectorImpl<Instruction *> *Exts,
   2285       SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
   2286     return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
   2287                                   Exts, Truncs, TLI, true);
   2288   }
   2289 
   2290   /// \see promoteOperandForOther.
   2291   static Value *zeroExtendOperandForOther(
   2292       Instruction *Ext, TypePromotionTransaction &TPT,
   2293       InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
   2294       SmallVectorImpl<Instruction *> *Exts,
   2295       SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
   2296     return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
   2297                                   Exts, Truncs, TLI, false);
   2298   }
   2299 
   2300 public:
   2301   /// Type for the utility function that promotes the operand of Ext.
   2302   typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
   2303                            InstrToOrigTy &PromotedInsts,
   2304                            unsigned &CreatedInstsCost,
   2305                            SmallVectorImpl<Instruction *> *Exts,
   2306                            SmallVectorImpl<Instruction *> *Truncs,
   2307                            const TargetLowering &TLI);
    2308   /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
   2309   /// action to promote the operand of \p Ext instead of using Ext.
   2310   /// \return NULL if no promotable action is possible with the current
   2311   /// sign extension.
    2312   /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
    2313   /// the other CodeGenPrepare optimizations. This information is important
    2314   /// because we do not want to promote these instructions, as CodeGenPrepare
    2315   /// would reinsert them later, creating an infinite create/remove loop.
   2316   /// \p PromotedInsts maps the instructions to their type before promotion.
   2317   static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs,
   2318                           const TargetLowering &TLI,
   2319                           const InstrToOrigTy &PromotedInsts);
   2320 };
   2321 
   2322 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
   2323                                         Type *ConsideredExtType,
   2324                                         const InstrToOrigTy &PromotedInsts,
   2325                                         bool IsSExt) {
   2326   // The promotion helper does not know how to deal with vector types yet.
   2327   // To be able to fix that, we would need to fix the places where we
   2328   // statically extend, e.g., constants and such.
   2329   if (Inst->getType()->isVectorTy())
   2330     return false;
   2331 
   2332   // We can always get through zext.
   2333   if (isa<ZExtInst>(Inst))
   2334     return true;
   2335 
   2336   // sext(sext) is ok too.
   2337   if (IsSExt && isa<SExtInst>(Inst))
   2338     return true;
   2339 
    2340   // We can get through a binary operator if doing so is legal. In other words,
    2341   // the binary operator must have a nuw or nsw flag.
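           // For example (illustrative): "zext i32 (add nuw i32 %a, %b) to i64"
           // computes the same value as "add nuw i64 (zext %a), (zext %b)".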
   2342   const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
   2343   if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
   2344       ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
   2345        (IsSExt && BinOp->hasNoSignedWrap())))
   2346     return true;
   2347 
   2348   // Check if we can do the following simplification.
   2349   // ext(trunc(opnd)) --> ext(opnd)
   2350   if (!isa<TruncInst>(Inst))
   2351     return false;
   2352 
   2353   Value *OpndVal = Inst->getOperand(0);
   2354   // Check if we can use this operand in the extension.
   2355   // If the type is larger than the result type of the extension,
   2356   // we cannot.
   2357   if (!OpndVal->getType()->isIntegerTy() ||
   2358       OpndVal->getType()->getIntegerBitWidth() >
   2359           ConsideredExtType->getIntegerBitWidth())
   2360     return false;
   2361 
   2362   // If the operand of the truncate is not an instruction, we will not have
   2363   // any information on the dropped bits.
   2364   // (Actually we could for constant but it is not worth the extra logic).
   2365   Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
   2366   if (!Opnd)
   2367     return false;
   2368 
    2369   // Check if the type of the truncate's source is narrow enough.
    2370   // I.e., check that the trunc just drops bits that were extended with the
    2371   // same kind of extension.
   2372   // #1 get the type of the operand and check the kind of the extended bits.
   2373   const Type *OpndType;
   2374   InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
   2375   if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
   2376     OpndType = It->second.Ty;
   2377   else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
   2378     OpndType = Opnd->getOperand(0)->getType();
   2379   else
   2380     return false;
   2381 
    2382   // #2 check that the truncate just drops the extended bits.
   2383   if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
   2384     return true;
   2385 
   2386   return false;
   2387 }
   2388 
   2389 TypePromotionHelper::Action TypePromotionHelper::getAction(
   2390     Instruction *Ext, const SetOfInstrs &InsertedTruncs,
   2391     const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
   2392   assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
   2393          "Unexpected instruction type");
   2394   Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
   2395   Type *ExtTy = Ext->getType();
   2396   bool IsSExt = isa<SExtInst>(Ext);
    2397   // If the operand of the extension is not an instruction, we cannot
    2398   // get through it.
    2399   // If it is, check whether we can get through.
   2400   if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
   2401     return nullptr;
   2402 
   2403   // Do not promote if the operand has been added by codegenprepare.
   2404   // Otherwise, it means we are undoing an optimization that is likely to be
    2405   // redone, thus causing a potential infinite loop.
   2406   if (isa<TruncInst>(ExtOpnd) && InsertedTruncs.count(ExtOpnd))
   2407     return nullptr;
   2408 
   2409   // SExt or Trunc instructions.
   2410   // Return the related handler.
   2411   if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
   2412       isa<ZExtInst>(ExtOpnd))
   2413     return promoteOperandForTruncAndAnyExt;
   2414 
   2415   // Regular instruction.
   2416   // Abort early if we will have to insert non-free instructions.
   2417   if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
   2418     return nullptr;
   2419   return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
   2420 }
   2421 
   2422 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
   2423     llvm::Instruction *SExt, TypePromotionTransaction &TPT,
   2424     InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
   2425     SmallVectorImpl<Instruction *> *Exts,
   2426     SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
   2427   // By construction, the operand of SExt is an instruction. Otherwise we cannot
   2428   // get through it and this method should not be called.
   2429   Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
   2430   Value *ExtVal = SExt;
   2431   bool HasMergedNonFreeExt = false;
   2432   if (isa<ZExtInst>(SExtOpnd)) {
   2433     // Replace s|zext(zext(opnd))
   2434     // => zext(opnd).
   2435     HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
   2436     Value *ZExt =
   2437         TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
   2438     TPT.replaceAllUsesWith(SExt, ZExt);
   2439     TPT.eraseInstruction(SExt);
   2440     ExtVal = ZExt;
   2441   } else {
   2442     // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
   2443     // => z|sext(opnd).
   2444     TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
   2445   }
   2446   CreatedInstsCost = 0;
   2447 
   2448   // Remove dead code.
   2449   if (SExtOpnd->use_empty())
   2450     TPT.eraseInstruction(SExtOpnd);
   2451 
   2452   // Check if the extension is still needed.
   2453   Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
   2454   if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
   2455     if (ExtInst) {
   2456       if (Exts)
   2457         Exts->push_back(ExtInst);
   2458       CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
   2459     }
   2460     return ExtVal;
   2461   }
   2462 
   2463   // At this point we have: ext ty opnd to ty.
   2464   // Reassign the uses of ExtInst to the opnd and remove ExtInst.
   2465   Value *NextVal = ExtInst->getOperand(0);
   2466   TPT.eraseInstruction(ExtInst, NextVal);
   2467   return NextVal;
   2468 }
   2469 
   2470 Value *TypePromotionHelper::promoteOperandForOther(
   2471     Instruction *Ext, TypePromotionTransaction &TPT,
   2472     InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
   2473     SmallVectorImpl<Instruction *> *Exts,
   2474     SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
   2475     bool IsSExt) {
   2476   // By construction, the operand of Ext is an instruction. Otherwise we cannot
   2477   // get through it and this method should not be called.
   2478   Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
   2479   CreatedInstsCost = 0;
   2480   if (!ExtOpnd->hasOneUse()) {
   2481     // ExtOpnd will be promoted.
   2482     // All its uses, but Ext, will need to use a truncated value of the
   2483     // promoted version.
   2484     // Create the truncate now.
   2485     Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
   2486     if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
   2487       ITrunc->removeFromParent();
   2488       // Insert it just after the definition.
   2489       ITrunc->insertAfter(ExtOpnd);
   2490       if (Truncs)
   2491         Truncs->push_back(ITrunc);
   2492     }
   2493 
   2494     TPT.replaceAllUsesWith(ExtOpnd, Trunc);
    2495     // Restore the operand of Ext (which has been replaced by the previous call
   2496     // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
   2497     TPT.setOperand(Ext, 0, ExtOpnd);
   2498   }
   2499 
   2500   // Get through the Instruction:
   2501   // 1. Update its type.
   2502   // 2. Replace the uses of Ext by Inst.
   2503   // 3. Extend each operand that needs to be extended.
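           // For example (illustrative), with IsSExt and an nsw add as ExtOpnd:
           //   %add = add nsw i32 %a, %b
           //   %ext = sext i32 %add to i64
           // becomes
           //   %pa  = sext i32 %a to i64
           //   %pb  = sext i32 %b to i64
           //   %add = add nsw i64 %pa, %pb   ; former uses of %ext now use %add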
   2504 
   2505   // Remember the original type of the instruction before promotion.
   2506   // This is useful to know that the high bits are sign extended bits.
   2507   PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
   2508       ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
   2509   // Step #1.
   2510   TPT.mutateType(ExtOpnd, Ext->getType());
   2511   // Step #2.
   2512   TPT.replaceAllUsesWith(Ext, ExtOpnd);
   2513   // Step #3.
   2514   Instruction *ExtForOpnd = Ext;
   2515 
   2516   DEBUG(dbgs() << "Propagate Ext to operands\n");
   2517   for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
   2518        ++OpIdx) {
   2519     DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
   2520     if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
   2521         !shouldExtOperand(ExtOpnd, OpIdx)) {
   2522       DEBUG(dbgs() << "No need to propagate\n");
   2523       continue;
   2524     }
   2525     // Check if we can statically extend the operand.
   2526     Value *Opnd = ExtOpnd->getOperand(OpIdx);
   2527     if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
   2528       DEBUG(dbgs() << "Statically extend\n");
   2529       unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
   2530       APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
   2531                             : Cst->getValue().zext(BitWidth);
   2532       TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
   2533       continue;
   2534     }
    2535     // UndefValues are typed, so we have to statically extend them.
   2536     if (isa<UndefValue>(Opnd)) {
   2537       DEBUG(dbgs() << "Statically extend\n");
   2538       TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
   2539       continue;
   2540     }
   2541 
    2542     // Otherwise we have to explicitly extend the operand.
   2543     // Check if Ext was reused to extend an operand.
   2544     if (!ExtForOpnd) {
   2545       // If yes, create a new one.
   2546       DEBUG(dbgs() << "More operands to ext\n");
   2547       Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
   2548         : TPT.createZExt(Ext, Opnd, Ext->getType());
   2549       if (!isa<Instruction>(ValForExtOpnd)) {
   2550         TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
   2551         continue;
   2552       }
   2553       ExtForOpnd = cast<Instruction>(ValForExtOpnd);
   2554     }
   2555     if (Exts)
   2556       Exts->push_back(ExtForOpnd);
   2557     TPT.setOperand(ExtForOpnd, 0, Opnd);
   2558 
   2559     // Move the sign extension before the insertion point.
   2560     TPT.moveBefore(ExtForOpnd, ExtOpnd);
   2561     TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
   2562     CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
    2563     // If more exts are required, new instructions will have to be created.
   2564     ExtForOpnd = nullptr;
   2565   }
   2566   if (ExtForOpnd == Ext) {
   2567     DEBUG(dbgs() << "Extension is useless now\n");
   2568     TPT.eraseInstruction(Ext);
   2569   }
   2570   return ExtOpnd;
   2571 }
   2572 
   2573 /// IsPromotionProfitable - Check whether or not promoting an instruction
   2574 /// to a wider type was profitable.
   2575 /// \p NewCost gives the cost of extension instructions created by the
   2576 /// promotion.
    2577 /// \p OldCost gives the cost of extension instructions before the promotion
    2578 /// plus the number of instructions that have been
    2579 /// matched in the addressing mode thanks to the promotion.
   2580 /// \p PromotedOperand is the value that has been promoted.
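         /// For illustration (hypothetical numbers, not from any particular target):
         /// if the promotion creates one non-free extension (NewCost == 1) while the
         /// old extension plus one newly folded instruction give OldCost == 2, the
         /// promotion is considered profitable.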
   2581 /// \return True if the promotion is profitable, false otherwise.
   2582 bool AddressingModeMatcher::IsPromotionProfitable(
   2583     unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
   2584   DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
   2585   // The cost of the new extensions is greater than the cost of the
   2586   // old extension plus what we folded.
   2587   // This is not profitable.
   2588   if (NewCost > OldCost)
   2589     return false;
   2590   if (NewCost < OldCost)
   2591     return true;
    2592   // The promotion is neutral but it may help fold the sign extension into
    2593   // loads, for instance.
   2594   // Check that we did not create an illegal instruction.
   2595   return isPromotedInstructionLegal(TLI, PromotedOperand);
   2596 }
   2597 
   2598 /// MatchOperationAddr - Given an instruction or constant expr, see if we can
   2599 /// fold the operation into the addressing mode.  If so, update the addressing
   2600 /// mode and return true, otherwise return false without modifying AddrMode.
    2601 /// If \p MovedAway is not NULL, it contains whether or not AddrInst has to be
    2602 /// folded into the addressing mode on success.
    2603 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing
    2604 /// mode because it has been moved away.
    2605 /// Thus AddrInst must not be added to the matched instructions.
   2606 /// This state can happen when AddrInst is a sext, since it may be moved away.
   2607 /// Therefore, AddrInst may not be valid when MovedAway is true and it must
   2608 /// not be referenced anymore.
   2609 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
   2610                                                unsigned Depth,
   2611                                                bool *MovedAway) {
   2612   // Avoid exponential behavior on extremely deep expression trees.
   2613   if (Depth >= 5) return false;
   2614 
   2615   // By default, all matched instructions stay in place.
   2616   if (MovedAway)
   2617     *MovedAway = false;
   2618 
   2619   switch (Opcode) {
   2620   case Instruction::PtrToInt:
   2621     // PtrToInt is always a noop, as we know that the int type is pointer sized.
   2622     return MatchAddr(AddrInst->getOperand(0), Depth);
   2623   case Instruction::IntToPtr:
   2624     // This inttoptr is a no-op if the integer type is pointer sized.
   2625     if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
   2626         TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
   2627       return MatchAddr(AddrInst->getOperand(0), Depth);
   2628     return false;
   2629   case Instruction::BitCast:
   2630   case Instruction::AddrSpaceCast:
   2631     // BitCast is always a noop, and we can handle it as long as it is
   2632     // int->int or pointer->pointer (we don't want int<->fp or something).
   2633     if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
   2634          AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
   2635         // Don't touch identity bitcasts.  These were probably put here by LSR,
   2636         // and we don't want to mess around with them.  Assume it knows what it
   2637         // is doing.
   2638         AddrInst->getOperand(0)->getType() != AddrInst->getType())
   2639       return MatchAddr(AddrInst->getOperand(0), Depth);
   2640     return false;
   2641   case Instruction::Add: {
   2642     // Check to see if we can merge in the RHS then the LHS.  If so, we win.
   2643     ExtAddrMode BackupAddrMode = AddrMode;
   2644     unsigned OldSize = AddrModeInsts.size();
   2645     // Start a transaction at this point.
   2646     // The LHS may match but not the RHS.
    2647     // Therefore, we need a higher level restoration point to undo a partially
    2648     // matched operation.
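             // For instance (a hypothetical sketch): in 'add %idx, %base', the RHS
             // %base may fold as the base register while the LHS %idx then fails to
             // match; the restoration point lets us undo that partial match before
             // retrying the opposite order below.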
   2649     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
   2650         TPT.getRestorationPoint();
   2651 
   2652     if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
   2653         MatchAddr(AddrInst->getOperand(0), Depth+1))
   2654       return true;
   2655 
   2656     // Restore the old addr mode info.
   2657     AddrMode = BackupAddrMode;
   2658     AddrModeInsts.resize(OldSize);
   2659     TPT.rollback(LastKnownGood);
   2660 
   2661     // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
   2662     if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
   2663         MatchAddr(AddrInst->getOperand(1), Depth+1))
   2664       return true;
   2665 
   2666     // Otherwise we definitely can't merge the ADD in.
   2667     AddrMode = BackupAddrMode;
   2668     AddrModeInsts.resize(OldSize);
   2669     TPT.rollback(LastKnownGood);
   2670     break;
   2671   }
   2672   //case Instruction::Or:
   2673   // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
   2674   //break;
   2675   case Instruction::Mul:
   2676   case Instruction::Shl: {
   2677     // Can only handle X*C and X << C.
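             // For example (hypothetical values): 'mul %x, 4' and 'shl %x, 2' are
             // both treated as %x scaled by 4.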
   2678     ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
   2679     if (!RHS)
   2680       return false;
   2681     int64_t Scale = RHS->getSExtValue();
   2682     if (Opcode == Instruction::Shl)
   2683       Scale = 1LL << Scale;
   2684 
   2685     return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
   2686   }
   2687   case Instruction::GetElementPtr: {
    2688     // Scan the GEP.  We can handle it if it contains constant offsets and at
    2689     // most one variable offset.
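             // As a sketch (hypothetical values, in the pseudo-IR style used by the
             // comments in this file): 'addr = gep base, 0, FieldIdx, i' decomposes
             // into a constant offset for the FieldIdx field plus a single variable
             // index 'i' scaled by the allocation size of the indexed element type.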
   2690     int VariableOperand = -1;
   2691     unsigned VariableScale = 0;
   2692 
   2693     int64_t ConstantOffset = 0;
   2694     const DataLayout *TD = TLI.getDataLayout();
   2695     gep_type_iterator GTI = gep_type_begin(AddrInst);
   2696     for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
   2697       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
   2698         const StructLayout *SL = TD->getStructLayout(STy);
   2699         unsigned Idx =
   2700           cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
   2701         ConstantOffset += SL->getElementOffset(Idx);
   2702       } else {
   2703         uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
   2704         if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
   2705           ConstantOffset += CI->getSExtValue()*TypeSize;
   2706         } else if (TypeSize) {  // Scales of zero don't do anything.
   2707           // We only allow one variable index at the moment.
   2708           if (VariableOperand != -1)
   2709             return false;
   2710 
   2711           // Remember the variable index.
   2712           VariableOperand = i;
   2713           VariableScale = TypeSize;
   2714         }
   2715       }
   2716     }
   2717 
   2718     // A common case is for the GEP to only do a constant offset.  In this case,
   2719     // just add it to the disp field and check validity.
   2720     if (VariableOperand == -1) {
   2721       AddrMode.BaseOffs += ConstantOffset;
   2722       if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
   2723         // Check to see if we can fold the base pointer in too.
   2724         if (MatchAddr(AddrInst->getOperand(0), Depth+1))
   2725           return true;
   2726       }
   2727       AddrMode.BaseOffs -= ConstantOffset;
   2728       return false;
   2729     }
   2730 
   2731     // Save the valid addressing mode in case we can't match.
   2732     ExtAddrMode BackupAddrMode = AddrMode;
   2733     unsigned OldSize = AddrModeInsts.size();
   2734 
   2735     // See if the scale and offset amount is valid for this target.
   2736     AddrMode.BaseOffs += ConstantOffset;
   2737 
   2738     // Match the base operand of the GEP.
   2739     if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
   2740       // If it couldn't be matched, just stuff the value in a register.
   2741       if (AddrMode.HasBaseReg) {
   2742         AddrMode = BackupAddrMode;
   2743         AddrModeInsts.resize(OldSize);
   2744         return false;
   2745       }
   2746       AddrMode.HasBaseReg = true;
   2747       AddrMode.BaseReg = AddrInst->getOperand(0);
   2748     }
   2749 
   2750     // Match the remaining variable portion of the GEP.
   2751     if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
   2752                           Depth)) {
   2753       // If it couldn't be matched, try stuffing the base into a register
   2754       // instead of matching it, and retrying the match of the scale.
   2755       AddrMode = BackupAddrMode;
   2756       AddrModeInsts.resize(OldSize);
   2757       if (AddrMode.HasBaseReg)
   2758         return false;
   2759       AddrMode.HasBaseReg = true;
   2760       AddrMode.BaseReg = AddrInst->getOperand(0);
   2761       AddrMode.BaseOffs += ConstantOffset;
   2762       if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
   2763                             VariableScale, Depth)) {
   2764         // If even that didn't work, bail.
   2765         AddrMode = BackupAddrMode;
   2766         AddrModeInsts.resize(OldSize);
   2767         return false;
   2768       }
   2769     }
   2770 
   2771     return true;
   2772   }
   2773   case Instruction::SExt:
   2774   case Instruction::ZExt: {
   2775     Instruction *Ext = dyn_cast<Instruction>(AddrInst);
   2776     if (!Ext)
   2777       return false;
   2778 
   2779     // Try to move this ext out of the way of the addressing mode.
   2780     // Ask for a method for doing so.
   2781     TypePromotionHelper::Action TPH =
   2782         TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts);
   2783     if (!TPH)
   2784       return false;
   2785 
   2786     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
   2787         TPT.getRestorationPoint();
   2788     unsigned CreatedInstsCost = 0;
   2789     unsigned ExtCost = !TLI.isExtFree(Ext);
   2790     Value *PromotedOperand =
   2791         TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
   2792     // SExt has been moved away.
   2793     // Thus either it will be rematched later in the recursive calls or it is
   2794     // gone. Anyway, we must not fold it into the addressing mode at this point.
   2795     // E.g.,
   2796     // op = add opnd, 1
   2797     // idx = ext op
   2798     // addr = gep base, idx
   2799     // is now:
   2800     // promotedOpnd = ext opnd            <- no match here
   2801     // op = promoted_add promotedOpnd, 1  <- match (later in recursive calls)
   2802     // addr = gep base, op                <- match
   2803     if (MovedAway)
   2804       *MovedAway = true;
   2805 
   2806     assert(PromotedOperand &&
   2807            "TypePromotionHelper should have filtered out those cases");
   2808 
   2809     ExtAddrMode BackupAddrMode = AddrMode;
   2810     unsigned OldSize = AddrModeInsts.size();
   2811 
   2812     if (!MatchAddr(PromotedOperand, Depth) ||
    2813         // The total new cost is equal to the cost of the created
    2814         // instructions.
    2815         // The total old cost is equal to the cost of the extension plus
    2816         // what we have saved in the addressing mode.
   2817         !IsPromotionProfitable(CreatedInstsCost,
   2818                                ExtCost + (AddrModeInsts.size() - OldSize),
   2819                                PromotedOperand)) {
   2820       AddrMode = BackupAddrMode;
   2821       AddrModeInsts.resize(OldSize);
   2822       DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
   2823       TPT.rollback(LastKnownGood);
   2824       return false;
   2825     }
   2826     return true;
   2827   }
   2828   }
   2829   return false;
   2830 }
   2831 
   2832 /// MatchAddr - If we can, try to add the value of 'Addr' into the current
   2833 /// addressing mode.  If Addr can't be added to AddrMode this returns false and
   2834 /// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
   2835 /// or intptr_t for the target.
   2836 ///
   2837 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
   2838   // Start a transaction at this point that we will rollback if the matching
   2839   // fails.
   2840   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
   2841       TPT.getRestorationPoint();
   2842   if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
   2843     // Fold in immediates if legal for the target.
   2844     AddrMode.BaseOffs += CI->getSExtValue();
   2845     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
   2846       return true;
   2847     AddrMode.BaseOffs -= CI->getSExtValue();
   2848   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
   2849     // If this is a global variable, try to fold it into the addressing mode.
   2850     if (!AddrMode.BaseGV) {
   2851       AddrMode.BaseGV = GV;
   2852       if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
   2853         return true;
   2854       AddrMode.BaseGV = nullptr;
   2855     }
   2856   } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
   2857     ExtAddrMode BackupAddrMode = AddrMode;
   2858     unsigned OldSize = AddrModeInsts.size();
   2859 
   2860     // Check to see if it is possible to fold this operation.
   2861     bool MovedAway = false;
   2862     if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
    2863       // This instruction may have been moved away. If so, there is nothing
   2864       // to check here.
   2865       if (MovedAway)
   2866         return true;
   2867       // Okay, it's possible to fold this.  Check to see if it is actually
   2868       // *profitable* to do so.  We use a simple cost model to avoid increasing
   2869       // register pressure too much.
   2870       if (I->hasOneUse() ||
   2871           IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
   2872         AddrModeInsts.push_back(I);
   2873         return true;
   2874       }
   2875 
   2876       // It isn't profitable to do this, roll back.
   2877       //cerr << "NOT FOLDING: " << *I;
   2878       AddrMode = BackupAddrMode;
   2879       AddrModeInsts.resize(OldSize);
   2880       TPT.rollback(LastKnownGood);
   2881     }
   2882   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
   2883     if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
   2884       return true;
   2885     TPT.rollback(LastKnownGood);
   2886   } else if (isa<ConstantPointerNull>(Addr)) {
   2887     // Null pointer gets folded without affecting the addressing mode.
   2888     return true;
   2889   }
   2890 
    2891   // Worst case, the target should support [reg] addressing modes. :)
   2892   if (!AddrMode.HasBaseReg) {
   2893     AddrMode.HasBaseReg = true;
   2894     AddrMode.BaseReg = Addr;
   2895     // Still check for legality in case the target supports [imm] but not [i+r].
   2896     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
   2897       return true;
   2898     AddrMode.HasBaseReg = false;
   2899     AddrMode.BaseReg = nullptr;
   2900   }
   2901 
   2902   // If the base register is already taken, see if we can do [r+r].
   2903   if (AddrMode.Scale == 0) {
   2904     AddrMode.Scale = 1;
   2905     AddrMode.ScaledReg = Addr;
   2906     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
   2907       return true;
   2908     AddrMode.Scale = 0;
   2909     AddrMode.ScaledReg = nullptr;
   2910   }
   2911   // Couldn't match.
   2912   TPT.rollback(LastKnownGood);
   2913   return false;
   2914 }
   2915 
   2916 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
   2917 /// inline asm call are due to memory operands.  If so, return true, otherwise
   2918 /// return false.
   2919 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
   2920                                     const TargetMachine &TM) {
   2921   const Function *F = CI->getParent()->getParent();
   2922   const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
   2923   const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
   2924   TargetLowering::AsmOperandInfoVector TargetConstraints =
   2925       TLI->ParseConstraints(TRI, ImmutableCallSite(CI));
   2926   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
   2927     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
   2928 
   2929     // Compute the constraint code and ConstraintType to use.
   2930     TLI->ComputeConstraintToUse(OpInfo, SDValue());
   2931 
   2932     // If this asm operand is our Value*, and if it isn't an indirect memory
   2933     // operand, we can't fold it!
   2934     if (OpInfo.CallOperandVal == OpVal &&
   2935         (OpInfo.ConstraintType != TargetLowering::C_Memory ||
   2936          !OpInfo.isIndirect))
   2937       return false;
   2938   }
   2939 
   2940   return true;
   2941 }
   2942 
   2943 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a
   2944 /// memory use.  If we find an obviously non-foldable instruction, return true.
   2945 /// Add the ultimately found memory instructions to MemoryUses.
   2946 static bool FindAllMemoryUses(
   2947     Instruction *I,
   2948     SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
   2949     SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
   2950   // If we already considered this instruction, we're done.
   2951   if (!ConsideredInsts.insert(I).second)
   2952     return false;
   2953 
   2954   // If this is an obviously unfoldable instruction, bail out.
   2955   if (!MightBeFoldableInst(I))
   2956     return true;
   2957 
   2958   // Loop over all the uses, recursively processing them.
   2959   for (Use &U : I->uses()) {
   2960     Instruction *UserI = cast<Instruction>(U.getUser());
   2961 
   2962     if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
   2963       MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
   2964       continue;
   2965     }
   2966 
   2967     if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
   2968       unsigned opNo = U.getOperandNo();
   2969       if (opNo == 0) return true; // Storing addr, not into addr.
   2970       MemoryUses.push_back(std::make_pair(SI, opNo));
   2971       continue;
   2972     }
   2973 
   2974     if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
   2975       InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
   2976       if (!IA) return true;
   2977 
   2978       // If this is a memory operand, we're cool, otherwise bail out.
   2979       if (!IsOperandAMemoryOperand(CI, IA, I, TM))
   2980         return true;
   2981       continue;
   2982     }
   2983 
   2984     if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
   2985       return true;
   2986   }
   2987 
   2988   return false;
   2989 }
   2990 
    2991 /// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
   2992 /// the use site that we're folding it into.  If so, there is no cost to
   2993 /// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
   2994 /// that we know are live at the instruction already.
   2995 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
   2996                                                    Value *KnownLive2) {
   2997   // If Val is either of the known-live values, we know it is live!
   2998   if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
   2999     return true;
   3000 
   3001   // All values other than instructions and arguments (e.g. constants) are live.
   3002   if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
   3003 
    3004   // If Val is a constant-sized alloca in the entry block, it is live; this is
    3005   // because it is just a reference to the stack/frame pointer, which is live
    3006   // for the whole function.
   3007   if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
   3008     if (AI->isStaticAlloca())
   3009       return true;
   3010 
   3011   // Check to see if this value is already used in the memory instruction's
   3012   // block.  If so, it's already live into the block at the very least, so we
   3013   // can reasonably fold it.
   3014   return Val->isUsedInBasicBlock(MemoryInst->getParent());
   3015 }
   3016 
   3017 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
   3018 /// mode of the machine to fold the specified instruction into a load or store
   3019 /// that ultimately uses it.  However, the specified instruction has multiple
   3020 /// uses.  Given this, it may actually increase register pressure to fold it
   3021 /// into the load.  For example, consider this code:
   3022 ///
   3023 ///     X = ...
   3024 ///     Y = X+1
   3025 ///     use(Y)   -> nonload/store
   3026 ///     Z = Y+1
   3027 ///     load Z
   3028 ///
   3029 /// In this case, Y has multiple uses, and can be folded into the load of Z
   3030 /// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
   3031 /// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
   3032 /// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
   3033 /// number of computations either.
   3034 ///
   3035 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
   3036 /// X was live across 'load Z' for other reasons, we actually *would* want to
   3037 /// fold the addressing mode in the Z case.  This would make Y die earlier.
   3038 bool AddressingModeMatcher::
   3039 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
   3040                                      ExtAddrMode &AMAfter) {
   3041   if (IgnoreProfitability) return true;
   3042 
   3043   // AMBefore is the addressing mode before this instruction was folded into it,
   3044   // and AMAfter is the addressing mode after the instruction was folded.  Get
   3045   // the set of registers referenced by AMAfter and subtract out those
   3046   // referenced by AMBefore: this is the set of values which folding in this
   3047   // address extends the lifetime of.
   3048   //
   3049   // Note that there are only two potential values being referenced here,
   3050   // BaseReg and ScaleReg (global addresses are always available, as are any
   3051   // folded immediates).
   3052   Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
   3053 
   3054   // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
   3055   // lifetime wasn't extended by adding this instruction.
   3056   if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
   3057     BaseReg = nullptr;
   3058   if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
   3059     ScaledReg = nullptr;
   3060 
    3061   // If folding this instruction (and its subexprs) didn't extend any live
   3062   // ranges, we're ok with it.
   3063   if (!BaseReg && !ScaledReg)
   3064     return true;
   3065 
   3066   // If all uses of this instruction are ultimately load/store/inlineasm's,
   3067   // check to see if their addressing modes will include this instruction.  If
   3068   // so, we can fold it into all uses, so it doesn't matter if it has multiple
   3069   // uses.
   3070   SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
   3071   SmallPtrSet<Instruction*, 16> ConsideredInsts;
   3072   if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
   3073     return false;  // Has a non-memory, non-foldable use!
   3074 
   3075   // Now that we know that all uses of this instruction are part of a chain of
   3076   // computation involving only operations that could theoretically be folded
   3077   // into a memory use, loop over each of these uses and see if they could
   3078   // *actually* fold the instruction.
   3079   SmallVector<Instruction*, 32> MatchedAddrModeInsts;
   3080   for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
   3081     Instruction *User = MemoryUses[i].first;
   3082     unsigned OpNo = MemoryUses[i].second;
   3083 
   3084     // Get the access type of this use.  If the use isn't a pointer, we don't
   3085     // know what it accesses.
   3086     Value *Address = User->getOperand(OpNo);
   3087     if (!Address->getType()->isPointerTy())
   3088       return false;
   3089     Type *AddressAccessTy = Address->getType()->getPointerElementType();
   3090 
   3091     // Do a match against the root of this address, ignoring profitability. This
   3092     // will tell us if the addressing mode for the memory operation will
   3093     // *actually* cover the shared instruction.
   3094     ExtAddrMode Result;
   3095     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
   3096         TPT.getRestorationPoint();
   3097     AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy,
   3098                                   MemoryInst, Result, InsertedTruncs,
   3099                                   PromotedInsts, TPT);
   3100     Matcher.IgnoreProfitability = true;
   3101     bool Success = Matcher.MatchAddr(Address, 0);
   3102     (void)Success; assert(Success && "Couldn't select *anything*?");
   3103 
    3104     // The match was done only to check profitability; the changes made are
    3105     // not part of the original matcher. Therefore, they should be dropped;
    3106     // otherwise the original matcher will not present the right state.
   3107     TPT.rollback(LastKnownGood);
   3108 
   3109     // If the match didn't cover I, then it won't be shared by it.
   3110     if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
   3111                   I) == MatchedAddrModeInsts.end())
   3112       return false;
   3113 
   3114     MatchedAddrModeInsts.clear();
   3115   }
   3116 
   3117   return true;
   3118 }
   3119 
   3120 } // end anonymous namespace
   3121 
    3122 /// IsNonLocalValue - Return true if the specified value is defined in a
   3123 /// different basic block than BB.
   3124 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
   3125   if (Instruction *I = dyn_cast<Instruction>(V))
   3126     return I->getParent() != BB;
   3127   return false;
   3128 }
   3129 
   3130 /// OptimizeMemoryInst - Load and Store Instructions often have
   3131 /// addressing modes that can do significant amounts of computation.  As such,
   3132 /// instruction selection will try to get the load or store to do as much
   3133 /// computation as possible for the program.  The problem is that isel can only
   3134 /// see within a single block.  As such, we sink as much legal addressing mode
   3135 /// stuff into the block as possible.
   3136 ///
   3137 /// This method is used to optimize both load/store and inline asms with memory
   3138 /// operands.
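         ///
         /// As a simplified sketch (hypothetical IR):
         /// \code
         /// bb1:
         ///   %ext = sext i32 %idx to i64
         ///   %addr = getelementptr i32* %base, i64 %ext
         ///   br label %bb2
         /// bb2:
         ///   %val = load i32* %addr
         /// \endcode
         /// Here the address computation feeding the load may be rematerialized in
         /// bb2, next to the load, so that instruction selection can fold it into
         /// the load's addressing mode.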
   3139 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   3140                                         Type *AccessTy) {
   3141   Value *Repl = Addr;
   3142 
   3143   // Try to collapse single-value PHI nodes.  This is necessary to undo
   3144   // unprofitable PRE transformations.
   3145   SmallVector<Value*, 8> worklist;
   3146   SmallPtrSet<Value*, 16> Visited;
   3147   worklist.push_back(Addr);
   3148 
   3149   // Use a worklist to iteratively look through PHI nodes, and ensure that
    3150   // the addressing modes obtained from the non-PHI roots of the graph
    3151   // are equivalent.
   3152   Value *Consensus = nullptr;
   3153   unsigned NumUsesConsensus = 0;
   3154   bool IsNumUsesConsensusValid = false;
   3155   SmallVector<Instruction*, 16> AddrModeInsts;
   3156   ExtAddrMode AddrMode;
   3157   TypePromotionTransaction TPT;
   3158   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
   3159       TPT.getRestorationPoint();
   3160   while (!worklist.empty()) {
   3161     Value *V = worklist.back();
   3162     worklist.pop_back();
   3163 
   3164     // Break use-def graph loops.
   3165     if (!Visited.insert(V).second) {
   3166       Consensus = nullptr;
   3167       break;
   3168     }
   3169 
   3170     // For a PHI node, push all of its incoming values.
   3171     if (PHINode *P = dyn_cast<PHINode>(V)) {
   3172       for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
   3173         worklist.push_back(P->getIncomingValue(i));
   3174       continue;
   3175     }
   3176 
   3177     // For non-PHIs, determine the addressing mode being computed.
   3178     SmallVector<Instruction*, 16> NewAddrModeInsts;
   3179     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
   3180         V, AccessTy, MemoryInst, NewAddrModeInsts, *TM, InsertedTruncsSet,
   3181         PromotedInsts, TPT);
   3182 
   3183     // This check is broken into two cases with very similar code to avoid using
   3184     // getNumUses() as much as possible. Some values have a lot of uses, so
   3185     // calling getNumUses() unconditionally caused a significant compile-time
   3186     // regression.
   3187     if (!Consensus) {
   3188       Consensus = V;
   3189       AddrMode = NewAddrMode;
   3190       AddrModeInsts = NewAddrModeInsts;
   3191       continue;
   3192     } else if (NewAddrMode == AddrMode) {
   3193       if (!IsNumUsesConsensusValid) {
   3194         NumUsesConsensus = Consensus->getNumUses();
   3195         IsNumUsesConsensusValid = true;
   3196       }
   3197 
   3198       // Ensure that the obtained addressing mode is equivalent to that obtained
   3199       // for all other roots of the PHI traversal.  Also, when choosing one
   3200       // such root as representative, select the one with the most uses in order
   3201       // to keep the cost modeling heuristics in AddressingModeMatcher
   3202       // applicable.
   3203       unsigned NumUses = V->getNumUses();
   3204       if (NumUses > NumUsesConsensus) {
   3205         Consensus = V;
   3206         NumUsesConsensus = NumUses;
   3207         AddrModeInsts = NewAddrModeInsts;
   3208       }
   3209       continue;
   3210     }
   3211 
   3212     Consensus = nullptr;
   3213     break;
   3214   }
   3215 
   3216   // If the addressing mode couldn't be determined, or if multiple different
   3217   // ones were determined, bail out now.
   3218   if (!Consensus) {
   3219     TPT.rollback(LastKnownGood);
   3220     return false;
   3221   }
   3222   TPT.commit();
   3223 
    3224   // Check to see if any of the instructions subsumed by this addr mode are
   3225   // non-local to I's BB.
   3226   bool AnyNonLocal = false;
   3227   for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
   3228     if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
   3229       AnyNonLocal = true;
   3230       break;
   3231     }
   3232   }
   3233 
   3234   // If all the instructions matched are already in this BB, don't do anything.
   3235   if (!AnyNonLocal) {
   3236     DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
   3237     return false;
   3238   }
   3239 
   3240   // Insert this computation right after this user.  Since our caller is
    3241   // scanning from the top of the BB to the bottom, reuses of the expr are
    3242   // guaranteed to happen later.
   3243   IRBuilder<> Builder(MemoryInst);
   3244 
    3245   // Now that we have determined the addressing expression we want to use and
    3246   // know that we have to sink it into this block, check to see if we have
    3247   // already done this for some other load/store instr in this block.  If so,
    3248   // reuse the computation.
   3249   Value *&SunkAddr = SunkAddrs[Addr];
   3250   if (SunkAddr) {
   3251     DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
   3252                  << *MemoryInst << "\n");
   3253     if (SunkAddr->getType() != Addr->getType())
   3254       SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
   3255   } else if (AddrSinkUsingGEPs ||
   3256              (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
   3257               TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
   3258                   ->useAA())) {
   3259     // By default, we use the GEP-based method when AA is used later. This
   3260     // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
   3261     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
   3262                  << *MemoryInst << "\n");
   3263     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
   3264     Value *ResultPtr = nullptr, *ResultIndex = nullptr;
   3265 
   3266     // First, find the pointer.
   3267     if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
   3268       ResultPtr = AddrMode.BaseReg;
   3269       AddrMode.BaseReg = nullptr;
   3270     }
   3271 
   3272     if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
   3273       // We can't add more than one pointer together, nor can we scale a
   3274       // pointer (both of which seem meaningless).
   3275       if (ResultPtr || AddrMode.Scale != 1)
   3276         return false;
   3277 
   3278       ResultPtr = AddrMode.ScaledReg;
   3279       AddrMode.Scale = 0;
   3280     }
   3281 
   3282     if (AddrMode.BaseGV) {
   3283       if (ResultPtr)
   3284         return false;
   3285 
   3286       ResultPtr = AddrMode.BaseGV;
   3287     }
   3288 
   3289     // If the real base value actually came from an inttoptr, then the matcher
   3290     // will look through it and provide only the integer value. In that case,
   3291     // use it here.
   3292     if (!ResultPtr && AddrMode.BaseReg) {
   3293       ResultPtr =
   3294         Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
   3295       AddrMode.BaseReg = nullptr;
   3296     } else if (!ResultPtr && AddrMode.Scale == 1) {
   3297       ResultPtr =
   3298         Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
   3299       AddrMode.Scale = 0;
   3300     }
   3301 
   3302     if (!ResultPtr &&
   3303         !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
   3304       SunkAddr = Constant::getNullValue(Addr->getType());
   3305     } else if (!ResultPtr) {
   3306       return false;
   3307     } else {
   3308       Type *I8PtrTy =
   3309           Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
   3310       Type *I8Ty = Builder.getInt8Ty();
   3311 
   3312       // Start with the base register. Do this first so that subsequent address
   3313       // matching finds it last, which will prevent it from trying to match it
   3314       // as the scaled value in case it happens to be a mul. That would be
   3315       // problematic if we've sunk a different mul for the scale, because then
   3316       // we'd end up sinking both muls.
   3317       if (AddrMode.BaseReg) {
   3318         Value *V = AddrMode.BaseReg;
   3319         if (V->getType() != IntPtrTy)
   3320           V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
   3321 
   3322         ResultIndex = V;
   3323       }
   3324 
   3325       // Add the scale value.
   3326       if (AddrMode.Scale) {
   3327         Value *V = AddrMode.ScaledReg;
   3328         if (V->getType() == IntPtrTy) {
   3329           // done.
   3330         } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
   3331                    cast<IntegerType>(V->getType())->getBitWidth()) {
   3332           V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
   3333         } else {
   3334           // It is only safe to sign extend the BaseReg if we know that the math
   3335           // required to create it did not overflow before we extend it. Since
   3336           // the original IR value was tossed in favor of a constant back when
   3337           // the AddrMode was created we need to bail out gracefully if widths
   3338           // do not match instead of extending it.
   3339           Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
   3340           if (I && (ResultIndex != AddrMode.BaseReg))
   3341             I->eraseFromParent();
   3342           return false;
   3343         }
   3344 
   3345         if (AddrMode.Scale != 1)
   3346           V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
   3347                                 "sunkaddr");
   3348         if (ResultIndex)
   3349           ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
   3350         else
   3351           ResultIndex = V;
   3352       }
   3353 
   3354       // Add in the Base Offset if present.
   3355       if (AddrMode.BaseOffs) {
   3356         Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
   3357         if (ResultIndex) {
   3358           // We need to add this separately from the scale above to help with
   3359           // SDAG consecutive load/store merging.
   3360           if (ResultPtr->getType() != I8PtrTy)
   3361             ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
   3362           ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
   3363         }
   3364 
   3365         ResultIndex = V;
   3366       }
   3367 
   3368       if (!ResultIndex) {
   3369         SunkAddr = ResultPtr;
   3370       } else {
   3371         if (ResultPtr->getType() != I8PtrTy)
   3372           ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
   3373         SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
   3374       }
   3375 
   3376       if (SunkAddr->getType() != Addr->getType())
   3377         SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
   3378     }
   3379   } else {
   3380     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
   3381                  << *MemoryInst << "\n");
   3382     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
   3383     Value *Result = nullptr;
   3384 
   3385     // Start with the base register. Do this first so that subsequent address
   3386     // matching finds it last, which will prevent it from trying to match it
   3387     // as the scaled value in case it happens to be a mul. That would be
   3388     // problematic if we've sunk a different mul for the scale, because then
   3389     // we'd end up sinking both muls.
   3390     if (AddrMode.BaseReg) {
   3391       Value *V = AddrMode.BaseReg;
   3392       if (V->getType()->isPointerTy())
   3393         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
   3394       if (V->getType() != IntPtrTy)
   3395         V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
   3396       Result = V;
   3397     }
   3398 
   3399     // Add the scale value.
   3400     if (AddrMode.Scale) {
   3401       Value *V = AddrMode.ScaledReg;
   3402       if (V->getType() == IntPtrTy) {
   3403         // done.
   3404       } else if (V->getType()->isPointerTy()) {
   3405         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
   3406       } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
   3407                  cast<IntegerType>(V->getType())->getBitWidth()) {
   3408         V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
   3409       } else {
   3410         // It is only safe to sign extend the BaseReg if we know that the math
   3411         // required to create it did not overflow before we extend it. Since
   3412         // the original IR value was tossed in favor of a constant back when
   3413         // the AddrMode was created we need to bail out gracefully if widths
   3414         // do not match instead of extending it.
   3415         Instruction *I = dyn_cast_or_null<Instruction>(Result);
   3416         if (I && (Result != AddrMode.BaseReg))
   3417           I->eraseFromParent();
   3418         return false;
   3419       }
   3420       if (AddrMode.Scale != 1)
   3421         V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
   3422                               "sunkaddr");
   3423       if (Result)
   3424         Result = Builder.CreateAdd(Result, V, "sunkaddr");
   3425       else
   3426         Result = V;
   3427     }
   3428 
   3429     // Add in the BaseGV if present.
   3430     if (AddrMode.BaseGV) {
   3431       Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
   3432       if (Result)
   3433         Result = Builder.CreateAdd(Result, V, "sunkaddr");
   3434       else
   3435         Result = V;
   3436     }
   3437 
   3438     // Add in the Base Offset if present.
   3439     if (AddrMode.BaseOffs) {
   3440       Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
   3441       if (Result)
   3442         Result = Builder.CreateAdd(Result, V, "sunkaddr");
   3443       else
   3444         Result = V;
   3445     }
   3446 
   3447     if (!Result)
   3448       SunkAddr = Constant::getNullValue(Addr->getType());
   3449     else
   3450       SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
   3451   }
   3452 
   3453   MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
   3454 
   3455   // If we have no uses, recursively delete the value and all dead instructions
   3456   // using it.
   3457   if (Repl->use_empty()) {
   3458     // This can cause recursive deletion, which can invalidate our iterator.
   3459     // Use a WeakVH to hold onto it in case this happens.
   3460     WeakVH IterHandle(CurInstIterator);
   3461     BasicBlock *BB = CurInstIterator->getParent();
   3462 
   3463     RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
   3464 
   3465     if (IterHandle != CurInstIterator) {
   3466       // If the iterator instruction was recursively deleted, start over at the
   3467       // start of the block.
   3468       CurInstIterator = BB->begin();
   3469       SunkAddrs.clear();
   3470     }
   3471   }
   3472   ++NumMemoryInsts;
   3473   return true;
   3474 }
   3475 
   3476 /// OptimizeInlineAsmInst - If there are any memory operands, use
    3477 /// OptimizeMemoryInst to sink their address computation into the block when
   3478 /// possible / profitable.
   3479 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
   3480   bool MadeChange = false;
   3481 
   3482   const TargetRegisterInfo *TRI =
   3483       TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
   3484   TargetLowering::AsmOperandInfoVector
   3485     TargetConstraints = TLI->ParseConstraints(TRI, CS);
   3486   unsigned ArgNo = 0;
   3487   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
   3488     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
   3489 
   3490     // Compute the constraint code and ConstraintType to use.
   3491     TLI->ComputeConstraintToUse(OpInfo, SDValue());
   3492 
   3493     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
   3494         OpInfo.isIndirect) {
   3495       Value *OpVal = CS->getArgOperand(ArgNo++);
   3496       MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
   3497     } else if (OpInfo.Type == InlineAsm::isInput)
   3498       ArgNo++;
   3499   }
   3500 
   3501   return MadeChange;
   3502 }
   3503 
   3504 /// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
   3505 /// sign extensions.
   3506 static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
   3507   assert(!Inst->use_empty() && "Input must have at least one use");
   3508   const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
   3509   bool IsSExt = isa<SExtInst>(FirstUser);
   3510   Type *ExtTy = FirstUser->getType();
   3511   for (const User *U : Inst->users()) {
   3512     const Instruction *UI = cast<Instruction>(U);
   3513     if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
   3514       return false;
   3515     Type *CurTy = UI->getType();
   3516     // Same input and output types: Same instruction after CSE.
   3517     if (CurTy == ExtTy)
   3518       continue;
   3519 
   3520     // If IsSExt is true, we are in this situation:
   3521     // a = Inst
   3522     // b = sext ty1 a to ty2
   3523     // c = sext ty1 a to ty3
   3524     // Assuming ty2 is shorter than ty3, this could be turned into:
   3525     // a = Inst
   3526     // b = sext ty1 a to ty2
   3527     // c = sext ty2 b to ty3
   3528     // However, the last sext is not free.
   3529     if (IsSExt)
   3530       return false;
   3531 
   3532     // This is a ZExt, maybe this is free to extend from one type to another.
   3533     // In that case, we would not account for a different use.
   3534     Type *NarrowTy;
   3535     Type *LargeTy;
   3536     if (ExtTy->getScalarType()->getIntegerBitWidth() >
   3537         CurTy->getScalarType()->getIntegerBitWidth()) {
   3538       NarrowTy = CurTy;
   3539       LargeTy = ExtTy;
   3540     } else {
   3541       NarrowTy = ExtTy;
   3542       LargeTy = CurTy;
   3543     }
   3544 
   3545     if (!TLI.isZExtFree(NarrowTy, LargeTy))
   3546       return false;
   3547   }
   3548   // All uses are the same or can be derived from one another for free.
   3549   return true;
   3550 }
   3551 
   3552 /// \brief Try to form ExtLd by promoting \p Exts until they reach a
   3553 /// load instruction.
   3554 /// If an ext(load) can be formed, it is returned via \p LI for the load
   3555 /// and \p Inst for the extension.
   3556 /// Otherwise LI == nullptr and Inst == nullptr.
   3557 /// When some promotion happened, \p TPT contains the proper state to
   3558 /// revert them.
   3559 ///
   3560 /// \return true when promoting was necessary to expose the ext(load)
   3561 /// opportunity, false otherwise.
   3562 ///
   3563 /// Example:
   3564 /// \code
   3565 /// %ld = load i32* %addr
   3566 /// %add = add nuw i32 %ld, 4
   3567 /// %zext = zext i32 %add to i64
   3568 /// \endcode
   3569 /// =>
   3570 /// \code
   3571 /// %ld = load i32* %addr
   3572 /// %zext = zext i32 %ld to i64
   3573 /// %add = add nuw i64 %zext, 4
    3574 /// \endcode
   3575 /// Thanks to the promotion, we can match zext(load i32*) to i64.
   3576 bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
   3577                                     LoadInst *&LI, Instruction *&Inst,
   3578                                     const SmallVectorImpl<Instruction *> &Exts,
   3579                                     unsigned CreatedInstsCost = 0) {
    3580   // Iterate over all the extensions to see if one forms an ext(load).
   3581   for (auto I : Exts) {
   3582     // Check if we directly have ext(load).
   3583     if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
   3584       Inst = I;
   3585       // No promotion happened here.
   3586       return false;
   3587     }
   3588     // Check whether or not we want to do any promotion.
   3589     if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
   3590       continue;
   3591     // Get the action to perform the promotion.
   3592     TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
   3593         I, InsertedTruncsSet, *TLI, PromotedInsts);
   3594     // Check if we can promote.
   3595     if (!TPH)
   3596       continue;
   3597     // Save the current state.
   3598     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
   3599         TPT.getRestorationPoint();
   3600     SmallVector<Instruction *, 4> NewExts;
   3601     unsigned NewCreatedInstsCost = 0;
   3602     unsigned ExtCost = !TLI->isExtFree(I);
   3603     // Promote.
   3604     Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
   3605                              &NewExts, nullptr, *TLI);
   3606     assert(PromotedVal &&
   3607            "TypePromotionHelper should have filtered out those cases");
   3608 
    3609     // We can merge at most one extension into a load.
   3610     // Therefore, if we have more than 1 new extension we heuristically
   3611     // cut this search path, because it means we degrade the code quality.
   3612     // With exactly 2, the transformation is neutral, because we will merge
   3613     // one extension but leave one. However, we optimistically keep going,
   3614     // because the new extension may be removed too.
   3615     long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
   3616     TotalCreatedInstsCost -= ExtCost;
   3617     if (!StressExtLdPromotion &&
   3618         (TotalCreatedInstsCost > 1 ||
   3619          !isPromotedInstructionLegal(*TLI, PromotedVal))) {
   3620       // The promotion is not profitable, rollback to the previous state.
   3621       TPT.rollback(LastKnownGood);
   3622       continue;
   3623     }
   3624     // The promotion is profitable.
   3625     // Check if it exposes an ext(load).
   3626     (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
   3627     if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
   3628                // If we have created a new extension, i.e., now we have two
    3629                // extensions, we must make sure one of them is merged with
   3630                // the load, otherwise we may degrade the code quality.
   3631                (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
   3632       // Promotion happened.
   3633       return true;
   3634     // If this does not help to expose an ext(load) then, rollback.
   3635     TPT.rollback(LastKnownGood);
   3636   }
    3637   // None of the extensions can form an ext(load).
   3638   LI = nullptr;
   3639   Inst = nullptr;
   3640   return false;
   3641 }
   3642 
   3643 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
   3644 /// basic block as the load, unless conditions are unfavorable. This allows
   3645 /// SelectionDAG to fold the extend into the load.
   3646 /// \p I[in/out] the extension may be modified during the process if some
   3647 /// promotions apply.
   3648 ///
   3649 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
    3650   // Try to promote a chain of computation if doing so allows us to form
    3651   // an extended load.
   3652   TypePromotionTransaction TPT;
   3653   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
   3654     TPT.getRestorationPoint();
   3655   SmallVector<Instruction *, 1> Exts;
   3656   Exts.push_back(I);
   3657   // Look for a load being extended.
   3658   LoadInst *LI = nullptr;
   3659   Instruction *OldExt = I;
   3660   bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
   3661   if (!LI || !I) {
   3662     assert(!HasPromoted && !LI && "If we did not match any load instruction "
   3663                                   "the code must remain the same");
   3664     I = OldExt;
   3665     return false;
   3666   }
   3667 
   3668   // If they're already in the same block, there's nothing to do.
   3669   // Make the cheap checks first if we did not promote.
   3670   // If we promoted, we need to check if it is indeed profitable.
   3671   if (!HasPromoted && LI->getParent() == I->getParent())
   3672     return false;
   3673 
   3674   EVT VT = TLI->getValueType(I->getType());
   3675   EVT LoadVT = TLI->getValueType(LI->getType());
   3676 
   3677   // If the load has other users and the truncate is not free, this probably
   3678   // isn't worthwhile.
   3679   if (!LI->hasOneUse() && TLI &&
   3680       (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
   3681       !TLI->isTruncateFree(I->getType(), LI->getType())) {
   3682     I = OldExt;
   3683     TPT.rollback(LastKnownGood);
   3684     return false;
   3685   }
   3686 
   3687   // Check whether the target supports casts folded into loads.
   3688   unsigned LType;
   3689   if (isa<ZExtInst>(I))
   3690     LType = ISD::ZEXTLOAD;
   3691   else {
   3692     assert(isa<SExtInst>(I) && "Unexpected ext type!");
   3693     LType = ISD::SEXTLOAD;
   3694   }
   3695   if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) {
   3696     I = OldExt;
   3697     TPT.rollback(LastKnownGood);
   3698     return false;
   3699   }
   3700 
   3701   // Move the extend into the same block as the load, so that SelectionDAG
   3702   // can fold it.
   3703   TPT.commit();
   3704   I->removeFromParent();
   3705   I->insertAfter(LI);
   3706   ++NumExtsMoved;
   3707   return true;
   3708 }
   3709 
   3710 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
   3711   BasicBlock *DefBB = I->getParent();
   3712 
   3713   // If the result of a {s|z}ext and its source are both live out, rewrite all
    3714   // other uses of the source with the result of the extension.
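           // A minimal sketch (hypothetical IR): given '%e = zext i16 %src to i32'
           // in this block and a use of %src in another block, that use is rewritten
           // to 'trunc i32 %e to i16' so that only %e, rather than both %src and %e,
           // needs to stay live across the block boundary.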
   3715   Value *Src = I->getOperand(0);
   3716   if (Src->hasOneUse())
   3717     return false;
   3718 
   3719   // Only do this xform if truncating is free.
   3720   if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
   3721     return false;
   3722 
   3723   // Only safe to perform the optimization if the source is also defined in
   3724   // this block.
   3725   if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
   3726     return false;
   3727 
   3728   bool DefIsLiveOut = false;
   3729   for (User *U : I->users()) {
   3730     Instruction *UI = cast<Instruction>(U);
   3731 
   3732     // Figure out which BB this ext is used in.
   3733     BasicBlock *UserBB = UI->getParent();
   3734     if (UserBB == DefBB) continue;
   3735     DefIsLiveOut = true;
   3736     break;
   3737   }
   3738   if (!DefIsLiveOut)
   3739     return false;
   3740 
   3741   // Make sure none of the uses are PHI nodes or load/store instructions.
   3742   for (User *U : Src->users()) {
   3743     Instruction *UI = cast<Instruction>(U);
   3744     BasicBlock *UserBB = UI->getParent();
   3745     if (UserBB == DefBB) continue;
   3746     // Be conservative. We don't want this xform to end up introducing
   3747     // reloads just before load / store instructions.
   3748     if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
   3749       return false;
   3750   }
   3751 
   3752   // InsertedTruncs - Keep track so that we insert at most one trunc per block.
   3753   DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
   3754 
   3755   bool MadeChange = false;
   3756   for (Use &U : Src->uses()) {
   3757     Instruction *User = cast<Instruction>(U.getUser());
   3758 
   3759     // Figure out which BB this use of the source is in.
   3760     BasicBlock *UserBB = User->getParent();
   3761     if (UserBB == DefBB) continue;
   3762 
   3763     // Both src and def are live in this block. Rewrite the use.
   3764     Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
   3765 
   3766     if (!InsertedTrunc) {
   3767       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
   3768       InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
   3769       InsertedTruncsSet.insert(InsertedTrunc);
   3770     }
   3771 
   3772     // Replace a use of the {s|z}ext source with a use of the result.
   3773     U = InsertedTrunc;
   3774     ++NumExtUses;
   3775     MadeChange = true;
   3776   }
   3777 
   3778   return MadeChange;
   3779 }
   3780 
   3781 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
   3782 /// turned into an explicit branch.
   3783 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
   3784   // FIXME: This should use the same heuristics as IfConversion to determine
   3785   // whether a select is better represented as a branch.  This requires that
   3786   // branch probability metadata is preserved for the select, which is not the
   3787   // case currently.
   3788 
   3789   CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
   3790 
   3791   // If the branch is predicted right, an out-of-order CPU can avoid blocking on
   3792   // the compare.  Emit cmovs on compares with a memory operand as branches to
   3793   // avoid stalls on the load from memory.  If the compare has more than one use
   3794   // there's probably another cmov or setcc around so it's not worth emitting a
   3795   // branch.
   3796   if (!Cmp)
   3797     return false;
   3798 
   3799   Value *CmpOp0 = Cmp->getOperand(0);
   3800   Value *CmpOp1 = Cmp->getOperand(1);
   3801 
   3802   // We check that the memory operand has one use so the loaded value is not
   3803   // also used right after the compare; that would make a branch unprofitable.
   3804   return Cmp->hasOneUse() &&
   3805          ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
   3806           (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
   3807 }
   3808 
   3809 
   3810 /// If we have a SelectInst that will likely profit from branch prediction,
   3811 /// turn it into a branch.
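        ///
        /// Illustration only (loose pseudo-IR, names invented for this sketch):
        ///   x = select i1 %c, %a, %b
        /// =>
        ///   start:  br i1 %c, label %end, label %mid
        ///   mid:    br label %end
        ///   end:    x = phi [ %a, %start ], [ %b, %mid ]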
   3812 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
   3813   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
   3814 
   3815   // Can we convert the 'select' into control flow (a branch)?
   3816   if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
   3817     return false;
   3818 
   3819   TargetLowering::SelectSupportKind SelectKind;
   3820   if (VectorCond)
   3821     SelectKind = TargetLowering::VectorMaskSelect;
   3822   else if (SI->getType()->isVectorTy())
   3823     SelectKind = TargetLowering::ScalarCondVectorVal;
   3824   else
   3825     SelectKind = TargetLowering::ScalarValSelect;
   3826 
   3827   // Do we have efficient codegen support for this kind of 'select'?
   3828   if (TLI->isSelectSupported(SelectKind)) {
   3829     // We have efficient codegen support for the select instruction.
   3830     // Check if it is profitable to keep this 'select'.
   3831     if (!TLI->isPredictableSelectExpensive() ||
   3832         !isFormingBranchFromSelectProfitable(SI))
   3833       return false;
   3834   }
   3835 
   3836   ModifiedDT = true;
   3837 
   3838   // First, we split the block containing the select into 2 blocks.
   3839   BasicBlock *StartBlock = SI->getParent();
   3840   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
   3841   BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
   3842 
   3843   // Create a new block serving as the landing pad for the branch.
   3844   BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
   3845                                              NextBlock->getParent(), NextBlock);
   3846 
   3847   // Move the unconditional branch from the block with the select in it into our
   3848   // landing pad block.
   3849   StartBlock->getTerminator()->eraseFromParent();
   3850   BranchInst::Create(NextBlock, SmallBlock);
   3851 
   3852   // Insert the real conditional branch based on the original condition.
   3853   BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
   3854 
   3855   // The select itself is replaced with a PHI Node.
   3856   PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
   3857   PN->takeName(SI);
   3858   PN->addIncoming(SI->getTrueValue(), StartBlock);
   3859   PN->addIncoming(SI->getFalseValue(), SmallBlock);
   3860   SI->replaceAllUsesWith(PN);
   3861   SI->eraseFromParent();
   3862 
   3863   // Instruct OptimizeBlock to skip to the next block.
   3864   CurInstIterator = StartBlock->end();
   3865   ++NumSelectsExpanded;
   3866   return true;
   3867 }
   3868 
   3869 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
   3870   SmallVector<int, 16> Mask(SVI->getShuffleMask());
   3871   int SplatElem = -1;
   3872   for (unsigned i = 0; i < Mask.size(); ++i) {
   3873     if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
   3874       return false;
   3875     SplatElem = Mask[i];
   3876   }
   3877 
   3878   return true;
   3879 }
   3880 
   3881 /// Some targets have expensive vector shifts if the lanes aren't all the same
   3882 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
   3883 /// it's often worth sinking a shufflevector splat down to its use so that
   3884 /// codegen can spot all lanes are identical.
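        ///
        /// Illustration only (loose pseudo-IR, names invented for this sketch):
        ///   BB1:  s = shufflevector %v, undef, zeroinitializer   ; splat
        ///   BB2:  r = shl %x, s
        /// =>
        ///   BB2:  s.sunk = shufflevector %v, undef, zeroinitializer
        ///         r = shl %x, s.sunk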
   3885 bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
   3886   BasicBlock *DefBB = SVI->getParent();
   3887 
   3888   // Only do this xform if variable vector shifts are particularly expensive.
   3889   if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
   3890     return false;
   3891 
   3892   // We only expect better codegen by sinking a shuffle if we can recognise a
   3893   // constant splat.
   3894   if (!isBroadcastShuffle(SVI))
   3895     return false;
   3896 
   3897   // InsertedShuffles - Only insert a shuffle in each block once.
   3898   DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
   3899 
   3900   bool MadeChange = false;
   3901   for (User *U : SVI->users()) {
   3902     Instruction *UI = cast<Instruction>(U);
   3903 
   3904     // Figure out which BB this shuffle is used in.
   3905     BasicBlock *UserBB = UI->getParent();
   3906     if (UserBB == DefBB) continue;
   3907 
   3908     // For now only apply this when the splat is used by a shift instruction.
   3909     if (!UI->isShift()) continue;
   3910 
   3911     // Everything checks out, sink the shuffle if the user's block doesn't
   3912     // already have a copy.
   3913     Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
   3914 
   3915     if (!InsertedShuffle) {
   3916       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
   3917       InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
   3918                                               SVI->getOperand(1),
   3919                                               SVI->getOperand(2), "", InsertPt);
   3920     }
   3921 
   3922     UI->replaceUsesOfWith(SVI, InsertedShuffle);
   3923     MadeChange = true;
   3924   }
   3925 
   3926   // If we removed all uses, nuke the shuffle.
   3927   if (SVI->use_empty()) {
   3928     SVI->eraseFromParent();
   3929     MadeChange = true;
   3930   }
   3931 
   3932   return MadeChange;
   3933 }
   3934 
   3935 namespace {
   3936 /// \brief Helper class to promote a scalar operation to a vector one.
   3937 /// This class is used to move an extractelement transition downward.
   3938 /// E.g.,
   3939 /// a = vector_op <2 x i32>
   3940 /// b = extractelement <2 x i32> a, i32 0
   3941 /// c = scalar_op b
   3942 /// store c
   3943 ///
   3944 /// =>
   3945 /// a = vector_op <2 x i32>
   3946 /// c = vector_op a (equivalent to scalar_op on the related lane)
   3947 /// * d = extractelement <2 x i32> c, i32 0
   3948 /// * store d
   3949 /// Assuming both the extractelement and the store can be combined, we get
   3950 /// rid of the transition.
   3951 class VectorPromoteHelper {
   3952   /// Used to perform some checks on the legality of vector operations.
   3953   const TargetLowering &TLI;
   3954 
   3955   /// Used to estimate the cost of the promoted chain.
   3956   const TargetTransformInfo &TTI;
   3957 
   3958   /// The transition being moved downwards.
   3959   Instruction *Transition;
   3960   /// The sequence of instructions to be promoted.
   3961   SmallVector<Instruction *, 4> InstsToBePromoted;
   3962   /// Cost of combining a store and an extract.
   3963   unsigned StoreExtractCombineCost;
   3964   /// Instruction that will be combined with the transition.
   3965   Instruction *CombineInst;
   3966 
   3967   /// \brief The instruction that represents the current end of the transition.
   3968   /// Since we are faking the promotion until we reach the end of the chain
   3969   /// of computation, we need a way to get the current end of the transition.
   3970   Instruction *getEndOfTransition() const {
   3971     if (InstsToBePromoted.empty())
   3972       return Transition;
   3973     return InstsToBePromoted.back();
   3974   }
   3975 
   3976   /// \brief Return the index of the original value in the transition.
   3977   /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
   3978   /// c, is at index 0.
   3979   unsigned getTransitionOriginalValueIdx() const {
   3980     assert(isa<ExtractElementInst>(Transition) &&
   3981            "Other kind of transitions are not supported yet");
   3982     return 0;
   3983   }
   3984 
   3985   /// \brief Return the index of the index in the transition.
   3986   /// E.g., for "extractelement <2 x i32> c, i32 0" the index
   3987   /// is at index 1.
   3988   unsigned getTransitionIdx() const {
   3989     assert(isa<ExtractElementInst>(Transition) &&
   3990            "Other kind of transitions are not supported yet");
   3991     return 1;
   3992   }
   3993 
   3994   /// \brief Get the type of the transition.
   3995   /// This is the type of the original value.
   3996   /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
   3997   /// transition is <2 x i32>.
   3998   Type *getTransitionType() const {
   3999     return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
   4000   }
   4001 
   4002   /// \brief Promote \p ToBePromoted by moving \p Def downward through it.
   4003   /// I.e., we have the following sequence:
   4004   /// Def = Transition <ty1> a to <ty2>
   4005   /// b = ToBePromoted <ty2> Def, ...
   4006   /// =>
   4007   /// b = ToBePromoted <ty1> a, ...
   4008   /// Def = Transition <ty1> ToBePromoted to <ty2>
   4009   void promoteImpl(Instruction *ToBePromoted);
   4010 
   4011   /// \brief Check whether or not it is profitable to promote all the
   4012   /// instructions enqueued to be promoted.
   4013   bool isProfitableToPromote() {
   4014     Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
   4015     unsigned Index = isa<ConstantInt>(ValIdx)
   4016                          ? cast<ConstantInt>(ValIdx)->getZExtValue()
   4017                          : -1;
   4018     Type *PromotedType = getTransitionType();
   4019 
   4020     StoreInst *ST = cast<StoreInst>(CombineInst);
   4021     unsigned AS = ST->getPointerAddressSpace();
   4022     unsigned Align = ST->getAlignment();
   4023     // Check if this store is supported.
   4024     if (!TLI.allowsMisalignedMemoryAccesses(
   4025             TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) {
   4026       // If this is not supported, there is no way we can combine
   4027       // the extract with the store.
   4028       return false;
   4029     }
   4030 
   4031     // The scalar chain of computation has to pay for the scalar-to-vector
   4032     // transition.
   4033     // The vector chain has to account for the combining cost.
   4034     uint64_t ScalarCost =
   4035         TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
   4036     uint64_t VectorCost = StoreExtractCombineCost;
   4037     for (const auto &Inst : InstsToBePromoted) {
   4038       // Compute the cost.
   4039       // By construction, all instructions being promoted are arithmetic ones.
   4040       // Moreover, one argument is a constant that can be viewed as a splat
   4041       // constant.
   4042       Value *Arg0 = Inst->getOperand(0);
   4043       bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
   4044                             isa<ConstantFP>(Arg0);
   4045       TargetTransformInfo::OperandValueKind Arg0OVK =
   4046           IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
   4047                          : TargetTransformInfo::OK_AnyValue;
   4048       TargetTransformInfo::OperandValueKind Arg1OVK =
   4049           !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
   4050                           : TargetTransformInfo::OK_AnyValue;
   4051       ScalarCost += TTI.getArithmeticInstrCost(
   4052           Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
   4053       VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
   4054                                                Arg0OVK, Arg1OVK);
   4055     }
   4056     DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
   4057                  << ScalarCost << "\nVector: " << VectorCost << '\n');
   4058     return ScalarCost > VectorCost;
   4059   }
   4060 
   4061   /// \brief Generate a constant vector with \p Val with the same
   4062   /// number of elements as the transition.
   4063   /// \p UseSplat defines whether or not \p Val should be replicated
   4064   /// across the whole vector.
   4065   /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
   4066   /// otherwise we generate a vector with as many undef as possible:
   4067   /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
   4068   /// used at the index of the extract.
   4069   Value *getConstantVector(Constant *Val, bool UseSplat) const {
   4070     unsigned ExtractIdx = UINT_MAX;
   4071     if (!UseSplat) {
   4072       // If we cannot determine where the constant must be, we have to
   4073       // use a splat constant.
   4074       Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
   4075       if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
   4076         ExtractIdx = CstVal->getSExtValue();
   4077       else
   4078         UseSplat = true;
   4079     }
   4080 
   4081     unsigned End = getTransitionType()->getVectorNumElements();
   4082     if (UseSplat)
   4083       return ConstantVector::getSplat(End, Val);
   4084 
   4085     SmallVector<Constant *, 4> ConstVec;
   4086     UndefValue *UndefVal = UndefValue::get(Val->getType());
   4087     for (unsigned Idx = 0; Idx != End; ++Idx) {
   4088       if (Idx == ExtractIdx)
   4089         ConstVec.push_back(Val);
   4090       else
   4091         ConstVec.push_back(UndefVal);
   4092     }
   4093     return ConstantVector::get(ConstVec);
   4094   }
   4095 
   4096   /// \brief Check whether promoting the operand at \p OperandIdx of \p Use
   4097   /// to a vector type can trigger undefined behavior.
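          /// For example (illustration only): when a constant divisor such as the 7
          /// in "r = udiv x, 7" (operand index 1) is expanded to a vector, it must
          /// become a full splat <7, 7, ...> rather than <7, undef, ...>, because
          /// dividing by undef in the other lanes would be undefined behavior.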
   4098   static bool canCauseUndefinedBehavior(const Instruction *Use,
   4099                                         unsigned OperandIdx) {
   4100     // It is not safe to introduce undef when the operand is on
   4101     // the right-hand side of a division-like instruction.
   4102     if (OperandIdx != 1)
   4103       return false;
   4104     switch (Use->getOpcode()) {
   4105     default:
   4106       return false;
   4107     case Instruction::SDiv:
   4108     case Instruction::UDiv:
   4109     case Instruction::SRem:
   4110     case Instruction::URem:
   4111       return true;
   4112     case Instruction::FDiv:
   4113     case Instruction::FRem:
   4114       return !Use->hasNoNaNs();
   4115     }
   4116     llvm_unreachable(nullptr);
   4117   }
   4118 
   4119 public:
   4120   VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI,
   4121                       Instruction *Transition, unsigned CombineCost)
   4122       : TLI(TLI), TTI(TTI), Transition(Transition),
   4123         StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
   4124     assert(Transition && "Do not know how to promote null");
   4125   }
   4126 
   4127   /// \brief Check if we know how to promote \p ToBePromoted to a vector operation.
   4128   bool canPromote(const Instruction *ToBePromoted) const {
   4129     // We could support CastInst too.
   4130     return isa<BinaryOperator>(ToBePromoted);
   4131   }
   4132 
   4133   /// \brief Check if it is profitable to promote \p ToBePromoted
   4134   /// by moving the transition downward through it.
   4135   bool shouldPromote(const Instruction *ToBePromoted) const {
   4136     // Promote only if all the operands can be statically expanded.
   4137     // Indeed, we do not want to introduce any new kind of transitions.
   4138     for (const Use &U : ToBePromoted->operands()) {
   4139       const Value *Val = U.get();
   4140       if (Val == getEndOfTransition()) {
   4141         // If the use is a division and the transition is on the rhs,
   4142         // we cannot promote the operation, otherwise we may create a
   4143         // division by zero.
   4144         if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
   4145           return false;
   4146         continue;
   4147       }
   4148       if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
   4149           !isa<ConstantFP>(Val))
   4150         return false;
   4151     }
   4152     // Check that the resulting operation is legal.
   4153     int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
   4154     if (!ISDOpcode)
   4155       return false;
   4156     return StressStoreExtract ||
   4157            TLI.isOperationLegalOrCustom(
   4158                ISDOpcode, TLI.getValueType(getTransitionType(), true));
   4159   }
   4160 
   4161   /// \brief Check whether or not \p Use can be combined
   4162   /// with the transition.
   4163   /// I.e., is it possible to do Use(Transition) => AnotherUse?
   4164   bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
   4165 
   4166   /// \brief Record \p ToBePromoted as part of the chain to be promoted.
   4167   void enqueueForPromotion(Instruction *ToBePromoted) {
   4168     InstsToBePromoted.push_back(ToBePromoted);
   4169   }
   4170 
   4171   /// \brief Set the instruction that will be combined with the transition.
   4172   void recordCombineInstruction(Instruction *ToBeCombined) {
   4173     assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
   4174     CombineInst = ToBeCombined;
   4175   }
   4176 
   4177   /// \brief Promote all the instructions enqueued for promotion if it is
   4178   /// profitable.
   4179   /// \return True if the promotion happened, false otherwise.
   4180   bool promote() {
   4181     // Check if there is something to promote.
   4182     // Right now, if we do not have anything to combine with,
   4183     // we assume the promotion is not profitable.
   4184     if (InstsToBePromoted.empty() || !CombineInst)
   4185       return false;
   4186 
   4187     // Check cost.
   4188     if (!StressStoreExtract && !isProfitableToPromote())
   4189       return false;
   4190 
   4191     // Promote.
   4192     for (auto &ToBePromoted : InstsToBePromoted)
   4193       promoteImpl(ToBePromoted);
   4194     InstsToBePromoted.clear();
   4195     return true;
   4196   }
   4197 };
   4198 } // End of anonymous namespace.
   4199 
   4200 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
   4201   // At this point, we know that all the operands of ToBePromoted but Def
   4202   // can be statically promoted.
   4203   // For Def, we need to use its own input operand in ToBePromoted:
   4204   // b = ToBePromoted ty1 a
   4205   // Def = Transition ty1 b to ty2
   4206   // Move the transition down.
   4207   // 1. Replace all uses of the promoted operation by the transition.
   4208   // = ... b => = ... Def.
   4209   assert(ToBePromoted->getType() == Transition->getType() &&
   4210          "The type of the result of the transition does not match "
   4211          "the final type");
   4212   ToBePromoted->replaceAllUsesWith(Transition);
   4213   // 2. Update the type of the uses.
   4214   // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
   4215   Type *TransitionTy = getTransitionType();
   4216   ToBePromoted->mutateType(TransitionTy);
   4217   // 3. Update all the operands of the promoted operation with promoted
   4218   // operands.
   4219   // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
   4220   for (Use &U : ToBePromoted->operands()) {
   4221     Value *Val = U.get();
   4222     Value *NewVal = nullptr;
   4223     if (Val == Transition)
   4224       NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
   4225     else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
   4226              isa<ConstantFP>(Val)) {
   4227       // Use a splat constant if it is not safe to use undef.
   4228       NewVal = getConstantVector(
   4229           cast<Constant>(Val),
   4230           isa<UndefValue>(Val) ||
   4231               canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
   4232     } else
   4233       llvm_unreachable("Did you modify shouldPromote and forget to update "
   4234                        "this?");
   4235     ToBePromoted->setOperand(U.getOperandNo(), NewVal);
   4236   }
   4237   Transition->removeFromParent();
   4238   Transition->insertAfter(ToBePromoted);
   4239   Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
   4240 }
   4241 
   4242 /// Some targets can do store(extractelement) with one instruction.
   4243 /// Try to push the extractelement towards the stores when the target
   4244 /// has this feature and this is profitable.
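        ///
        /// Illustration only (loose pseudo-IR): on such a target,
        ///   e = extractelement <2 x i32> %v, i32 1
        ///   store e, %p
        /// can be selected as a single lane store, so pushing the extract down to
        /// the store (through the intervening scalar arithmetic) exposes that
        /// pattern.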
   4245 bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
   4246   unsigned CombineCost = UINT_MAX;
   4247   if (DisableStoreExtract || !TLI ||
   4248       (!StressStoreExtract &&
   4249        !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
   4250                                        Inst->getOperand(1), CombineCost)))
   4251     return false;
   4252 
   4253   // At this point we know that Inst is a vector to scalar transition.
   4254   // Try to move it down the def-use chain, until:
   4255   // - We can combine the transition with its single use
   4256   //   => we got rid of the transition.
   4257   // - We escape the current basic block
   4258   //   => we would need to check that we are moving it at a cheaper place and
   4259   //      we do not do that for now.
   4260   BasicBlock *Parent = Inst->getParent();
   4261   DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
   4262   VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost);
   4263   // If the transition has more than one use, assume this is not going to be
   4264   // beneficial.
   4265   while (Inst->hasOneUse()) {
   4266     Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
   4267     DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
   4268 
   4269     if (ToBePromoted->getParent() != Parent) {
   4270       DEBUG(dbgs() << "Instruction to promote is in a different block ("
   4271                    << ToBePromoted->getParent()->getName()
   4272                    << ") than the transition (" << Parent->getName() << ").\n");
   4273       return false;
   4274     }
   4275 
   4276     if (VPH.canCombine(ToBePromoted)) {
   4277       DEBUG(dbgs() << "Assume " << *Inst << '\n'
   4278                    << "will be combined with: " << *ToBePromoted << '\n');
   4279       VPH.recordCombineInstruction(ToBePromoted);
   4280       bool Changed = VPH.promote();
   4281       NumStoreExtractExposed += Changed;
   4282       return Changed;
   4283     }
   4284 
   4285     DEBUG(dbgs() << "Try promoting.\n");
   4286     if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
   4287       return false;
   4288 
   4289     DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
   4290 
   4291     VPH.enqueueForPromotion(ToBePromoted);
   4292     Inst = ToBePromoted;
   4293   }
   4294   return false;
   4295 }
   4296 
   4297 bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
   4298   if (PHINode *P = dyn_cast<PHINode>(I)) {
   4299     // It is possible for very late stage optimizations (such as SimplifyCFG)
   4300     // to introduce PHI nodes too late to be cleaned up.  If we detect such a
   4301     // trivial PHI, go ahead and zap it here.
   4302     const DataLayout &DL = I->getModule()->getDataLayout();
   4303     if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) {
   4304       P->replaceAllUsesWith(V);
   4305       P->eraseFromParent();
   4306       ++NumPHIsElim;
   4307       return true;
   4308     }
   4309     return false;
   4310   }
   4311 
   4312   if (CastInst *CI = dyn_cast<CastInst>(I)) {
   4313     // If the source of the cast is a constant, then this should have
   4314     // already been constant folded.  The only reason NOT to constant fold
   4315     // it is if something (e.g. LSR) was careful to place the constant
   4316     // evaluation in a block other than the one that uses it (e.g. to hoist
   4317     // the address of globals out of a loop).  If this is the case, we don't
   4318     // want to forward-subst the cast.
   4319     if (isa<Constant>(CI->getOperand(0)))
   4320       return false;
   4321 
   4322     if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
   4323       return true;
   4324 
   4325     if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
   4326       /// Sink a zext or sext into its user blocks if the target type doesn't
   4327       /// fit in one register
   4328       if (TLI && TLI->getTypeAction(CI->getContext(),
   4329                                     TLI->getValueType(CI->getType())) ==
   4330                      TargetLowering::TypeExpandInteger) {
   4331         return SinkCast(CI);
   4332       } else {
   4333         bool MadeChange = MoveExtToFormExtLoad(I);
   4334         return MadeChange | OptimizeExtUses(I);
   4335       }
   4336     }
   4337     return false;
   4338   }
   4339 
   4340   if (CmpInst *CI = dyn_cast<CmpInst>(I))
   4341     if (!TLI || !TLI->hasMultipleConditionRegisters())
   4342       return OptimizeCmpExpression(CI);
   4343 
   4344   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
   4345     if (TLI)
   4346       return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
   4347     return false;
   4348   }
   4349 
   4350   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
   4351     if (TLI)
   4352       return OptimizeMemoryInst(I, SI->getOperand(1),
   4353                                 SI->getOperand(0)->getType());
   4354     return false;
   4355   }
   4356 
   4357   BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
   4358 
   4359   if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
   4360                 BinOp->getOpcode() == Instruction::LShr)) {
   4361     ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
   4362     if (TLI && CI && TLI->hasExtractBitsInsn())
   4363       return OptimizeExtractBits(BinOp, CI, *TLI);
   4364 
   4365     return false;
   4366   }
   4367 
   4368   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
   4369     if (GEPI->hasAllZeroIndices()) {
   4370       // The GEP operand must be a pointer, so its result must be too: use a BitCast.
   4371       Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
   4372                                         GEPI->getName(), GEPI);
   4373       GEPI->replaceAllUsesWith(NC);
   4374       GEPI->eraseFromParent();
   4375       ++NumGEPsElim;
   4376       OptimizeInst(NC, ModifiedDT);
   4377       return true;
   4378     }
   4379     return false;
   4380   }
   4381 
   4382   if (CallInst *CI = dyn_cast<CallInst>(I))
   4383     return OptimizeCallInst(CI, ModifiedDT);
   4384 
   4385   if (SelectInst *SI = dyn_cast<SelectInst>(I))
   4386     return OptimizeSelectInst(SI);
   4387 
   4388   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
   4389     return OptimizeShuffleVectorInst(SVI);
   4390 
   4391   if (isa<ExtractElementInst>(I))
   4392     return OptimizeExtractElementInst(I);
   4393 
   4394   return false;
   4395 }
   4396 
   4397 // In this pass we look for GEP and cast instructions that are used
   4398 // across basic blocks and rewrite them to improve basic-block-at-a-time
   4399 // selection.
   4400 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
   4401   SunkAddrs.clear();
   4402   bool MadeChange = false;
   4403 
   4404   CurInstIterator = BB.begin();
   4405   while (CurInstIterator != BB.end()) {
   4406     MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
   4407     if (ModifiedDT)
   4408       return true;
   4409   }
   4410   MadeChange |= DupRetToEnableTailCallOpts(&BB);
   4411 
   4412   return MadeChange;
   4413 }
   4414 
   4415 // If an llvm.dbg.value is far away from the value it describes, then ISel may
   4416 // not be able to handle it properly. ISel will drop the llvm.dbg.value if it
   4417 // cannot find a node corresponding to the value.
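        // Illustration only (names invented for this sketch): given
        //   %x = add i32 %a, %b
        //   ...                                  ; many unrelated instructions
        //   call void @llvm.dbg.value(metadata %x, ...)
        // the dbg.value is moved so that it immediately follows the definition
        // of %x (or the first insertion point of the block if %x is a PHI).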
   4418 bool CodeGenPrepare::PlaceDbgValues(Function &F) {
   4419   bool MadeChange = false;
   4420   for (BasicBlock &BB : F) {
   4421     Instruction *PrevNonDbgInst = nullptr;
   4422     for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
   4423       Instruction *Insn = BI++;
   4424       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
   4425       // Leave dbg.values that refer to an alloca alone. These
   4426       // intrinsics describe the address of a variable (= the alloca)
   4427       // being taken.  They should not be moved next to the alloca
   4428       // (and to the beginning of the scope), but rather stay close to
   4429       // where said address is used.
   4430       if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
   4431         PrevNonDbgInst = Insn;
   4432         continue;
   4433       }
   4434 
   4435       Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
   4436       if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
   4437         DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
   4438         DVI->removeFromParent();
   4439         if (isa<PHINode>(VI))
   4440           DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
   4441         else
   4442           DVI->insertAfter(VI);
   4443         MadeChange = true;
   4444         ++NumDbgValueMoved;
   4445       }
   4446     }
   4447   }
   4448   return MadeChange;
   4449 }
   4450 
   4451 // If there is a sequence that branches based on comparing a single bit
   4452 // against zero that can be combined into a single instruction, and the
   4453 // target supports folding these into a single instruction, sink the
   4454 // mask and compare into the branch uses. Do this before OptimizeBlock ->
   4455 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
   4456 // searched for.
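        // Illustration only (names invented for this sketch): if another block
        // also branches on the compare result from the pattern matched below, e.g.
        //   other:
        //     br i1 %icmpVal, label %dest3, label %dest4
        // then a fresh "and; icmp" pair is created in %other directly before that
        // branch, so each branch can later be folded with its own mask-and-test.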
   4457 bool CodeGenPrepare::sinkAndCmp(Function &F) {
   4458   if (!EnableAndCmpSinking)
   4459     return false;
   4460   if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
   4461     return false;
   4462   bool MadeChange = false;
   4463   for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
   4464     BasicBlock *BB = I++;
   4465 
   4466     // Does this BB end with the following?
   4467     //   %andVal = and %val, #single-bit-set
   4468     //   %icmpVal = icmp %andVal, 0
   4469     //   br i1 %icmpVal, label %dest1, label %dest2
   4470     BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
   4471     if (!Brcc || !Brcc->isConditional())
   4472       continue;
   4473     ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
   4474     if (!Cmp || Cmp->getParent() != BB)
   4475       continue;
   4476     ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
   4477     if (!Zero || !Zero->isZero())
   4478       continue;
   4479     Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
   4480     if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
   4481       continue;
   4482     ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
   4483     if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
   4484       continue;
   4485     DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
   4486 
   4487     // Push the "and; icmp" for any users that are conditional branches.
   4488     // Since there can only be one branch use per BB, we don't need to keep
   4489     // track of which BBs we insert into.
   4490     for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
   4491          UI != E; ) {
   4492       Use &TheUse = *UI;
   4493       // Find brcc use.
   4494       BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
   4495       ++UI;
   4496       if (!BrccUser || !BrccUser->isConditional())
   4497         continue;
   4498       BasicBlock *UserBB = BrccUser->getParent();
   4499       if (UserBB == BB) continue;
   4500       DEBUG(dbgs() << "found Brcc use\n");
   4501 
   4502       // Sink the "and; icmp" to use.
   4503       MadeChange = true;
   4504       BinaryOperator *NewAnd =
   4505         BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
   4506                                   BrccUser);
   4507       CmpInst *NewCmp =
   4508         CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
   4509                         "", BrccUser);
   4510       TheUse = NewCmp;
   4511       ++NumAndCmpsMoved;
   4512       DEBUG(BrccUser->getParent()->dump());
   4513     }
   4514   }
   4515   return MadeChange;
   4516 }
   4517 
   4518 /// \brief Retrieve the probabilities of a conditional branch. Returns true on
   4519 /// success, or returns false if no or invalid metadata was found.
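        /// Illustration only: a conditional branch annotated with
        ///   !prof !{!"branch_weights", i32 20, i32 10}
        /// yields ProbTrue = 20 and ProbFalse = 10.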
   4520 static bool extractBranchMetadata(BranchInst *BI,
   4521                                   uint64_t &ProbTrue, uint64_t &ProbFalse) {
   4522   assert(BI->isConditional() &&
   4523          "Looking for probabilities on unconditional branch?");
   4524   auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
   4525   if (!ProfileData || ProfileData->getNumOperands() != 3)
   4526     return false;
   4527 
   4528   const auto *CITrue =
   4529       mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
   4530   const auto *CIFalse =
   4531       mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
   4532   if (!CITrue || !CIFalse)
   4533     return false;
   4534 
   4535   ProbTrue = CITrue->getValue().getZExtValue();
   4536   ProbFalse = CIFalse->getValue().getZExtValue();
   4537 
   4538   return true;
   4539 }
   4540 
   4541 /// \brief Scale down both weights to fit into uint32_t.
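        /// Illustration only: with NewTrue = 2^33 and NewFalse = 2^31, NewMax is
        /// 2^33, so Scale = 2^33 / UINT32_MAX + 1 = 3, giving NewTrue = 2863311530
        /// and NewFalse = 715827882, both of which fit into uint32_t.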
   4542 static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
   4543   uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
   4544   uint32_t Scale = (NewMax / UINT32_MAX) + 1;
   4545   NewTrue = NewTrue / Scale;
   4546   NewFalse = NewFalse / Scale;
   4547 }
   4548 
   4549 /// \brief Some targets prefer to split a conditional branch like:
   4550 /// \code
   4551 ///   %0 = icmp ne i32 %a, 0
   4552 ///   %1 = icmp ne i32 %b, 0
   4553 ///   %or.cond = or i1 %0, %1
   4554 ///   br i1 %or.cond, label %TrueBB, label %FalseBB
   4555 /// \endcode
   4556 /// into multiple branch instructions like:
   4557 /// \code
   4558 ///   bb1:
   4559 ///     %0 = icmp ne i32 %a, 0
   4560 ///     br i1 %0, label %TrueBB, label %bb2
   4561 ///   bb2:
   4562 ///     %1 = icmp ne i32 %b, 0
   4563 ///     br i1 %1, label %TrueBB, label %FalseBB
   4564 /// \endcode
   4565 /// This usually allows instruction selection to do even further optimizations
   4566 /// and combine the compare with the branch instruction. Currently this is
   4567 /// applied for targets which have "cheap" jump instructions.
   4568 ///
   4569 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
   4570 ///
   4571 bool CodeGenPrepare::splitBranchCondition(Function &F) {
   4572   if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
   4573     return false;
   4574 
   4575   bool MadeChange = false;
   4576   for (auto &BB : F) {
   4577     // Does this BB end with the following?
   4578     //   %cond1 = icmp|fcmp|binary instruction ...
   4579     //   %cond2 = icmp|fcmp|binary instruction ...
   4580     //   %cond.or = or|and i1 %cond1, %cond2
   4581     //   br i1 %cond.or, label %dest1, label %dest2
   4582     BinaryOperator *LogicOp;
   4583     BasicBlock *TBB, *FBB;
   4584     if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
   4585       continue;
   4586 
   4587     unsigned Opc;
   4588     Value *Cond1, *Cond2;
   4589     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
   4590                              m_OneUse(m_Value(Cond2)))))
   4591       Opc = Instruction::And;
   4592     else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
   4593                                  m_OneUse(m_Value(Cond2)))))
   4594       Opc = Instruction::Or;
   4595     else
   4596       continue;
   4597 
   4598     if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
   4599         !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp()))   )
   4600       continue;
   4601 
   4602     DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
   4603 
   4604     // Create a new BB.
   4605     auto *InsertBefore = std::next(Function::iterator(BB))
   4606         .getNodePtrUnchecked();
   4607     auto TmpBB = BasicBlock::Create(BB.getContext(),
   4608                                     BB.getName() + ".cond.split",
   4609                                     BB.getParent(), InsertBefore);
   4610 
   4611     // Update the original basic block: use the first condition directly in the
   4612     // branch instruction and remove the no-longer-needed and/or instruction.
   4613     auto *Br1 = cast<BranchInst>(BB.getTerminator());
   4614     Br1->setCondition(Cond1);
   4615     LogicOp->eraseFromParent();
   4616 
   4617     // Depending on the condition we have to either replace the true or the false
   4618     // successor of the original branch instruction.
   4619     if (Opc == Instruction::And)
   4620       Br1->setSuccessor(0, TmpBB);
   4621     else
   4622       Br1->setSuccessor(1, TmpBB);
   4623 
   4624     // Fill in the new basic block.
   4625     auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
   4626     if (auto *I = dyn_cast<Instruction>(Cond2)) {
   4627       I->removeFromParent();
   4628       I->insertBefore(Br2);
   4629     }
   4630 
   4631     // Update PHI nodes in both successors. The original BB needs to be
   4632     // replaced in one successor's PHI nodes, because the branch now comes from
   4633     // the newly generated BB (NewBB). In the other successor we need to add one
   4634     // incoming edge to the PHI nodes, because both branch instructions now
   4635     // target the same successor. Depending on the original branch condition
   4636     // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
   4637     // we perform the correct update for the PHI nodes.
   4638     // This doesn't change the successor order of the just created branch
   4639     // instruction (or any other instruction).
   4640     if (Opc == Instruction::Or)
   4641       std::swap(TBB, FBB);
   4642 
   4643     // Replace the old BB with the new BB.
   4644     for (auto &I : *TBB) {
   4645       PHINode *PN = dyn_cast<PHINode>(&I);
   4646       if (!PN)
   4647         break;
   4648       int i;
   4649       while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
   4650         PN->setIncomingBlock(i, TmpBB);
   4651     }
   4652 
   4653     // Add another incoming edge from the new BB.
   4654     for (auto &I : *FBB) {
   4655       PHINode *PN = dyn_cast<PHINode>(&I);
   4656       if (!PN)
   4657         break;
   4658       auto *Val = PN->getIncomingValueForBlock(&BB);
   4659       PN->addIncoming(Val, TmpBB);
   4660     }
   4661 
   4662     // Update the branch weights (from SelectionDAGBuilder::
   4663     // FindMergedConditions).
   4664     if (Opc == Instruction::Or) {
   4665       // Codegen X | Y as:
   4666       // BB1:
   4667       //   jmp_if_X TBB
   4668       //   jmp TmpBB
   4669       // TmpBB:
   4670       //   jmp_if_Y TBB
   4671       //   jmp FBB
   4672       //
   4673 
   4674       // We have flexibility in setting Prob for BB1 and Prob for NewBB.
   4675       // The requirement is that
   4676       //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
   4677       //     = TrueProb for the original BB.
   4678       // Assuming the original weights are A and B, one choice is to set BB1's
   4679       // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
   4680       // assumes that
   4681       //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
   4682       // Another choice is to assume TrueProb for BB1 equals to TrueProb for
   4683       // TmpBB, but the math is more complicated.
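              // Illustration only: with original weights A = 3 and B = 5, Br1 gets
              // weights 3 and 13 (A and A + 2B) and Br2 gets 3 and 10 (A and 2B).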
   4684       uint64_t TrueWeight, FalseWeight;
   4685       if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
   4686         uint64_t NewTrueWeight = TrueWeight;
   4687         uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
   4688         scaleWeights(NewTrueWeight, NewFalseWeight);
   4689         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
   4690                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
   4691 
   4692         NewTrueWeight = TrueWeight;
   4693         NewFalseWeight = 2 * FalseWeight;
   4694         scaleWeights(NewTrueWeight, NewFalseWeight);
   4695         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
   4696                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
   4697       }
   4698     } else {
   4699       // Codegen X & Y as:
   4700       // BB1:
   4701       //   jmp_if_X TmpBB
   4702       //   jmp FBB
   4703       // TmpBB:
   4704       //   jmp_if_Y TBB
   4705       //   jmp FBB
   4706       //
   4707       //  This requires creation of TmpBB after CurBB.
   4708 
   4709       // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
   4710       // The requirement is that
   4711       //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
   4712       //     = FalseProb for the original BB.
   4713       // Assuming the original weights are A and B, one choice is to set BB1's
   4714       // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
   4715       // assumes that
   4716       //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
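              // Illustration only: with original weights A = 3 and B = 5, Br1 gets
              // weights 11 and 5 (2A + B and B) and Br2 gets 6 and 5 (2A and B).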
   4717       uint64_t TrueWeight, FalseWeight;
   4718       if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
   4719         uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
   4720         uint64_t NewFalseWeight = FalseWeight;
   4721         scaleWeights(NewTrueWeight, NewFalseWeight);
   4722         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
   4723                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
   4724 
   4725         NewTrueWeight = 2 * TrueWeight;
   4726         NewFalseWeight = FalseWeight;
   4727         scaleWeights(NewTrueWeight, NewFalseWeight);
   4728         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
   4729                          .createBranchWeights(NewTrueWeight, NewFalseWeight));
   4730       }
   4731     }
   4732 
   4733     // Note: No point in getting fancy here, since the DT info is never
   4734     // available to CodeGenPrepare.
   4735     ModifiedDT = true;
   4736 
   4737     MadeChange = true;
   4738 
   4739     DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
   4740           TmpBB->dump());
   4741   }
   4742   return MadeChange;
   4743 }
   4744