//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
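//      For example (illustrative), given 'for (i = 0; i != n; ++i) {}
//      use(i);', the use of 'i' after the loop is rewritten to use 'n'
//      directly, and the now-useless loop can be deleted.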
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
  cl::desc("Reduce live induction variables."));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible"),
               clEnumValEnd));

namespace {
struct RewritePhi;

class IndVarSimplify : public LoopPass {
  LoopInfo                  *LI;
  ScalarEvolution           *SE;
  DominatorTree             *DT;
  TargetLibraryInfo         *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakVH, 16> DeadInsts;
  bool Changed;
public:

  static char ID; // Pass identification, replacement for typeid
  IndVarSimplify()
    : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
    initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequiredID(LoopSimplifyID);
    AU.addRequiredID(LCSSAID);
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<ScalarEvolutionWrapperPass>();
    AU.addPreservedID(LoopSimplifyID);
    AU.addPreservedID(LCSSAID);
    AU.setPreservesCFG();
  }

private:
  void releaseMemory() override {
    DeadInsts.clear();
  }

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  void handleFloatingPointIV(Loop *L, PHINode *PH);
  void rewriteNonIntegerIVs(Loop *L);

  void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);

  Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                   PHINode *IndVar, SCEVExpander &Rewriter);

  void sinkUnusedInvariants(Loop *L);

  Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
                            Instruction *InsertPt, Type *Ty);
};
}

char IndVarSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
                "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(IndVarSimplify, "indvars",
                "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplify();
}

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
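/// For example (illustrative): if an expansion would compute the address as
/// 'gep %base.b, %i' where the original value was 'gep %base.a, %i', the two
/// may be equal at runtime, yet the new GEP is no longer inbounds relative to
/// the original underlying object '%base.a', so the rewrite is rejected.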
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
          << *FromBase << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);
    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }
  assert(InsertPt && "Missing phi operand");

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
                           &isExact) != APFloat::opOK || !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
///
void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;

  // If this is not an add of the PHI with a ConstantFP, or if the constant FP
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;
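
    // For instance (illustrative): with predicate NE, InitValue == 0,
    // ExitValue == 10, and IncValue == 3, the integer IV would take the
    // values 3, 6, 9, 12, ... and never equal 10, so it would wrap around;
    // Leftover == 10 % 3 == 1 rejects that case above.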

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return;

  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakVH to observe whether this happens.
  WeakVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  Changed = true;
}

void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  //
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakVH, 8> PHIs;
  for (BasicBlock::iterator I = Header->begin();
       PHINode *PN = dyn_cast<PHINode>(I); ++I)
    PHIs.push_back(PN);

  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
}

namespace {
// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;
  unsigned Ith;  // Ith incoming value.
  Value *Val;    // Exit value after expansion.
  bool HighCost; // High cost of expansion.
  bool SafePhi;  // LCSSASafePhiForRAUW.

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S)
      : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {}
};
}

Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
                                          Loop *L, Instruction *InsertPt,
                                          Type *ResultTy) {
  // Before expanding S into an expensive LLVM expression, see if we can use an
  // already existing value as the expansion for S.
  if (Value *ExistingValue = Rewriter.findExistingExpansion(S, InsertPt, L))
    if (ExistingValue->getType() == ResultTy)
      return ExistingValue;

  // We didn't find anything, fall back to using SCEVExpander.
  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
}

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
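///
/// For example (illustrative): given 'sum = 0; for (i = 0; i != n; ++i)
/// sum += 2;', a use of 'sum' after the loop can be replaced by '2 * n'
/// computed outside the loop, leaving the loop itself potentially dead.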
void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *ExitBB = ExitBlocks[i];

    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // We would like to be able to RAUW single-incoming value PHI nodes. We
    // have to be certain this is safe even when this is an LCSSA PHI node.
    // While the computed exit value is no longer varying in *this* loop, the
    // exit block may be an exit block for an outer containing loop as well,
    // the exit value may be varying in the outer loop, and thus it may still
    // require an LCSSA PHI node. The safe case is when this is a
    // single-predecessor PHI node (LCSSA) and the exit block containing it is
    // part of the enclosing loop, or this is the outermost loop of the nest.
    // In either case the exit value could (at most) be varying in the same
    // loop body as the phi node itself. Thus if it is in turn used outside of
    // an enclosing loop it will only be via a separate LCSSA node.
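    // For instance (illustrative): in a nest 'outer { inner { ... } exit }'
    // where 'exit' is still inside 'outer', the single-operand LCSSA phi in
    // 'exit' for a value from 'inner' can be replaced outright; any use
    // beyond 'outer' must already go through outer's own LCSSA phi.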
    bool LCSSASafePhiForRAUW =
        NumPreds == 1 &&
        (!L->getParentLoop() || L->getParentLoop() == LI->getLoopFor(ExitBB));

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      // SCEV only supports integer expressions for now.
      if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the incoming values in this PHI node.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction (and hence
        // cannot be defined inside the loop), skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if:
        //  - it is definitely used inside the loop in a way which cannot be
        //    optimized away.
        //  - no use outside of the loop can take advantage of hoisting the
        //    computation out of the loop
        if (ExitValue->getSCEVType() >= scMulExpr) {
          unsigned NumHardInternalUses = 0;
          unsigned NumSoftExternalUses = 0;
          unsigned NumUses = 0;
          for (auto IB = Inst->user_begin(), IE = Inst->user_end();
               IB != IE && NumUses <= 6; ++IB) {
            Instruction *UseInstr = cast<Instruction>(*IB);
            unsigned Opc = UseInstr->getOpcode();
            NumUses++;
            if (L->contains(UseInstr)) {
              if (Opc == Instruction::Call || Opc == Instruction::Ret)
                NumHardInternalUses++;
            } else {
              if (Opc == Instruction::PHI) {
                // Do not count the Phi as a use. LCSSA may have inserted
                // plenty of trivial ones.
                NumUses--;
                for (auto PB = UseInstr->user_begin(),
                          PE = UseInstr->user_end();
                     PB != PE && NumUses <= 6; ++PB, ++NumUses) {
                  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
                  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
                    NumSoftExternalUses++;
                }
                continue;
              }
              if (Opc != Instruction::Call && Opc != Instruction::Ret)
                NumSoftExternalUses++;
            }
          }
          if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
            continue;
        }

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal =
            expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());

        DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
                     << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.push_back(
            RewritePhi(PN, i, ExitVal, HighCost, LCSSASafePhiForRAUW));
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is now dead, queue it for deletion; deleting it
    // immediately here could invalidate iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // If we determined that this PHI is safe to replace even if an LCSSA
    // PHI, do so.
    if (Phi.SafePhi) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
}

/// Check whether it is possible to delete the loop after rewriting the exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {

  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1; we handle only the single-exiting-block case
  // here for simplicity.
  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop-invariant value in the transformation
    // phase later. Skip it in the loop-invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {
// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by visitIVCast and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;
  Type *WidestNativeType = nullptr; // Widest integer type created by [sz]ext
  bool IsSigned = false;            // Was a sext user seen before a zext?
};
}

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
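  // For instance (illustrative): on a target where a 64-bit add costs more
  // than a 32-bit add, an i32 IV is left unwidened even when i64 is a legal
  // integer type.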
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
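  // (Illustrative: if %x is known non-negative, sext(%x) == zext(%x), so
  // either extension can stand in for the other.)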
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
///
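/// For example (illustrative): an i32 IV feeding 'sext i32 %iv to i64' users
/// becomes an i64 phi; the sexts are removed, and any remaining i32 users are
/// fed by 'trunc i64 %iv.wide to i32'.
///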
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;
  bool IsSigned;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Result
  PHINode *WidePhi;
  Instruction *WideInc;
  const SCEV *WideIncExpr;
  SmallVectorImpl<WeakVH> &DeadInsts;

  SmallPtrSet<Instruction*,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
          ScalarEvolution *SEv, DominatorTree *DTree,
          SmallVectorImpl<WeakVH> &DI) :
    OrigPhi(WI.NarrowIV),
    WideType(WI.WidestNativeType),
    IsSigned(WI.IsSigned),
    LI(LInfo),
    L(LI->getLoopFor(OrigPhi->getParent())),
    SE(SEv),
    DT(DTree),
    WidePhi(nullptr),
    WideInc(nullptr),
    WideIncExpr(nullptr),
    DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  const SCEVAddRecExpr *getWideRecurrence(Instruction *NarrowUse);

  const SCEVAddRecExpr* getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace

/// Perform a quick domtree based check for loop invariance assuming that V is
/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
/// purpose.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
  Instruction *Inst = dyn_cast<Instruction>(V);
  if (!Inst)
    return true;

  return DT->properlyDominates(Inst->getParent(), L->getHeader());
}

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = IsSigned;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return it.
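///
/// For example (illustrative): for an add carrying the nsw flag,
/// sext(%iv +nsw %step) == sext(%iv) + sext(%step), so extending the non-IV
/// operand still yields an AddRec over the wide type.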
const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return nullptr;

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  if (IsSigned && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (!IsSigned && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return nullptr;

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Swap the operands back to their original order for non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;
  return AddRec;
}

/// Is this instruction potentially interesting for further simplification
/// after widening its type? In other words, can the extend be safely hoisted
/// out of the loop with SCEV reducing the value to a recurrence on the same
/// loop? If so, return the sign or zero extended recurrence. Otherwise return
/// NULL.
const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) {
  if (!SE->isSCEVable(NarrowUse->getType()))
    return nullptr;

  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType())
      >= SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return nullptr;
  }

  const SCEV *WideExpr = IsSigned ?
    SE->getSignExtendExpr(NarrowExpr, WideType) :
    SE->getZeroExtendExpr(NarrowExpr, WideType);
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;
  return AddRec;
}
   1174 
   1175 /// This IV user cannot be widen. Replace this use of the original narrow IV
   1176 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
   1177 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
   1178   DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
   1179         << " for user " << *DU.NarrowUse << "\n");
   1180   IRBuilder<> Builder(
   1181       getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
   1182   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
   1183   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
   1184 }
   1185 
   1186 /// If the narrow use is a compare instruction, then widen the compare
   1187 //  (and possibly the other operand).  The extend operation is hoisted into the
   1188 // loop preheader as far as possible.
   1189 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
   1190   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
   1191   if (!Cmp)
   1192     return false;
   1193 
   1194   // We can legally widen the comparison in the following two cases:
   1195   //
   1196   //  - The signedness of the IV extension and comparison match
   1197   //
   1198   //  - The narrow IV is always positive (and thus its sign extension is equal
   1199   //    to its zero extension).  For instance, let's say we're zero extending
   1200   //    %narrow for the following use
   1201   //
   1202   //      icmp slt i32 %narrow, %val   ... (A)
   1203   //
   1204   //    and %narrow is always positive.  Then
   1205   //
   1206   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
   1207   //          == icmp slt i32 zext(%narrow), sext(%val)
   1208 
   1209   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
   1210     return false;
   1211 
   1212   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
   1213   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
   1214   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
   1215   assert (CastWidth <= IVWidth && "Unexpected width while widening compare.");
   1216 
   1217   // Widen the compare instruction.
   1218   IRBuilder<> Builder(
   1219       getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
   1220   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
   1221 
   1222   // Widen the other operand of the compare, if necessary.
   1223   if (CastWidth < IVWidth) {
   1224     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
   1225     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
   1226   }
   1227   return true;
   1228 }
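
         // For example (hypothetical IR), when zero-extending an i32 IV used by
         //
         //   %cmp = icmp ult i32 %iv, %limit
         //
         // this produces
         //
         //   %limit.wide = zext i32 %limit to i64
         //   %cmp = icmp ult i64 %iv.wide, %limit.wide
         //
         // with the zext of %limit hoisted out of the loop when its operand
         // allows it.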
   1229 
   1230 /// Determine whether an individual user of the narrow IV can be widened. If so,
   1231 /// return the wide clone of the user.
   1232 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
   1233 
   1234   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
   1235   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
   1236     if (LI->getLoopFor(UsePhi->getParent()) != L) {
   1237       // For LCSSA phis, sink the truncate outside the loop.
   1238       // After SimplifyCFG most loop exit targets have a single predecessor.
   1239       // Otherwise fall back to a truncate within the loop.
   1240       if (UsePhi->getNumOperands() != 1)
   1241         truncateIVUse(DU, DT, LI);
   1242       else {
   1243         PHINode *WidePhi =
   1244           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
   1245                           UsePhi);
   1246         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
   1247         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
   1248         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
   1249         UsePhi->replaceAllUsesWith(Trunc);
   1250         DeadInsts.emplace_back(UsePhi);
   1251         DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
   1252               << " to " << *WidePhi << "\n");
   1253       }
   1254       return nullptr;
   1255     }
   1256   }
   1257   // Our raison d'etre! Eliminate sign and zero extension.
   1258   if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
   1259     Value *NewDef = DU.WideDef;
   1260     if (DU.NarrowUse->getType() != WideType) {
   1261       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
   1262       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
   1263       if (CastWidth < IVWidth) {
   1264         // The cast isn't as wide as the IV, so insert a Trunc.
   1265         IRBuilder<> Builder(DU.NarrowUse);
   1266         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
   1267       }
   1268       else {
   1269         // A wider extend was hidden behind a narrower one. This may induce
   1270         // another round of IV widening in which the intermediate IV becomes
   1271         // dead. It should be very rare.
   1272         DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
   1273               << " not wide enough to subsume " << *DU.NarrowUse << "\n");
   1274         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
   1275         NewDef = DU.NarrowUse;
   1276       }
   1277     }
   1278     if (NewDef != DU.NarrowUse) {
   1279       DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
   1280             << " replaced by " << *DU.WideDef << "\n");
   1281       ++NumElimExt;
   1282       DU.NarrowUse->replaceAllUsesWith(NewDef);
   1283       DeadInsts.emplace_back(DU.NarrowUse);
   1284     }
    1285     // Now that the extend is gone, we want to expose its uses for potential
   1286     // further simplification. We don't need to directly inform SimplifyIVUsers
   1287     // of the new users, because their parent IV will be processed later as a
   1288     // new loop phi. If we preserved IVUsers analysis, we would also want to
   1289     // push the uses of WideDef here.
   1290 
    1291     // No further widening is needed. The deceased [sz]ext has done it for us.
   1292     return nullptr;
   1293   }
   1294 
   1295   // Does this user itself evaluate to a recurrence after widening?
   1296   const SCEVAddRecExpr *WideAddRec = getWideRecurrence(DU.NarrowUse);
   1297   if (!WideAddRec)
   1298     WideAddRec = getExtendedOperandRecurrence(DU);
   1299 
   1300   if (!WideAddRec) {
   1301     // If use is a loop condition, try to promote the condition instead of
   1302     // truncating the IV first.
   1303     if (widenLoopCompare(DU))
   1304       return nullptr;
   1305 
    1306     // This user does not evaluate to a recurrence after widening, so don't
   1307     // follow it. Instead insert a Trunc to kill off the original use,
   1308     // eventually isolating the original narrow IV so it can be removed.
   1309     truncateIVUse(DU, DT, LI);
   1310     return nullptr;
   1311   }
    1312   // Assume block terminators cannot evaluate to a recurrence. We can't
   1313   // insert a Trunc after a terminator if there happens to be a critical edge.
   1314   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
   1315          "SCEV is not expected to evaluate a block terminator");
   1316 
   1317   // Reuse the IV increment that SCEVExpander created as long as it dominates
   1318   // NarrowUse.
   1319   Instruction *WideUse = nullptr;
   1320   if (WideAddRec == WideIncExpr
   1321       && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
   1322     WideUse = WideInc;
   1323   else {
   1324     WideUse = cloneIVUser(DU, WideAddRec);
   1325     if (!WideUse)
   1326       return nullptr;
   1327   }
   1328   // Evaluation of WideAddRec ensured that the narrow expression could be
   1329   // extended outside the loop without overflow. This suggests that the wide use
   1330   // evaluates to the same expression as the extended narrow use, but doesn't
   1331   // absolutely guarantee it. Hence the following failsafe check. In rare cases
   1332   // where it fails, we simply throw away the newly created wide use.
   1333   if (WideAddRec != SE->getSCEV(WideUse)) {
   1334     DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
   1335           << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
   1336     DeadInsts.emplace_back(WideUse);
   1337     return nullptr;
   1338   }
   1339 
   1340   // Returning WideUse pushes it on the worklist.
   1341   return WideUse;
   1342 }
   1343 
   1344 /// Add eligible users of NarrowDef to NarrowIVUsers.
   1345 ///
   1346 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
   1347   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
   1348   bool NeverNegative =
   1349       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
   1350                            SE->getConstant(NarrowSCEV->getType(), 0));
   1351   for (User *U : NarrowDef->users()) {
   1352     Instruction *NarrowUser = cast<Instruction>(U);
   1353 
   1354     // Handle data flow merges and bizarre phi cycles.
   1355     if (!Widened.insert(NarrowUser).second)
   1356       continue;
   1357 
   1358     NarrowIVUsers.push_back(
   1359         NarrowIVDefUse(NarrowDef, NarrowUser, WideDef, NeverNegative));
   1360   }
   1361 }
   1362 
   1363 /// Process a single induction variable. First use the SCEVExpander to create a
   1364 /// wide induction variable that evaluates to the same recurrence as the
   1365 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
   1366 /// def-use chain. After widenIVUse has processed all interesting IV users, the
   1367 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
   1368 ///
   1369 /// It would be simpler to delete uses as they are processed, but we must avoid
   1370 /// invalidating SCEV expressions.
   1371 ///
   1372 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
   1373   // Is this phi an induction variable?
   1374   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
   1375   if (!AddRec)
   1376     return nullptr;
   1377 
   1378   // Widen the induction variable expression.
   1379   const SCEV *WideIVExpr = IsSigned ?
   1380     SE->getSignExtendExpr(AddRec, WideType) :
   1381     SE->getZeroExtendExpr(AddRec, WideType);
   1382 
   1383   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
   1384          "Expect the new IV expression to preserve its type");
   1385 
   1386   // Can the IV be extended outside the loop without overflow?
   1387   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
   1388   if (!AddRec || AddRec->getLoop() != L)
   1389     return nullptr;
   1390 
   1391   // An AddRec must have loop-invariant operands. Since this AddRec is
   1392   // materialized by a loop header phi, the expression cannot have any post-loop
   1393   // operands, so they must dominate the loop header.
   1394   assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
   1395          SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
   1396          && "Loop header phi recurrence inputs do not dominate the loop");
   1397 
   1398   // The rewriter provides a value for the desired IV expression. This may
   1399   // either find an existing phi or materialize a new one. Either way, we
   1400   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
   1401   // of the phi-SCC dominates the loop entry.
   1402   Instruction *InsertPt = &L->getHeader()->front();
   1403   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
   1404 
   1405   // Remembering the WideIV increment generated by SCEVExpander allows
   1406   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
   1407   // employ a general reuse mechanism because the call above is the only call to
   1408   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
   1409   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
   1410     WideInc =
   1411       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
   1412     WideIncExpr = SE->getSCEV(WideInc);
   1413   }
   1414 
   1415   DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
   1416   ++NumWidened;
   1417 
   1418   // Traverse the def-use chain using a worklist starting at the original IV.
    1419   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
   1420 
   1421   Widened.insert(OrigPhi);
   1422   pushNarrowIVUsers(OrigPhi, WidePhi);
   1423 
   1424   while (!NarrowIVUsers.empty()) {
   1425     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
   1426 
   1427     // Process a def-use edge. This may replace the use, so don't hold a
   1428     // use_iterator across it.
   1429     Instruction *WideUse = widenIVUse(DU, Rewriter);
   1430 
   1431     // Follow all def-use edges from the previous narrow use.
   1432     if (WideUse)
   1433       pushNarrowIVUsers(DU.NarrowUse, WideUse);
   1434 
   1435     // widenIVUse may have removed the def-use edge.
   1436     if (DU.NarrowDef->use_empty())
   1437       DeadInsts.emplace_back(DU.NarrowDef);
   1438   }
   1439   return WidePhi;
   1440 }
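
         // Net effect on a typical loop, sketched with hypothetical IR: the
         // narrow recurrence
         //
         //   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
         //
         // gains a parallel wide twin
         //
         //   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
         //
         // and the worklist migrates interesting i32 users onto %iv.wide until
         // the narrow phi can be deleted.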
   1441 
   1442 //===----------------------------------------------------------------------===//
   1443 //  Live IV Reduction - Minimize IVs live across the loop.
   1444 //===----------------------------------------------------------------------===//
   1445 
   1446 
   1447 //===----------------------------------------------------------------------===//
   1448 //  Simplification of IV users based on SCEV evaluation.
   1449 //===----------------------------------------------------------------------===//
   1450 
   1451 namespace {
   1452 class IndVarSimplifyVisitor : public IVVisitor {
   1453   ScalarEvolution *SE;
   1454   const TargetTransformInfo *TTI;
   1455   PHINode *IVPhi;
   1456 
   1457 public:
   1458   WideIVInfo WI;
   1459 
   1460   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
   1461                         const TargetTransformInfo *TTI,
   1462                         const DominatorTree *DTree)
   1463     : SE(SCEV), TTI(TTI), IVPhi(IV) {
   1464     DT = DTree;
   1465     WI.NarrowIV = IVPhi;
   1466     if (ReduceLiveIVs)
   1467       setSplitOverflowIntrinsics();
   1468   }
   1469 
   1470   // Implement the interface used by simplifyUsersOfIV.
   1471   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
   1472 };
   1473 }
   1474 
   1475 /// Iteratively perform simplification on a worklist of IV users. Each
   1476 /// successive simplification may push more users which may themselves be
   1477 /// candidates for simplification.
   1478 ///
   1479 /// Sign/Zero extend elimination is interleaved with IV simplification.
   1480 ///
   1481 void IndVarSimplify::simplifyAndExtend(Loop *L,
   1482                                        SCEVExpander &Rewriter,
   1483                                        LoopInfo *LI) {
   1484   SmallVector<WideIVInfo, 8> WideIVs;
   1485 
   1486   SmallVector<PHINode*, 8> LoopPhis;
   1487   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
   1488     LoopPhis.push_back(cast<PHINode>(I));
   1489   }
   1490   // Each round of simplification iterates through the SimplifyIVUsers worklist
   1491   // for all current phis, then determines whether any IVs can be
   1492   // widened. Widening adds new phis to LoopPhis, inducing another round of
   1493   // simplification on the wide IVs.
   1494   while (!LoopPhis.empty()) {
   1495     // Evaluate as many IV expressions as possible before widening any IVs. This
   1496     // forces SCEV to set no-wrap flags before evaluating sign/zero
   1497     // extension. The first time SCEV attempts to normalize sign/zero extension,
   1498     // the result becomes final. So for the most predictable results, we delay
    1499     // evaluation of sign/zero extension until needed, and avoid running
    1500     // other SCEV-based analyses prior to simplifyAndExtend.
   1501     do {
   1502       PHINode *CurrIV = LoopPhis.pop_back_val();
   1503 
   1504       // Information about sign/zero extensions of CurrIV.
   1505       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
   1506 
   1507       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor);
   1508 
   1509       if (Visitor.WI.WidestNativeType) {
   1510         WideIVs.push_back(Visitor.WI);
   1511       }
    1512     } while (!LoopPhis.empty());
   1513 
   1514     for (; !WideIVs.empty(); WideIVs.pop_back()) {
   1515       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
   1516       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
   1517         Changed = true;
   1518         LoopPhis.push_back(WidePhi);
   1519       }
   1520     }
   1521   }
   1522 }
   1523 
   1524 //===----------------------------------------------------------------------===//
   1525 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
   1526 //===----------------------------------------------------------------------===//
   1527 
   1528 /// Return true if this loop's backedge taken count expression can be safely and
   1529 /// cheaply expanded into an instruction sequence that can be used by
   1530 /// linearFunctionTestReplace.
   1531 ///
    1532 /// TODO: This fails for pointer-type loop counters with greater-than-one-byte
   1533 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
   1534 /// we could skip this check in the case that the LFTR loop counter (chosen by
   1535 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
   1536 /// the loop test to an inequality test by checking the target data's alignment
   1537 /// of element types (given that the initial pointer value originates from or is
    1538 /// used by an ABI-constrained operation, as opposed to inttoptr/ptrtoint).
   1539 /// However, we don't yet have a strong motivation for converting loop tests
   1540 /// into inequality tests.
   1541 static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
   1542                                         SCEVExpander &Rewriter) {
   1543   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   1544   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
   1545       BackedgeTakenCount->isZero())
   1546     return false;
   1547 
   1548   if (!L->getExitingBlock())
   1549     return false;
   1550 
   1551   // Can't rewrite non-branch yet.
   1552   if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
   1553     return false;
   1554 
   1555   if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
   1556     return false;
   1557 
   1558   return true;
   1559 }
   1560 
   1561 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
   1562 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
   1563   Instruction *IncI = dyn_cast<Instruction>(IncV);
   1564   if (!IncI)
   1565     return nullptr;
   1566 
   1567   switch (IncI->getOpcode()) {
   1568   case Instruction::Add:
   1569   case Instruction::Sub:
   1570     break;
   1571   case Instruction::GetElementPtr:
   1572     // An IV counter must preserve its type.
   1573     if (IncI->getNumOperands() == 2)
   1574       break;
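             // Otherwise the GEP has more than one index and may change the
             // type; fall through and reject it.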
   1575   default:
   1576     return nullptr;
   1577   }
   1578 
   1579   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
   1580   if (Phi && Phi->getParent() == L->getHeader()) {
   1581     if (isLoopInvariant(IncI->getOperand(1), L, DT))
   1582       return Phi;
   1583     return nullptr;
   1584   }
   1585   if (IncI->getOpcode() == Instruction::GetElementPtr)
   1586     return nullptr;
   1587 
   1588   // Allow add/sub to be commuted.
   1589   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
   1590   if (Phi && Phi->getParent() == L->getHeader()) {
   1591     if (isLoopInvariant(IncI->getOperand(0), L, DT))
   1592       return Phi;
   1593   }
   1594   return nullptr;
   1595 }
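
         // Recognized counter shapes, with hypothetical names:
         //
         //   %iv.next = add nsw i32 %iv, %stride          ; phi in either operand
         //   %p.next  = getelementptr i8, i8* %p, i64 %n  ; phi only as pointer
         //
         // where the phi lives in L's header and the other operand is loop
         // invariant.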
   1596 
   1597 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
   1598 static ICmpInst *getLoopTest(Loop *L) {
   1599   assert(L->getExitingBlock() && "expected loop exit");
   1600 
   1601   BasicBlock *LatchBlock = L->getLoopLatch();
   1602   // Don't bother with LFTR if the loop is not properly simplified.
   1603   if (!LatchBlock)
   1604     return nullptr;
   1605 
   1606   BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1607   assert(BI && "expected exit branch");
   1608 
   1609   return dyn_cast<ICmpInst>(BI->getCondition());
   1610 }
   1611 
   1612 /// linearFunctionTestReplace policy. Return true unless we can show that the
   1613 /// current exit test is already sufficiently canonical.
   1614 static bool needsLFTR(Loop *L, DominatorTree *DT) {
   1615   // Do LFTR to simplify the exit condition to an ICMP.
   1616   ICmpInst *Cond = getLoopTest(L);
   1617   if (!Cond)
   1618     return true;
   1619 
   1620   // Do LFTR to simplify the exit ICMP to EQ/NE
   1621   ICmpInst::Predicate Pred = Cond->getPredicate();
   1622   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
   1623     return true;
   1624 
   1625   // Look for a loop invariant RHS
   1626   Value *LHS = Cond->getOperand(0);
   1627   Value *RHS = Cond->getOperand(1);
   1628   if (!isLoopInvariant(RHS, L, DT)) {
   1629     if (!isLoopInvariant(LHS, L, DT))
   1630       return true;
   1631     std::swap(LHS, RHS);
   1632   }
   1633   // Look for a simple IV counter LHS
   1634   PHINode *Phi = dyn_cast<PHINode>(LHS);
   1635   if (!Phi)
   1636     Phi = getLoopPhiForCounter(LHS, L, DT);
   1637 
   1638   if (!Phi)
   1639     return true;
   1640 
   1641   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
   1642   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
   1643   if (Idx < 0)
   1644     return true;
   1645 
   1646   // Do LFTR if the exit condition's IV is *not* a simple counter.
   1647   Value *IncV = Phi->getIncomingValue(Idx);
   1648   return Phi != getLoopPhiForCounter(IncV, L, DT);
   1649 }
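
         // For instance (hypothetical names), no LFTR is needed when the latch
         // already exits on
         //
         //   %exitcond = icmp ne i32 %iv.next, %n   ; %n loop invariant
         //
         // with %iv.next incrementing a simple counter phi in the loop header.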
   1650 
   1651 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
   1652 /// down to checking that all operands are constant and listing instructions
   1653 /// that may hide undef.
   1654 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
   1655                                unsigned Depth) {
   1656   if (isa<Constant>(V))
   1657     return !isa<UndefValue>(V);
   1658 
   1659   if (Depth >= 6)
   1660     return false;
   1661 
   1662   // Conservatively handle non-constant non-instructions. For example, Arguments
   1663   // may be undef.
   1664   Instruction *I = dyn_cast<Instruction>(V);
   1665   if (!I)
   1666     return false;
   1667 
   1668   // Load and return values may be undef.
    1669   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
   1670     return false;
   1671 
   1672   // Optimistically handle other instructions.
   1673   for (Value *Op : I->operands()) {
   1674     if (!Visited.insert(Op).second)
   1675       continue;
   1676     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
   1677       return false;
   1678   }
   1679   return true;
   1680 }
   1681 
   1682 /// Return true if the given value is concrete. We must prove that undef can
   1683 /// never reach it.
   1684 ///
   1685 /// TODO: If we decide that this is a good approach to checking for undef, we
   1686 /// may factor it into a common location.
   1687 static bool hasConcreteDef(Value *V) {
   1688   SmallPtrSet<Value*, 8> Visited;
   1689   Visited.insert(V);
   1690   return hasConcreteDefImpl(V, Visited, 0);
   1691 }
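
         // For example, a header phi such as (hypothetical IR)
         //
         //   %x = phi i32 [ undef, %entry ], [ %x.next, %latch ]
         //
         // fails this check, so FindLoopCounter below avoids introducing new
         // users of it unless it already feeds the loop test.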
   1692 
    1693 /// Return true if this IV has no uses other than the (soon to be rewritten)
    1694 /// loop exit test.
   1695 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
   1696   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
   1697   Value *IncV = Phi->getIncomingValue(LatchIdx);
   1698 
   1699   for (User *U : Phi->users())
   1700     if (U != Cond && U != IncV) return false;
   1701 
   1702   for (User *U : IncV->users())
   1703     if (U != Cond && U != Phi) return false;
   1704   return true;
   1705 }
   1706 
   1707 /// Find an affine IV in canonical form.
   1708 ///
    1709 /// BECount may be an i8* pointer type. The pointer difference is already a
   1710 /// valid count without scaling the address stride, so it remains a pointer
   1711 /// expression as far as SCEV is concerned.
   1712 ///
    1713 /// Currently only valid for LFTR. See the comments on hasConcreteDef above.
   1714 ///
   1715 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
   1716 ///
   1717 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
   1718 /// This is difficult in general for SCEV because of potential overflow. But we
   1719 /// could at least handle constant BECounts.
   1720 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
   1721                                 ScalarEvolution *SE, DominatorTree *DT) {
   1722   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
   1723 
   1724   Value *Cond =
   1725     cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
   1726 
   1727   // Loop over all of the PHI nodes, looking for a simple counter.
   1728   PHINode *BestPhi = nullptr;
   1729   const SCEV *BestInit = nullptr;
   1730   BasicBlock *LatchBlock = L->getLoopLatch();
   1731   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
   1732 
   1733   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
   1734     PHINode *Phi = cast<PHINode>(I);
   1735     if (!SE->isSCEVable(Phi->getType()))
   1736       continue;
   1737 
   1738     // Avoid comparing an integer IV against a pointer Limit.
   1739     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
   1740       continue;
   1741 
   1742     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
   1743     if (!AR || AR->getLoop() != L || !AR->isAffine())
   1744       continue;
   1745 
   1746     // AR may be a pointer type, while BECount is an integer type.
   1747     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    1748     // AR must not be a narrower type, or we may never exit.
   1749     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
   1750     if (PhiWidth < BCWidth ||
   1751         !L->getHeader()->getModule()->getDataLayout().isLegalInteger(PhiWidth))
   1752       continue;
   1753 
   1754     const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
   1755     if (!Step || !Step->isOne())
   1756       continue;
   1757 
   1758     int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
   1759     Value *IncV = Phi->getIncomingValue(LatchIdx);
   1760     if (getLoopPhiForCounter(IncV, L, DT) != Phi)
   1761       continue;
   1762 
   1763     // Avoid reusing a potentially undef value to compute other values that may
   1764     // have originally had a concrete definition.
   1765     if (!hasConcreteDef(Phi)) {
   1766       // We explicitly allow unknown phis as long as they are already used by
   1767       // the loop test. In this case we assume that performing LFTR could not
   1768       // increase the number of undef users.
   1769       if (ICmpInst *Cond = getLoopTest(L)) {
   1770         if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT)
   1771             && Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
   1772           continue;
   1773         }
   1774       }
   1775     }
   1776     const SCEV *Init = AR->getStart();
   1777 
   1778     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
   1779       // Don't force a live loop counter if another IV can be used.
   1780       if (AlmostDeadIV(Phi, LatchBlock, Cond))
   1781         continue;
   1782 
    1783       // Prefer to count from zero. This is a more "canonical" counter form. It
    1784       // also prefers integer IVs over pointer IVs.
   1785       if (BestInit->isZero() != Init->isZero()) {
   1786         if (BestInit->isZero())
   1787           continue;
   1788       }
   1789       // If two IVs both count from zero or both count from nonzero then the
   1790       // narrower is likely a dead phi that has been widened. Use the wider phi
   1791       // to allow the other to be eliminated.
   1792       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
   1793         continue;
   1794     }
   1795     BestPhi = Phi;
   1796     BestInit = Init;
   1797   }
   1798   return BestPhi;
   1799 }
   1800 
   1801 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
   1802 /// the new loop test.
   1803 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
   1804                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
   1805   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
   1806   assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
   1807   const SCEV *IVInit = AR->getStart();
   1808 
   1809   // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
    1810   // finds a valid pointer IV. Zero extend BECount in order to materialize a
   1811   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
   1812   // the existing GEPs whenever possible.
   1813   if (IndVar->getType()->isPointerTy()
   1814       && !IVCount->getType()->isPointerTy()) {
   1815 
   1816     // IVOffset will be the new GEP offset that is interpreted by GEP as a
   1817     // signed value. IVCount on the other hand represents the loop trip count,
   1818     // which is an unsigned value. FindLoopCounter only allows induction
    1819     // variables that have a positive unit stride. This means we don't
   1820     // have to handle the case of negative offsets (yet) and just need to zero
   1821     // extend IVCount.
   1822     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
   1823     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
   1824 
   1825     // Expand the code for the iteration count.
   1826     assert(SE->isLoopInvariant(IVOffset, L) &&
   1827            "Computed iteration count is not loop invariant!");
   1828     BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1829     Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
   1830 
   1831     Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
   1832     assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
   1833     // We could handle pointer IVs other than i8*, but we need to compensate for
   1834     // gep index scaling. See canExpandBackedgeTakenCount comments.
   1835     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
   1836              cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
   1837            && "unit stride pointer IV must be i8*");
   1838 
   1839     IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
   1840     return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
   1841   }
   1842   else {
   1843     // In any other case, convert both IVInit and IVCount to integers before
    1844     // comparing. This may result in SCEV expansion of pointers, but in practice
    1845     // SCEV will fold the pointer arithmetic away, as follows:
   1846     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
   1847     //
    1848     // Valid cases: (1) both integers, which is the most common; (2) both may be
    1849     // pointers for simple memset-style loops.
   1850     //
   1851     // IVInit integer and IVCount pointer would only occur if a canonical IV
   1852     // were generated on top of case #2, which is not expected.
   1853 
   1854     const SCEV *IVLimit = nullptr;
   1855     // For unit stride, IVCount = Start + BECount with 2's complement overflow.
   1856     // For non-zero Start, compute IVCount here.
   1857     if (AR->getStart()->isZero())
   1858       IVLimit = IVCount;
   1859     else {
   1860       assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
   1861       const SCEV *IVInit = AR->getStart();
   1862 
   1863       // For integer IVs, truncate the IV before computing IVInit + BECount.
   1864       if (SE->getTypeSizeInBits(IVInit->getType())
   1865           > SE->getTypeSizeInBits(IVCount->getType()))
   1866         IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
   1867 
   1868       IVLimit = SE->getAddExpr(IVInit, IVCount);
   1869     }
   1870     // Expand the code for the iteration count.
   1871     BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1872     IRBuilder<> Builder(BI);
   1873     assert(SE->isLoopInvariant(IVLimit, L) &&
   1874            "Computed iteration count is not loop invariant!");
   1875     // Ensure that we generate the same type as IndVar, or a smaller integer
   1876     // type. In the presence of null pointer values, we have an integer type
   1877     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
   1878     Type *LimitTy = IVCount->getType()->isPointerTy() ?
   1879       IndVar->getType() : IVCount->getType();
   1880     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
   1881   }
   1882 }
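
         // E.g. for a unit-stride i8* counter, the pointer branch above emits
         // roughly (hypothetical names)
         //
         //   %lftr.limit = getelementptr i8, i8* %base, i64 %iv.count
         //
         // in the preheader, while the integer branch expands
         // IVLimit = Start + IVCount at the exiting branch.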
   1883 
   1884 /// This method rewrites the exit condition of the loop to be a canonical !=
   1885 /// comparison against the incremented loop induction variable.  This pass is
   1886 /// able to rewrite the exit tests of any loop where the SCEV analysis can
   1887 /// determine a loop-invariant trip count of the loop, which is actually a much
   1888 /// broader range than just linear tests.
   1889 Value *IndVarSimplify::
   1890 linearFunctionTestReplace(Loop *L,
   1891                           const SCEV *BackedgeTakenCount,
   1892                           PHINode *IndVar,
   1893                           SCEVExpander &Rewriter) {
   1894   assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
   1895 
   1896   // Initialize CmpIndVar and IVCount to their preincremented values.
   1897   Value *CmpIndVar = IndVar;
   1898   const SCEV *IVCount = BackedgeTakenCount;
   1899 
   1900   // If the exiting block is the same as the backedge block, we prefer to
   1901   // compare against the post-incremented value, otherwise we must compare
   1902   // against the preincremented value.
   1903   if (L->getExitingBlock() == L->getLoopLatch()) {
   1904     // Add one to the "backedge-taken" count to get the trip count.
   1905     // This addition may overflow, which is valid as long as the comparison is
   1906     // truncated to BackedgeTakenCount->getType().
   1907     IVCount = SE->getAddExpr(BackedgeTakenCount,
   1908                              SE->getOne(BackedgeTakenCount->getType()));
   1909     // The BackedgeTaken expression contains the number of times that the
   1910     // backedge branches to the loop header.  This is one less than the
   1911     // number of times the loop executes, so use the incremented indvar.
   1912     CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
   1913   }
   1914 
   1915   Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
   1916   assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
   1917          && "genLoopLimit missed a cast");
   1918 
   1919   // Insert a new icmp_ne or icmp_eq instruction before the branch.
   1920   BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1921   ICmpInst::Predicate P;
   1922   if (L->contains(BI->getSuccessor(0)))
   1923     P = ICmpInst::ICMP_NE;
   1924   else
   1925     P = ICmpInst::ICMP_EQ;
   1926 
   1927   DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
   1928                << "      LHS:" << *CmpIndVar << '\n'
   1929                << "       op:\t"
   1930                << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
   1931                << "      RHS:\t" << *ExitCnt << "\n"
   1932                << "  IVCount:\t" << *IVCount << "\n");
   1933 
   1934   IRBuilder<> Builder(BI);
   1935 
   1936   // LFTR can ignore IV overflow and truncate to the width of
   1937   // BECount. This avoids materializing the add(zext(add)) expression.
   1938   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
   1939   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
   1940   if (CmpIndVarSize > ExitCntSize) {
   1941     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
   1942     const SCEV *ARStart = AR->getStart();
   1943     const SCEV *ARStep = AR->getStepRecurrence(*SE);
   1944     // For constant IVCount, avoid truncation.
   1945     if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
   1946       const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
   1947       APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
   1948       // Note that the post-inc value of BackedgeTakenCount may have overflowed
   1949       // above such that IVCount is now zero.
   1950       if (IVCount != BackedgeTakenCount && Count == 0) {
   1951         Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
   1952         ++Count;
   1953       }
   1954       else
   1955         Count = Count.zext(CmpIndVarSize);
   1956       APInt NewLimit;
   1957       if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
   1958         NewLimit = Start - Count;
   1959       else
   1960         NewLimit = Start + Count;
   1961       ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
   1962 
   1963       DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
   1964     } else {
   1965       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
   1966                                       "lftr.wideiv");
   1967     }
   1968   }
   1969   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
   1970   Value *OrigCond = BI->getCondition();
   1971   // It's tempting to use replaceAllUsesWith here to fully replace the old
   1972   // comparison, but that's not immediately safe, since users of the old
   1973   // comparison may not be dominated by the new comparison. Instead, just
   1974   // update the branch to use the new comparison; in the common case this
    1975   // will make the old comparison dead.
   1976   BI->setCondition(Cond);
   1977   DeadInsts.push_back(OrigCond);
   1978 
   1979   ++NumLFTR;
   1980   Changed = true;
   1981   return Cond;
   1982 }
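
         // Sketch of the rewrite at the IR level (hypothetical names): a latch
         // that exits on
         //
         //   %cmp = icmp slt i32 %iv, %n
         //   br i1 %cmp, label %loop, label %exit
         //
         // is redirected to the canonical form
         //
         //   %exitcond = icmp ne i32 %iv.next, %lftr.limit
         //   br i1 %exitcond, label %loop, label %exit
         //
         // where %lftr.limit comes from genLoopLimit above.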
   1983 
   1984 //===----------------------------------------------------------------------===//
   1985 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
   1986 //===----------------------------------------------------------------------===//
   1987 
   1988 /// If there's a single exit block, sink any loop-invariant values that
   1989 /// were defined in the preheader but not used inside the loop into the
   1990 /// exit block to reduce register pressure in the loop.
   1991 void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
   1992   BasicBlock *ExitBlock = L->getExitBlock();
   1993   if (!ExitBlock) return;
   1994 
   1995   BasicBlock *Preheader = L->getLoopPreheader();
   1996   if (!Preheader) return;
   1997 
   1998   Instruction *InsertPt = &*ExitBlock->getFirstInsertionPt();
   1999   BasicBlock::iterator I(Preheader->getTerminator());
   2000   while (I != Preheader->begin()) {
   2001     --I;
   2002     // New instructions were inserted at the end of the preheader.
   2003     if (isa<PHINode>(I))
   2004       break;
   2005 
   2006     // Don't move instructions which might have side effects, since the side
   2007     // effects need to complete before instructions inside the loop.  Also don't
   2008     // move instructions which might read memory, since the loop may modify
   2009     // memory. Note that it's okay if the instruction might have undefined
   2010     // behavior: LoopSimplify guarantees that the preheader dominates the exit
   2011     // block.
   2012     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
   2013       continue;
   2014 
   2015     // Skip debug info intrinsics.
   2016     if (isa<DbgInfoIntrinsic>(I))
   2017       continue;
   2018 
   2019     // Skip eh pad instructions.
   2020     if (I->isEHPad())
   2021       continue;
   2022 
    2023     // Don't sink allocas: we never want to sink static allocas out of the
    2024     // entry block, and correctly sinking dynamic allocas requires
   2025     // checks for stacksave/stackrestore intrinsics.
   2026     // FIXME: Refactor this check somehow?
   2027     if (isa<AllocaInst>(I))
   2028       continue;
   2029 
   2030     // Determine if there is a use in or before the loop (direct or
   2031     // otherwise).
   2032     bool UsedInLoop = false;
   2033     for (Use &U : I->uses()) {
   2034       Instruction *User = cast<Instruction>(U.getUser());
   2035       BasicBlock *UseBB = User->getParent();
   2036       if (PHINode *P = dyn_cast<PHINode>(User)) {
   2037         unsigned i =
   2038           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
   2039         UseBB = P->getIncomingBlock(i);
   2040       }
   2041       if (UseBB == Preheader || L->contains(UseBB)) {
   2042         UsedInLoop = true;
   2043         break;
   2044       }
   2045     }
   2046 
   2047     // If there is, the def must remain in the preheader.
   2048     if (UsedInLoop)
   2049       continue;
   2050 
   2051     // Otherwise, sink it to the exit block.
   2052     Instruction *ToMove = &*I;
   2053     bool Done = false;
   2054 
   2055     if (I != Preheader->begin()) {
   2056       // Skip debug info intrinsics.
   2057       do {
   2058         --I;
   2059       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
   2060 
   2061       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
   2062         Done = true;
   2063     } else {
   2064       Done = true;
   2065     }
   2066 
   2067     ToMove->moveBefore(InsertPt);
   2068     if (Done) break;
   2069     InsertPt = ToMove;
   2070   }
   2071 }
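
         // For instance (hypothetical IR), a preheader value
         //
         //   %inv = udiv i64 %a, %b
         //
         // whose only users sit in the exit block is moved there, so it no
         // longer lives in a register across the whole loop body.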
   2072 
   2073 //===----------------------------------------------------------------------===//
   2074 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
   2075 //===----------------------------------------------------------------------===//
   2076 
   2077 bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   2078   if (skipOptnoneFunction(L))
   2079     return false;
   2080 
   2081   // If LoopSimplify form is not available, stay out of trouble. Some notes:
   2082   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
   2083   //    canonicalization can be a pessimization without LSR to "clean up"
   2084   //    afterwards.
   2085   //  - We depend on having a preheader; in particular,
   2086   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
   2087   //    and we're in trouble if we can't find the induction variable even when
   2088   //    we've manually inserted one.
   2089   if (!L->isLoopSimplifyForm())
   2090     return false;
   2091 
   2092   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   2093   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
   2094   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   2095   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
   2096   TLI = TLIP ? &TLIP->getTLI() : nullptr;
   2097   auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
   2098   TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
   2099   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
   2100 
   2101   DeadInsts.clear();
   2102   Changed = false;
   2103 
   2104   // If there are any floating-point recurrences, attempt to
   2105   // transform them to use integer recurrences.
   2106   rewriteNonIntegerIVs(L);
   2107 
   2108   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   2109 
   2110   // Create a rewriter object which we'll use to transform the code with.
   2111   SCEVExpander Rewriter(*SE, DL, "indvars");
   2112 #ifndef NDEBUG
   2113   Rewriter.setDebugType(DEBUG_TYPE);
   2114 #endif
   2115 
   2116   // Eliminate redundant IV users.
   2117   //
   2118   // Simplification works best when run before other consumers of SCEV. We
   2119   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
   2120   // other expressions involving loop IVs have been evaluated. This helps SCEV
   2121   // set no-wrap flags before normalizing sign/zero extension.
   2122   Rewriter.disableCanonicalMode();
   2123   simplifyAndExtend(L, Rewriter, LI);
   2124 
   2125   // Check to see if this loop has a computable loop-invariant execution count.
   2126   // If so, this means that we can compute the final value of any expressions
   2127   // that are recurrent in the loop, and substitute the exit values from the
   2128   // loop into any instructions outside of the loop that use the final values of
   2129   // the current expressions.
   2130   //
   2131   if (ReplaceExitValue != NeverRepl &&
   2132       !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
   2133     rewriteLoopExitValues(L, Rewriter);
   2134 
   2135   // Eliminate redundant IV cycles.
   2136   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
   2137 
   2138   // If we have a trip count expression, rewrite the loop's exit condition
   2139   // using it.  We can currently only handle loops with a single exit.
   2140   if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
   2141     PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
   2142     if (IndVar) {
   2143       // Check preconditions for proper SCEVExpander operation. SCEV does not
   2144       // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
   2145       // pass that uses the SCEVExpander must do it. This does not work well for
   2146       // loop passes because SCEVExpander makes assumptions about all loops,
   2147       // while LoopPassManager only forces the current loop to be simplified.
   2148       //
   2149       // FIXME: SCEV expansion has no way to bail out, so the caller must
   2150       // explicitly check any assumptions made by SCEV. Brittle.
   2151       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
   2152       if (!AR || AR->getLoop()->getLoopPreheader())
   2153         (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
   2154                                         Rewriter);
   2155     }
   2156   }
   2157   // Clear the rewriter cache, because values that are in the rewriter's cache
   2158   // can be deleted in the loop below, causing the AssertingVH in the cache to
   2159   // trigger.
   2160   Rewriter.clear();
   2161 
   2162   // Now that we're done iterating through lists, clean up any instructions
   2163   // which are now dead.
   2164   while (!DeadInsts.empty())
   2165     if (Instruction *Inst =
   2166             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
   2167       RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
   2168 
   2169   // The Rewriter may not be used from this point on.
   2170 
   2171   // Loop-invariant instructions in the preheader that aren't used in the
   2172   // loop may be sunk below the loop to reduce register pressure.
   2173   sinkUnusedInvariants(L);
   2174 
   2175   // Clean up dead instructions.
   2176   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
   2177 
   2178   // Check a post-condition.
   2179   assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");
   2180 
    2181   // Verify that LFTR and any other changes have not interfered with SCEV's
    2182   // ability to compute the trip count.
   2183 #ifndef NDEBUG
   2184   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
   2185     SE->forgetLoop(L);
   2186     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
   2187     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
   2188         SE->getTypeSizeInBits(NewBECount->getType()))
   2189       NewBECount = SE->getTruncateOrNoop(NewBECount,
   2190                                          BackedgeTakenCount->getType());
   2191     else
   2192       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
   2193                                                  NewBECount->getType());
   2194     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
   2195   }
   2196 #endif
   2197 
   2198   return Changed;
   2199 }
   2200