//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));
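
// Illustrative usage (a sketch, assuming a standard 'opt' invocation; not
// referenced by the code below):
//   opt -indvars -replexitval=never   ; disable exit value replacement
//   opt -indvars -replexitval=always  ; replace whenever possible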

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool Changed = false;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  void handleFloatingPointIV(Loop *L, PHINode *PH);
  void rewriteNonIntegerIVs(Loop *L);

  void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  void rewriteFirstIterationLoopExitValues(Loop *L);

  Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                   PHINode *IndVar, SCEVExpander &Rewriter);

  void sinkUnusedInvariants(Loop *L);

  Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
                            Instruction *InsertPt, Type *Ty);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}
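
// For example (a sketch): if the expansion reassociated a GEP so that its
// SCEV pointer base differs from the original GEP's base (i.e. it now indexes
// off a different underlying object), the check above rejects the rewrite.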

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);
    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }
  assert(InsertPt && "Missing phi operand");

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}
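
// For example (a sketch): if Def reaches a phi along two incoming blocks B1
// and B2, the code above places the insertion point at the terminator of
// their nearest common dominator, then walks up the idom chain to the first
// block that is still in Def's own loop.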

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}
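
// For example, an APFloat holding 3.0 converts exactly (IntVal == 3), while
// 3.5 or a value outside the signed 64-bit range fails the isExact check.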

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return;
  }
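
  // For example (a sketch): 'for (double i = 0.0; i != 10.0; i += 3.0)' is
  // rejected above: the stride never lands exactly on the exit value, so an
  // integer IV with an equality exit test would wrap around rather than
  // mirror the fp IV's behavior.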

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  Changed = true;
}

void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if the expansion is high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
                                          Loop *L, Instruction *InsertPt,
                                          Type *ResultTy) {
  // Before expanding S into an expensive LLVM expression, see if we can use an
  // already existing value as the expansion for S.
  if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L))
    if (ExistingValue->getType() == ResultTy)
      return ExistingValue;

  // We didn't find anything, fall back to using SCEVExpander.
  return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
}

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
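/// For example (a sketch): in
///   for (i = 0; i != n; ++i) sum += 2;
///   use(sum);
/// the exit value of 'sum' is 2*n, so the use outside the loop can be
/// rewritten to 2*n, potentially making the loop dead.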
void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if:
        //  - it is definitely used inside the loop in a way which cannot be
        //    optimized away.
        //  - no use outside of the loop can take advantage of hoisting the
        //    computation out of the loop
        if (ExitValue->getSCEVType() >= scMulExpr) {
          unsigned NumHardInternalUses = 0;
          unsigned NumSoftExternalUses = 0;
          unsigned NumUses = 0;
          for (auto IB = Inst->user_begin(), IE = Inst->user_end();
               IB != IE && NumUses <= 6; ++IB) {
            Instruction *UseInstr = cast<Instruction>(*IB);
            unsigned Opc = UseInstr->getOpcode();
            NumUses++;
            if (L->contains(UseInstr)) {
              if (Opc == Instruction::Call || Opc == Instruction::Ret)
                NumHardInternalUses++;
            } else {
              if (Opc == Instruction::PHI) {
                // Do not count the Phi as a use. LCSSA may have inserted
                // plenty of trivial ones.
                NumUses--;
                for (auto PB = UseInstr->user_begin(),
                          PE = UseInstr->user_end();
                     PB != PE && NumUses <= 6; ++PB, ++NumUses) {
                  unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
                  if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
                    NumSoftExternalUses++;
                }
                continue;
              }
              if (Opc != Instruction::Call && Opc != Instruction::Ret)
                NumSoftExternalUses++;
            }
          }
          if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
            continue;
        }

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal =
            expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, queue it for deletion rather than
    // deleting it immediately, which could invalidate our iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
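/// For example (a sketch): in
///   for (i = 0; i != n; ++i) { if (inv_cond) break; ... }
/// with a loop-invariant 'inv_cond', the break can only be taken on the first
/// iteration, so a header phi's value along that exit is its preheader value.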
void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  auto *LoopHeader = L->getHeader();
  assert(LoopHeader && "Invalid loop");

  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // We currently only support loop exits from the loop header. If the
        // incoming block is not the loop header, we need to recursively check
        // that all conditions starting from the loop header are loop
        // invariants. Additional support might be added in the future.
        if (IncomingBB != LoopHeader)
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs.
        if (!ExitVal)
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == LoopHeader &&
                 "ExitVal must be in loop header");
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
}

/// Check whether it is possible to delete the loop after rewriting the exit
/// values. If it is possible, ignore ReplaceExitValue and do the rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete a loop even when ExitingBlocks.size() > 1;
  // we skip the multiple-ExitingBlocks case here for simplicity.
  // TODO: If we see a testcase where a loop with multiple exiting blocks can be
  // deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
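
// For example (a sketch): given users 'sext i32 %iv to i64' and
// 'zext i32 %iv to i64' of the same IV, the sign of the first extend seen
// wins; among extends of the matching sign, the widest legal type is kept.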

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
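/// For example (a sketch): an i32 IV that is sign-extended to i64 for an
/// address computation can be replaced by an i64 IV, deleting the sext;
/// remaining i32 users are fed by a trunc of the wide IV.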
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

/// Perform a quick domtree based check for loop invariance assuming that V is
/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
/// purpose.
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
  Instruction *Inst = dyn_cast<Instruction>(V);
  if (!Inst)
    return true;

  return DT->properlyDominates(Inst->getParent(), L->getHeader());
}

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
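/// For example (a sketch): for '%u = add nsw i32 %iv, %n', nsw justifies
/// sext(%iv + %n) == sext(%iv) + sext(%n) in SCEV; if that sum folds to an
/// AddRec on this loop, the add can be rebuilt directly on the wide IV.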
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported for now.
   1232   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
   1233       OpCode != Instruction::Mul)
   1234     return {nullptr, Unknown};
   1235 
   1236   // One operand (NarrowDef) has already been extended to WideDef. Now determine
   1237   // if extending the other will lead to a recurrence.
   1238   const unsigned ExtendOperIdx =
   1239       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
   1240   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
   1241 
   1242   const SCEV *ExtendOperExpr = nullptr;
   1243   const OverflowingBinaryOperator *OBO =
   1244     cast<OverflowingBinaryOperator>(DU.NarrowUse);
   1245   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
   1246   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
   1247     ExtendOperExpr = SE->getSignExtendExpr(
   1248       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
    1249   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
   1250     ExtendOperExpr = SE->getZeroExtendExpr(
   1251       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
   1252   else
   1253     return {nullptr, Unknown};
   1254 
    1255   // When creating this SCEV expr, don't apply the current operation's NSW or NUW
   1256   // flags. This instruction may be guarded by control flow that the no-wrap
   1257   // behavior depends on. Non-control-equivalent instructions can be mapped to
   1258   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
   1259   // semantics to those operations.
   1260   const SCEV *lhs = SE->getSCEV(DU.WideDef);
   1261   const SCEV *rhs = ExtendOperExpr;
   1262 
    1263   // Swap the operands back into their original order for non-commutative
    1264   // operations such as SUB. See PR21014.
   1265   if (ExtendOperIdx == 0)
   1266     std::swap(lhs, rhs);
   1267   const SCEVAddRecExpr *AddRec =
   1268       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
   1269 
   1270   if (!AddRec || AddRec->getLoop() != L)
   1271     return {nullptr, Unknown};
   1272 
   1273   return {AddRec, ExtKind};
   1274 }
   1275 
   1276 /// Is this instruction potentially interesting for further simplification after
    1277 /// widening its type? In other words, can the extend be safely hoisted out of
    1278 /// the loop, with SCEV reducing the value to a recurrence on the same loop? If
   1279 /// so, return the extended recurrence and the kind of extension used. Otherwise
   1280 /// return {nullptr, Unknown}.
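         ///
         /// A sketch of the idea (illustrative IR, assuming %narrow.iv is the
         /// AddRec {0,+,1}): for a narrow use
         ///
         ///   %use = mul i32 %narrow.iv, 3
         ///
         /// whose SCEV is {0,+,3}, sign extending the whole expression may still
         /// yield an AddRec at the wide type, e.g. {0,+,3}<i64> when SCEV can prove
         /// the extension is lossless; such a use is worth following.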
   1281 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
   1282   if (!SE->isSCEVable(DU.NarrowUse->getType()))
   1283     return {nullptr, Unknown};
   1284 
   1285   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
   1286   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
   1287       SE->getTypeSizeInBits(WideType)) {
   1288     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
   1289     // index. So don't follow this use.
   1290     return {nullptr, Unknown};
   1291   }
   1292 
   1293   const SCEV *WideExpr;
   1294   ExtendKind ExtKind;
   1295   if (DU.NeverNegative) {
   1296     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
   1297     if (isa<SCEVAddRecExpr>(WideExpr))
   1298       ExtKind = SignExtended;
   1299     else {
   1300       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
   1301       ExtKind = ZeroExtended;
   1302     }
   1303   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
   1304     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
   1305     ExtKind = SignExtended;
   1306   } else {
   1307     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
   1308     ExtKind = ZeroExtended;
   1309   }
   1310   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
   1311   if (!AddRec || AddRec->getLoop() != L)
   1312     return {nullptr, Unknown};
   1313   return {AddRec, ExtKind};
   1314 }
   1315 
    1316 /// This IV user cannot be widened. Replace this use of the original narrow IV
   1317 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
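         ///
         /// A minimal sketch of the rewrite (hypothetical IR): a use such as
         ///
         ///   %r = ashr i32 %narrow.iv, 2
         ///
         /// that cannot be widened becomes
         ///
         ///   %t = trunc i64 %wide.iv to i32
         ///   %r = ashr i32 %t, 2
         ///
         /// so the narrow IV can be deleted once all of its uses are rewritten.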
   1318 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
   1319   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
   1320                     << *DU.NarrowUse << "\n");
   1321   IRBuilder<> Builder(
   1322       getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
   1323   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
   1324   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
   1325 }
   1326 
    1327 /// If the narrow use is a compare instruction, then widen the compare
    1328 /// (and possibly the other operand). The extend operation is hoisted into the
    1329 /// loop preheader as far as possible.
   1330 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
   1331   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
   1332   if (!Cmp)
   1333     return false;
   1334 
   1335   // We can legally widen the comparison in the following two cases:
   1336   //
   1337   //  - The signedness of the IV extension and comparison match
   1338   //
   1339   //  - The narrow IV is always positive (and thus its sign extension is equal
   1340   //    to its zero extension).  For instance, let's say we're zero extending
   1341   //    %narrow for the following use
   1342   //
   1343   //      icmp slt i32 %narrow, %val   ... (A)
   1344   //
   1345   //    and %narrow is always positive.  Then
   1346   //
    1347   //      (A) == icmp slt sext(%narrow), sext(%val)    (at the wide type)
    1348   //          == icmp slt zext(%narrow), sext(%val)
   1349   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
   1350   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
   1351     return false;
   1352 
   1353   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
   1354   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
   1355   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
   1356   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
   1357 
   1358   // Widen the compare instruction.
   1359   IRBuilder<> Builder(
   1360       getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
   1361   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
   1362 
   1363   // Widen the other operand of the compare, if necessary.
   1364   if (CastWidth < IVWidth) {
   1365     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
   1366     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
   1367   }
   1368   return true;
   1369 }
   1370 
   1371 /// Determine whether an individual user of the narrow IV can be widened. If so,
   1372 /// return the wide clone of the user.
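         ///
         /// The strategies below are tried in order: eliminate a [sz]ext user
         /// outright, clone the user as a wide recurrence, widen a loop-exit
         /// compare, and finally fall back to truncating the wide IV for this use.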
   1373 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
   1374   assert(ExtendKindMap.count(DU.NarrowDef) &&
   1375          "Should already know the kind of extension used to widen NarrowDef");
   1376 
   1377   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
   1378   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
   1379     if (LI->getLoopFor(UsePhi->getParent()) != L) {
   1380       // For LCSSA phis, sink the truncate outside the loop.
   1381       // After SimplifyCFG most loop exit targets have a single predecessor.
   1382       // Otherwise fall back to a truncate within the loop.
   1383       if (UsePhi->getNumOperands() != 1)
   1384         truncateIVUse(DU, DT, LI);
   1385       else {
   1386         // Widening the PHI requires us to insert a trunc.  The logical place
   1387         // for this trunc is in the same BB as the PHI.  This is not possible if
   1388         // the BB is terminated by a catchswitch.
   1389         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
   1390           return nullptr;
   1391 
   1392         PHINode *WidePhi =
   1393           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
   1394                           UsePhi);
   1395         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
   1396         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
   1397         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
   1398         UsePhi->replaceAllUsesWith(Trunc);
   1399         DeadInsts.emplace_back(UsePhi);
   1400         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
   1401                           << *WidePhi << "\n");
   1402       }
   1403       return nullptr;
   1404     }
   1405   }
   1406 
   1407   // This narrow use can be widened by a sext if it's non-negative or its narrow
    1408   // def was widened by a sext. Same for zext.
   1409   auto canWidenBySExt = [&]() {
   1410     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
   1411   };
   1412   auto canWidenByZExt = [&]() {
   1413     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
   1414   };
   1415 
   1416   // Our raison d'etre! Eliminate sign and zero extension.
   1417   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
   1418       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
   1419     Value *NewDef = DU.WideDef;
   1420     if (DU.NarrowUse->getType() != WideType) {
   1421       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
   1422       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
   1423       if (CastWidth < IVWidth) {
   1424         // The cast isn't as wide as the IV, so insert a Trunc.
   1425         IRBuilder<> Builder(DU.NarrowUse);
   1426         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
   1427       }
   1428       else {
   1429         // A wider extend was hidden behind a narrower one. This may induce
   1430         // another round of IV widening in which the intermediate IV becomes
   1431         // dead. It should be very rare.
   1432         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
   1433                           << " not wide enough to subsume " << *DU.NarrowUse
   1434                           << "\n");
   1435         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
   1436         NewDef = DU.NarrowUse;
   1437       }
   1438     }
   1439     if (NewDef != DU.NarrowUse) {
   1440       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
   1441                         << " replaced by " << *DU.WideDef << "\n");
   1442       ++NumElimExt;
   1443       DU.NarrowUse->replaceAllUsesWith(NewDef);
   1444       DeadInsts.emplace_back(DU.NarrowUse);
   1445     }
    1446     // Now that the extend is gone, we want to expose its uses for potential
   1447     // further simplification. We don't need to directly inform SimplifyIVUsers
   1448     // of the new users, because their parent IV will be processed later as a
   1449     // new loop phi. If we preserved IVUsers analysis, we would also want to
   1450     // push the uses of WideDef here.
   1451 
    1452     // No further widening is needed. The deceased [sz]ext has done it for us.
   1453     return nullptr;
   1454   }
   1455 
   1456   // Does this user itself evaluate to a recurrence after widening?
   1457   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
   1458   if (!WideAddRec.first)
   1459     WideAddRec = getWideRecurrence(DU);
   1460 
   1461   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
   1462   if (!WideAddRec.first) {
   1463     // If use is a loop condition, try to promote the condition instead of
   1464     // truncating the IV first.
   1465     if (widenLoopCompare(DU))
   1466       return nullptr;
   1467 
   1468     // This user does not evaluate to a recurrence after widening, so don't
   1469     // follow it. Instead insert a Trunc to kill off the original use,
   1470     // eventually isolating the original narrow IV so it can be removed.
   1471     truncateIVUse(DU, DT, LI);
   1472     return nullptr;
   1473   }
    1474   // Assume block terminators cannot evaluate to a recurrence. We can't
   1475   // insert a Trunc after a terminator if there happens to be a critical edge.
   1476   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
   1477          "SCEV is not expected to evaluate a block terminator");
   1478 
   1479   // Reuse the IV increment that SCEVExpander created as long as it dominates
   1480   // NarrowUse.
   1481   Instruction *WideUse = nullptr;
   1482   if (WideAddRec.first == WideIncExpr &&
   1483       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
   1484     WideUse = WideInc;
   1485   else {
   1486     WideUse = cloneIVUser(DU, WideAddRec.first);
   1487     if (!WideUse)
   1488       return nullptr;
   1489   }
   1490   // Evaluation of WideAddRec ensured that the narrow expression could be
   1491   // extended outside the loop without overflow. This suggests that the wide use
   1492   // evaluates to the same expression as the extended narrow use, but doesn't
   1493   // absolutely guarantee it. Hence the following failsafe check. In rare cases
   1494   // where it fails, we simply throw away the newly created wide use.
   1495   if (WideAddRec.first != SE->getSCEV(WideUse)) {
   1496     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
   1497                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
   1498                       << "\n");
   1499     DeadInsts.emplace_back(WideUse);
   1500     return nullptr;
   1501   }
   1502 
   1503   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
   1504   // Returning WideUse pushes it on the worklist.
   1505   return WideUse;
   1506 }
   1507 
   1508 /// Add eligible users of NarrowDef to NarrowIVUsers.
   1509 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
   1510   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
   1511   bool NonNegativeDef =
   1512       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
   1513                            SE->getConstant(NarrowSCEV->getType(), 0));
   1514   for (User *U : NarrowDef->users()) {
   1515     Instruction *NarrowUser = cast<Instruction>(U);
   1516 
   1517     // Handle data flow merges and bizarre phi cycles.
   1518     if (!Widened.insert(NarrowUser).second)
   1519       continue;
   1520 
   1521     bool NonNegativeUse = false;
   1522     if (!NonNegativeDef) {
    1523       // We might have control-dependent range information for this context.
   1524       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
   1525         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
   1526     }
   1527 
   1528     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
   1529                                NonNegativeDef || NonNegativeUse);
   1530   }
   1531 }
   1532 
   1533 /// Process a single induction variable. First use the SCEVExpander to create a
   1534 /// wide induction variable that evaluates to the same recurrence as the
   1535 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
   1536 /// def-use chain. After widenIVUse has processed all interesting IV users, the
   1537 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
   1538 ///
   1539 /// It would be simpler to delete uses as they are processed, but we must avoid
   1540 /// invalidating SCEV expressions.
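         ///
         /// Sketch of the overall effect (illustrative, assuming an i32 IV widened
         /// to i64):
         ///
         ///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
         ///   %iv.next = add nsw i32 %iv, 1
         ///
         /// gains a wide twin that the worklist then substitutes into users:
         ///
         ///   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
         ///   %iv.wide.next = add nsw i64 %iv.wide, 1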
   1541 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
   1542   // Is this phi an induction variable?
   1543   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
   1544   if (!AddRec)
   1545     return nullptr;
   1546 
   1547   // Widen the induction variable expression.
   1548   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
   1549                                ? SE->getSignExtendExpr(AddRec, WideType)
   1550                                : SE->getZeroExtendExpr(AddRec, WideType);
   1551 
   1552   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
   1553          "Expect the new IV expression to preserve its type");
   1554 
   1555   // Can the IV be extended outside the loop without overflow?
   1556   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
   1557   if (!AddRec || AddRec->getLoop() != L)
   1558     return nullptr;
   1559 
   1560   // An AddRec must have loop-invariant operands. Since this AddRec is
   1561   // materialized by a loop header phi, the expression cannot have any post-loop
   1562   // operands, so they must dominate the loop header.
   1563   assert(
   1564       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
   1565       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
   1566       "Loop header phi recurrence inputs do not dominate the loop");
   1567 
   1568   // Iterate over IV uses (including transitive ones) looking for IV increments
   1569   // of the form 'add nsw %iv, <const>'. For each increment and each use of
    1570   // the increment, calculate control-dependent range information based on
   1571   // dominating conditions inside of the loop (e.g. a range check inside of the
   1572   // loop). Calculated ranges are stored in PostIncRangeInfos map.
   1573   //
   1574   // Control-dependent range information is later used to prove that a narrow
   1575   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
   1576   // this on demand because when pushNarrowIVUsers needs this information some
   1577   // of the dominating conditions might be already widened.
   1578   if (UsePostIncrementRanges)
   1579     calculatePostIncRanges(OrigPhi);
   1580 
   1581   // The rewriter provides a value for the desired IV expression. This may
   1582   // either find an existing phi or materialize a new one. Either way, we
    1583   // expect a well-formed cyclic phi-with-increments, i.e. any operand not part
   1584   // of the phi-SCC dominates the loop entry.
   1585   Instruction *InsertPt = &L->getHeader()->front();
   1586   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
   1587 
   1588   // Remembering the WideIV increment generated by SCEVExpander allows
   1589   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
   1590   // employ a general reuse mechanism because the call above is the only call to
   1591   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
   1592   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
   1593     WideInc =
   1594       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
   1595     WideIncExpr = SE->getSCEV(WideInc);
   1596     // Propagate the debug location associated with the original loop increment
   1597     // to the new (widened) increment.
   1598     auto *OrigInc =
   1599       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
   1600     WideInc->setDebugLoc(OrigInc->getDebugLoc());
   1601   }
   1602 
   1603   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
   1604   ++NumWidened;
   1605 
   1606   // Traverse the def-use chain using a worklist starting at the original IV.
    1607   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
   1608 
   1609   Widened.insert(OrigPhi);
   1610   pushNarrowIVUsers(OrigPhi, WidePhi);
   1611 
   1612   while (!NarrowIVUsers.empty()) {
   1613     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
   1614 
   1615     // Process a def-use edge. This may replace the use, so don't hold a
   1616     // use_iterator across it.
   1617     Instruction *WideUse = widenIVUse(DU, Rewriter);
   1618 
   1619     // Follow all def-use edges from the previous narrow use.
   1620     if (WideUse)
   1621       pushNarrowIVUsers(DU.NarrowUse, WideUse);
   1622 
   1623     // widenIVUse may have removed the def-use edge.
   1624     if (DU.NarrowDef->use_empty())
   1625       DeadInsts.emplace_back(DU.NarrowDef);
   1626   }
   1627 
    1628   // Attach any debug information to the new PHI. Since OrigPhi and WidePhi
   1629   // evaluate the same recurrence, we can just copy the debug info over.
   1630   SmallVector<DbgValueInst *, 1> DbgValues;
   1631   llvm::findDbgValues(DbgValues, OrigPhi);
   1632   auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
   1633                                      ValueAsMetadata::get(WidePhi));
   1634   for (auto &DbgValue : DbgValues)
   1635     DbgValue->setOperand(0, MDPhi);
   1636   return WidePhi;
   1637 }
   1638 
   1639 /// Calculates control-dependent range for the given def at the given context
    1640 /// by looking at dominating conditions inside of the loop.
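         ///
         /// For example (hypothetical IR): with NarrowDef
         ///
         ///   %x = add nsw i32 %a, 1
         ///
         /// and a condition 'icmp sge i32 %a, 0' on a branch dominating NarrowUser,
         /// %a is in [0, INT_MAX] at the use, so the no-signed-wrap add constrains
         /// %x to [1, INT_MAX] and proves it non-negative in that context.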
   1641 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
   1642                                     Instruction *NarrowUser) {
   1643   using namespace llvm::PatternMatch;
   1644 
   1645   Value *NarrowDefLHS;
   1646   const APInt *NarrowDefRHS;
   1647   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
   1648                                  m_APInt(NarrowDefRHS))) ||
   1649       !NarrowDefRHS->isNonNegative())
   1650     return;
   1651 
   1652   auto UpdateRangeFromCondition = [&] (Value *Condition,
   1653                                        bool TrueDest) {
   1654     CmpInst::Predicate Pred;
   1655     Value *CmpRHS;
   1656     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
   1657                                  m_Value(CmpRHS))))
   1658       return;
   1659 
   1660     CmpInst::Predicate P =
   1661             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
   1662 
   1663     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
   1664     auto CmpConstrainedLHSRange =
   1665             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
   1666     auto NarrowDefRange =
   1667             CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
   1668 
   1669     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
   1670   };
   1671 
   1672   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
   1673     if (!HasGuards)
   1674       return;
   1675 
   1676     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
   1677                                      Ctx->getParent()->rend())) {
   1678       Value *C = nullptr;
   1679       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
   1680         UpdateRangeFromCondition(C, /*TrueDest=*/true);
   1681     }
   1682   };
   1683 
   1684   UpdateRangeFromGuards(NarrowUser);
   1685 
   1686   BasicBlock *NarrowUserBB = NarrowUser->getParent();
    1687   // If NarrowUserBB is statically unreachable, asking dominator queries may
    1688   // yield surprising results (e.g. the block may not have a dom tree node).
   1689   if (!DT->isReachableFromEntry(NarrowUserBB))
   1690     return;
   1691 
   1692   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
   1693        L->contains(DTB->getBlock());
   1694        DTB = DTB->getIDom()) {
   1695     auto *BB = DTB->getBlock();
   1696     auto *TI = BB->getTerminator();
   1697     UpdateRangeFromGuards(TI);
   1698 
   1699     auto *BI = dyn_cast<BranchInst>(TI);
   1700     if (!BI || !BI->isConditional())
   1701       continue;
   1702 
   1703     auto *TrueSuccessor = BI->getSuccessor(0);
   1704     auto *FalseSuccessor = BI->getSuccessor(1);
   1705 
   1706     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
   1707       return BBE.isSingleEdge() &&
   1708              DT->dominates(BBE, NarrowUser->getParent());
   1709     };
   1710 
   1711     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
   1712       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
   1713 
   1714     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
   1715       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
   1716   }
   1717 }
   1718 
    1719 /// Calculates the PostIncRangeInfos map for the given IV.
   1720 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
   1721   SmallPtrSet<Instruction *, 16> Visited;
   1722   SmallVector<Instruction *, 6> Worklist;
   1723   Worklist.push_back(OrigPhi);
   1724   Visited.insert(OrigPhi);
   1725 
   1726   while (!Worklist.empty()) {
   1727     Instruction *NarrowDef = Worklist.pop_back_val();
   1728 
   1729     for (Use &U : NarrowDef->uses()) {
   1730       auto *NarrowUser = cast<Instruction>(U.getUser());
   1731 
   1732       // Don't go looking outside the current loop.
   1733       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
   1734       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
   1735         continue;
   1736 
   1737       if (!Visited.insert(NarrowUser).second)
   1738         continue;
   1739 
   1740       Worklist.push_back(NarrowUser);
   1741 
   1742       calculatePostIncRange(NarrowDef, NarrowUser);
   1743     }
   1744   }
   1745 }
   1746 
   1747 //===----------------------------------------------------------------------===//
   1748 //  Live IV Reduction - Minimize IVs live across the loop.
   1749 //===----------------------------------------------------------------------===//
   1750 
   1751 //===----------------------------------------------------------------------===//
   1752 //  Simplification of IV users based on SCEV evaluation.
   1753 //===----------------------------------------------------------------------===//
   1754 
   1755 namespace {
   1756 
   1757 class IndVarSimplifyVisitor : public IVVisitor {
   1758   ScalarEvolution *SE;
   1759   const TargetTransformInfo *TTI;
   1760   PHINode *IVPhi;
   1761 
   1762 public:
   1763   WideIVInfo WI;
   1764 
   1765   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
   1766                         const TargetTransformInfo *TTI,
   1767                         const DominatorTree *DTree)
   1768     : SE(SCEV), TTI(TTI), IVPhi(IV) {
   1769     DT = DTree;
   1770     WI.NarrowIV = IVPhi;
   1771   }
   1772 
   1773   // Implement the interface used by simplifyUsersOfIV.
   1774   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
   1775 };
   1776 
   1777 } // end anonymous namespace
   1778 
   1779 /// Iteratively perform simplification on a worklist of IV users. Each
   1780 /// successive simplification may push more users which may themselves be
   1781 /// candidates for simplification.
   1782 ///
   1783 /// Sign/Zero extend elimination is interleaved with IV simplification.
   1784 void IndVarSimplify::simplifyAndExtend(Loop *L,
   1785                                        SCEVExpander &Rewriter,
   1786                                        LoopInfo *LI) {
   1787   SmallVector<WideIVInfo, 8> WideIVs;
   1788 
   1789   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
   1790           Intrinsic::getName(Intrinsic::experimental_guard));
   1791   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
   1792 
   1793   SmallVector<PHINode*, 8> LoopPhis;
   1794   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
   1795     LoopPhis.push_back(cast<PHINode>(I));
   1796   }
   1797   // Each round of simplification iterates through the SimplifyIVUsers worklist
   1798   // for all current phis, then determines whether any IVs can be
   1799   // widened. Widening adds new phis to LoopPhis, inducing another round of
   1800   // simplification on the wide IVs.
   1801   while (!LoopPhis.empty()) {
   1802     // Evaluate as many IV expressions as possible before widening any IVs. This
   1803     // forces SCEV to set no-wrap flags before evaluating sign/zero
   1804     // extension. The first time SCEV attempts to normalize sign/zero extension,
   1805     // the result becomes final. So for the most predictable results, we delay
    1806     // evaluation of sign/zero extensions until needed, and avoid running
    1807     // other SCEV-based analyses prior to simplifyAndExtend.
   1808     do {
   1809       PHINode *CurrIV = LoopPhis.pop_back_val();
   1810 
   1811       // Information about sign/zero extensions of CurrIV.
   1812       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
   1813 
   1814       Changed |=
   1815           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
   1816 
   1817       if (Visitor.WI.WidestNativeType) {
   1818         WideIVs.push_back(Visitor.WI);
   1819       }
    1820     } while (!LoopPhis.empty());
   1821 
   1822     for (; !WideIVs.empty(); WideIVs.pop_back()) {
   1823       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
   1824       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
   1825         Changed = true;
   1826         LoopPhis.push_back(WidePhi);
   1827       }
   1828     }
   1829   }
   1830 }
   1831 
   1832 //===----------------------------------------------------------------------===//
   1833 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
   1834 //===----------------------------------------------------------------------===//
   1835 
   1836 /// Return true if this loop's backedge taken count expression can be safely and
   1837 /// cheaply expanded into an instruction sequence that can be used by
   1838 /// linearFunctionTestReplace.
   1839 ///
   1840 /// TODO: This fails for pointer-type loop counters with greater than one byte
   1841 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
   1842 /// we could skip this check in the case that the LFTR loop counter (chosen by
   1843 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
   1844 /// the loop test to an inequality test by checking the target data's alignment
   1845 /// of element types (given that the initial pointer value originates from or is
   1846 /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
   1847 /// However, we don't yet have a strong motivation for converting loop tests
   1848 /// into inequality tests.
   1849 static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
   1850                                         SCEVExpander &Rewriter) {
   1851   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   1852   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
   1853       BackedgeTakenCount->isZero())
   1854     return false;
   1855 
   1856   if (!L->getExitingBlock())
   1857     return false;
   1858 
    1859   // Can't rewrite non-branch terminators yet.
   1860   if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
   1861     return false;
   1862 
   1863   if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
   1864     return false;
   1865 
   1866   return true;
   1867 }
   1868 
   1869 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
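         ///
         /// For example (illustrative IR), given
         ///
         ///   %phi = phi i32 [ %start, %preheader ], [ %inc, %latch ]
         ///   %inc = add i32 %phi, %step   ; %step is loop-invariant
         ///
         /// getLoopPhiForCounter(%inc, L, DT) returns %phi.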
   1870 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
   1871   Instruction *IncI = dyn_cast<Instruction>(IncV);
   1872   if (!IncI)
   1873     return nullptr;
   1874 
   1875   switch (IncI->getOpcode()) {
   1876   case Instruction::Add:
   1877   case Instruction::Sub:
   1878     break;
   1879   case Instruction::GetElementPtr:
   1880     // An IV counter must preserve its type.
   1881     if (IncI->getNumOperands() == 2)
   1882       break;
   1883     LLVM_FALLTHROUGH;
   1884   default:
   1885     return nullptr;
   1886   }
   1887 
   1888   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
   1889   if (Phi && Phi->getParent() == L->getHeader()) {
   1890     if (isLoopInvariant(IncI->getOperand(1), L, DT))
   1891       return Phi;
   1892     return nullptr;
   1893   }
   1894   if (IncI->getOpcode() == Instruction::GetElementPtr)
   1895     return nullptr;
   1896 
   1897   // Allow add/sub to be commuted.
   1898   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
   1899   if (Phi && Phi->getParent() == L->getHeader()) {
   1900     if (isLoopInvariant(IncI->getOperand(0), L, DT))
   1901       return Phi;
   1902   }
   1903   return nullptr;
   1904 }
   1905 
   1906 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
   1907 static ICmpInst *getLoopTest(Loop *L) {
   1908   assert(L->getExitingBlock() && "expected loop exit");
   1909 
   1910   BasicBlock *LatchBlock = L->getLoopLatch();
   1911   // Don't bother with LFTR if the loop is not properly simplified.
   1912   if (!LatchBlock)
   1913     return nullptr;
   1914 
   1915   BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1916   assert(BI && "expected exit branch");
   1917 
   1918   return dyn_cast<ICmpInst>(BI->getCondition());
   1919 }
   1920 
   1921 /// linearFunctionTestReplace policy. Return true unless we can show that the
   1922 /// current exit test is already sufficiently canonical.
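         ///
         /// For instance (a sketch), an exit test that is already canonical and
         /// needs no LFTR looks like
         ///
         ///   %iv.next = add i32 %iv, 1
         ///   %exitcond = icmp ne i32 %iv.next, %limit   ; %limit loop-invariant
         ///
         /// whereas a test such as 'icmp slt i32 %iv, %limit' would be rewritten.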
   1923 static bool needsLFTR(Loop *L, DominatorTree *DT) {
   1924   // Do LFTR to simplify the exit condition to an ICMP.
   1925   ICmpInst *Cond = getLoopTest(L);
   1926   if (!Cond)
   1927     return true;
   1928 
   1929   // Do LFTR to simplify the exit ICMP to EQ/NE
   1930   ICmpInst::Predicate Pred = Cond->getPredicate();
   1931   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
   1932     return true;
   1933 
   1934   // Look for a loop invariant RHS
   1935   Value *LHS = Cond->getOperand(0);
   1936   Value *RHS = Cond->getOperand(1);
   1937   if (!isLoopInvariant(RHS, L, DT)) {
   1938     if (!isLoopInvariant(LHS, L, DT))
   1939       return true;
   1940     std::swap(LHS, RHS);
   1941   }
   1942   // Look for a simple IV counter LHS
   1943   PHINode *Phi = dyn_cast<PHINode>(LHS);
   1944   if (!Phi)
   1945     Phi = getLoopPhiForCounter(LHS, L, DT);
   1946 
   1947   if (!Phi)
   1948     return true;
   1949 
   1950   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
   1951   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
   1952   if (Idx < 0)
   1953     return true;
   1954 
   1955   // Do LFTR if the exit condition's IV is *not* a simple counter.
   1956   Value *IncV = Phi->getIncomingValue(Idx);
   1957   return Phi != getLoopPhiForCounter(IncV, L, DT);
   1958 }
   1959 
   1960 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
   1961 /// down to checking that all operands are constant and listing instructions
   1962 /// that may hide undef.
   1963 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
   1964                                unsigned Depth) {
   1965   if (isa<Constant>(V))
   1966     return !isa<UndefValue>(V);
   1967 
   1968   if (Depth >= 6)
   1969     return false;
   1970 
   1971   // Conservatively handle non-constant non-instructions. For example, Arguments
   1972   // may be undef.
   1973   Instruction *I = dyn_cast<Instruction>(V);
   1974   if (!I)
   1975     return false;
   1976 
   1977   // Load and return values may be undef.
    1978   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
   1979     return false;
   1980 
   1981   // Optimistically handle other instructions.
   1982   for (Value *Op : I->operands()) {
   1983     if (!Visited.insert(Op).second)
   1984       continue;
   1985     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
   1986       return false;
   1987   }
   1988   return true;
   1989 }
   1990 
   1991 /// Return true if the given value is concrete. We must prove that undef can
   1992 /// never reach it.
   1993 ///
   1994 /// TODO: If we decide that this is a good approach to checking for undef, we
   1995 /// may factor it into a common location.
   1996 static bool hasConcreteDef(Value *V) {
   1997   SmallPtrSet<Value*, 8> Visited;
   1998   Visited.insert(V);
   1999   return hasConcreteDefImpl(V, Visited, 0);
   2000 }
   2001 
    2002 /// Return true if this IV has no uses other than the (soon to be rewritten)
    2003 /// loop exit test; i.e., the IV is almost dead.
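         ///
         /// For example (illustrative IR), the IV below is almost dead:
         ///
         ///   %iv = phi i32 ...                 ; used only by %iv.next and %cond
         ///   %iv.next = add i32 %iv, 1         ; used only by %iv and %cond
         ///   %cond = icmp ne i32 %iv.next, %n
         ///
         /// so LFTR can retarget %cond without keeping any extra value live.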
   2004 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
   2005   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
   2006   Value *IncV = Phi->getIncomingValue(LatchIdx);
   2007 
   2008   for (User *U : Phi->users())
   2009     if (U != Cond && U != IncV) return false;
   2010 
   2011   for (User *U : IncV->users())
   2012     if (U != Cond && U != Phi) return false;
   2013   return true;
   2014 }
   2015 
   2016 /// Find an affine IV in canonical form.
   2017 ///
    2018 /// BECount may be an i8* pointer type. The pointer difference is already a
    2019 /// valid count without scaling the address stride, so it remains a pointer
   2020 /// expression as far as SCEV is concerned.
   2021 ///
    2022 /// Currently only valid for LFTR. See the comments on hasConcreteDef above.
   2023 ///
   2024 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
   2025 ///
   2026 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
   2027 /// This is difficult in general for SCEV because of potential overflow. But we
   2028 /// could at least handle constant BECounts.
   2029 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
   2030                                 ScalarEvolution *SE, DominatorTree *DT) {
   2031   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
   2032 
   2033   Value *Cond =
   2034     cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
   2035 
   2036   // Loop over all of the PHI nodes, looking for a simple counter.
   2037   PHINode *BestPhi = nullptr;
   2038   const SCEV *BestInit = nullptr;
   2039   BasicBlock *LatchBlock = L->getLoopLatch();
   2040   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
   2041   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
   2042 
   2043   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
   2044     PHINode *Phi = cast<PHINode>(I);
   2045     if (!SE->isSCEVable(Phi->getType()))
   2046       continue;
   2047 
   2048     // Avoid comparing an integer IV against a pointer Limit.
   2049     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
   2050       continue;
   2051 
   2052     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
   2053     if (!AR || AR->getLoop() != L || !AR->isAffine())
   2054       continue;
   2055 
   2056     // AR may be a pointer type, while BECount is an integer type.
   2057     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
   2058     // AR may not be a narrower type, or we may never exit.
   2059     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
   2060     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
   2061       continue;
   2062 
   2063     const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
   2064     if (!Step || !Step->isOne())
   2065       continue;
   2066 
   2067     int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
   2068     Value *IncV = Phi->getIncomingValue(LatchIdx);
   2069     if (getLoopPhiForCounter(IncV, L, DT) != Phi)
   2070       continue;
   2071 
   2072     // Avoid reusing a potentially undef value to compute other values that may
   2073     // have originally had a concrete definition.
   2074     if (!hasConcreteDef(Phi)) {
   2075       // We explicitly allow unknown phis as long as they are already used by
   2076       // the loop test. In this case we assume that performing LFTR could not
   2077       // increase the number of undef users.
   2078       if (ICmpInst *Cond = getLoopTest(L)) {
   2079         if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
   2080             Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
   2081           continue;
   2082         }
   2083       }
   2084     }
   2085     const SCEV *Init = AR->getStart();
   2086 
   2087     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
   2088       // Don't force a live loop counter if another IV can be used.
   2089       if (AlmostDeadIV(Phi, LatchBlock, Cond))
   2090         continue;
   2091 
    2092       // Prefer to count from zero; this is a more "canonical" counter form. It
    2093       // also prefers integer IVs over pointer IVs.
   2094       if (BestInit->isZero() != Init->isZero()) {
   2095         if (BestInit->isZero())
   2096           continue;
   2097       }
   2098       // If two IVs both count from zero or both count from nonzero then the
   2099       // narrower is likely a dead phi that has been widened. Use the wider phi
   2100       // to allow the other to be eliminated.
   2101       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
   2102         continue;
   2103     }
   2104     BestPhi = Phi;
   2105     BestInit = Init;
   2106   }
   2107   return BestPhi;
   2108 }
   2109 
   2110 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
   2111 /// the new loop test.
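         ///
         /// A sketch of the common integer case (assumed names, not from a test):
         /// for an affine IV {%start,+,1} with IVCount == %n, the limit is the
         /// expansion of (%start + %n); when %start is zero it is simply %n, and
         /// the rewritten exit test becomes 'icmp ne %iv.next, %n'.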
   2112 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
   2113                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
   2114   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
   2115   assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
   2116   const SCEV *IVInit = AR->getStart();
   2117 
   2118   // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
    2119   // finds a valid pointer IV. Zero extend IVCount in order to materialize a
   2120   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
   2121   // the existing GEPs whenever possible.
   2122   if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
   2123     // IVOffset will be the new GEP offset that is interpreted by GEP as a
   2124     // signed value. IVCount on the other hand represents the loop trip count,
   2125     // which is an unsigned value. FindLoopCounter only allows induction
    2126     // variables that have a positive stride of one. This means we don't
   2127     // have to handle the case of negative offsets (yet) and just need to zero
   2128     // extend IVCount.
   2129     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
   2130     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
   2131 
   2132     // Expand the code for the iteration count.
   2133     assert(SE->isLoopInvariant(IVOffset, L) &&
   2134            "Computed iteration count is not loop invariant!");
   2135     BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
   2136     Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
   2137 
   2138     Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
   2139     assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
   2140     // We could handle pointer IVs other than i8*, but we need to compensate for
   2141     // gep index scaling. See canExpandBackedgeTakenCount comments.
   2142     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
   2143                              cast<PointerType>(GEPBase->getType())
   2144                                  ->getElementType())->isOne() &&
   2145            "unit stride pointer IV must be i8*");
   2146 
   2147     IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
   2148     return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
   2149   } else {
   2150     // In any other case, convert both IVInit and IVCount to integers before
   2151     // comparing. This may result in SCEV expansion of pointers, but in practice
   2152     // SCEV will fold the pointer arithmetic away as such:
   2153     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
   2154     //
   2155     // Valid Cases: (1) both integers is most common; (2) both may be pointers
   2156     // for simple memset-style loops.
   2157     //
   2158     // IVInit integer and IVCount pointer would only occur if a canonical IV
   2159     // were generated on top of case #2, which is not expected.
   2160 
   2161     const SCEV *IVLimit = nullptr;
   2162     // For unit stride, IVCount = Start + BECount with 2's complement overflow.
   2163     // For non-zero Start, compute IVCount here.
   2164     if (AR->getStart()->isZero())
   2165       IVLimit = IVCount;
   2166     else {
   2167       assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
   2168       const SCEV *IVInit = AR->getStart();
   2169 
   2170       // For integer IVs, truncate the IV before computing IVInit + BECount.
   2171       if (SE->getTypeSizeInBits(IVInit->getType())
   2172           > SE->getTypeSizeInBits(IVCount->getType()))
   2173         IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
   2174 
   2175       IVLimit = SE->getAddExpr(IVInit, IVCount);
   2176     }
   2177     // Expand the code for the iteration count.
   2178     BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
   2179     IRBuilder<> Builder(BI);
   2180     assert(SE->isLoopInvariant(IVLimit, L) &&
   2181            "Computed iteration count is not loop invariant!");
   2182     // Ensure that we generate the same type as IndVar, or a smaller integer
   2183     // type. In the presence of null pointer values, we have an integer type
   2184     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
   2185     Type *LimitTy = IVCount->getType()->isPointerTy() ?
   2186       IndVar->getType() : IVCount->getType();
   2187     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
   2188   }
   2189 }
   2190 
   2191 /// This method rewrites the exit condition of the loop to be a canonical !=
   2192 /// comparison against the incremented loop induction variable.  This pass is
   2193 /// able to rewrite the exit tests of any loop where the SCEV analysis can
   2194 /// determine a loop-invariant trip count of the loop, which is actually a much
   2195 /// broader range than just linear tests.
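         ///
         /// For example (an illustrative sketch): a latch exit ending in
         ///
         ///   %cmp = icmp slt i32 %iv, %n
         ///   br i1 %cmp, label %loop, label %exit
         ///
         /// becomes, once the trip count is known,
         ///
         ///   %exitcond = icmp ne i32 %iv.next, %limit
         ///   br i1 %exitcond, label %loop, label %exit
         ///
         /// where %limit is the value produced by genLoopLimit.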
   2196 Value *IndVarSimplify::
   2197 linearFunctionTestReplace(Loop *L,
   2198                           const SCEV *BackedgeTakenCount,
   2199                           PHINode *IndVar,
   2200                           SCEVExpander &Rewriter) {
   2201   assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
   2202 
   2203   // Initialize CmpIndVar and IVCount to their preincremented values.
   2204   Value *CmpIndVar = IndVar;
   2205   const SCEV *IVCount = BackedgeTakenCount;
   2206 
   2207   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
   2208 
   2209   // If the exiting block is the same as the backedge block, we prefer to
   2210   // compare against the post-incremented value, otherwise we must compare
   2211   // against the preincremented value.
   2212   if (L->getExitingBlock() == L->getLoopLatch()) {
   2213     // Add one to the "backedge-taken" count to get the trip count.
   2214     // This addition may overflow, which is valid as long as the comparison is
   2215     // truncated to BackedgeTakenCount->getType().
   2216     IVCount = SE->getAddExpr(BackedgeTakenCount,
   2217                              SE->getOne(BackedgeTakenCount->getType()));
   2218     // The BackedgeTaken expression contains the number of times that the
   2219     // backedge branches to the loop header.  This is one less than the
   2220     // number of times the loop executes, so use the incremented indvar.
   2221     CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
   2222   }
   2223 
   2224   Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
   2225   assert(ExitCnt->getType()->isPointerTy() ==
   2226              IndVar->getType()->isPointerTy() &&
   2227          "genLoopLimit missed a cast");
   2228 
   2229   // Insert a new icmp_ne or icmp_eq instruction before the branch.
   2230   BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
   2231   ICmpInst::Predicate P;
   2232   if (L->contains(BI->getSuccessor(0)))
   2233     P = ICmpInst::ICMP_NE;
   2234   else
   2235     P = ICmpInst::ICMP_EQ;
   2236 
   2237   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
   2238                     << "      LHS:" << *CmpIndVar << '\n'
   2239                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
   2240                     << "\n"
   2241                     << "      RHS:\t" << *ExitCnt << "\n"
   2242                     << "  IVCount:\t" << *IVCount << "\n");
   2243 
   2244   IRBuilder<> Builder(BI);
   2245 
   2246   // The new loop exit condition should reuse the debug location of the
   2247   // original loop exit condition.
   2248   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
   2249     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
   2250 
   2251   // LFTR can ignore IV overflow and truncate to the width of
   2252   // BECount. This avoids materializing the add(zext(add)) expression.
   2253   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
   2254   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
   2255   if (CmpIndVarSize > ExitCntSize) {
   2256     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
   2257     const SCEV *ARStart = AR->getStart();
   2258     const SCEV *ARStep = AR->getStepRecurrence(*SE);
   2259     // For constant IVCount, avoid truncation.
   2260     if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
   2261       const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
   2262       APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
   2263       // Note that the post-inc value of BackedgeTakenCount may have overflowed
   2264       // above such that IVCount is now zero.
   2265       if (IVCount != BackedgeTakenCount && Count == 0) {
   2266         Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
   2267         ++Count;
   2268       }
   2269       else
   2270         Count = Count.zext(CmpIndVarSize);
   2271       APInt NewLimit;
   2272       if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
   2273         NewLimit = Start - Count;
   2274       else
   2275         NewLimit = Start + Count;
   2276       ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
   2277 
   2278       LLVM_DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
   2279     } else {
    2280       // We try to extend the trip count first; if that doesn't work, we truncate
    2281       // the IV. Zext(trunc(IV)) == IV implies the equivalence of the following:
    2282       // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
    2283       // either holds, extend the trip count; otherwise truncate the IV.
   2284       bool Extended = false;
   2285       const SCEV *IV = SE->getSCEV(CmpIndVar);
   2286       const SCEV *ZExtTrunc =
   2287            SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
   2288                                                      ExitCnt->getType()),
   2289                                  CmpIndVar->getType());
   2290 
   2291       if (ZExtTrunc == IV) {
   2292         Extended = true;
   2293         ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
   2294                                      "wide.trip.count");
   2295       } else {
   2296         const SCEV *SExtTrunc =
   2297           SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
   2298                                                     ExitCnt->getType()),
   2299                                 CmpIndVar->getType());
   2300         if (SExtTrunc == IV) {
   2301           Extended = true;
   2302           ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
   2303                                        "wide.trip.count");
   2304         }
   2305       }
   2306 
   2307       if (!Extended)
   2308         CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
   2309                                         "lftr.wideiv");
   2310     }
   2311   }
   2312   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
   2313   Value *OrigCond = BI->getCondition();
   2314   // It's tempting to use replaceAllUsesWith here to fully replace the old
   2315   // comparison, but that's not immediately safe, since users of the old
   2316   // comparison may not be dominated by the new comparison. Instead, just
   2317   // update the branch to use the new comparison; in the common case this
   2318   // will make old comparison dead.
   2319   BI->setCondition(Cond);
   2320   DeadInsts.push_back(OrigCond);
   2321 
   2322   ++NumLFTR;
   2323   Changed = true;
   2324   return Cond;
   2325 }
   2326 
   2327 //===----------------------------------------------------------------------===//
   2328 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
   2329 //===----------------------------------------------------------------------===//
   2330 
   2331 /// If there's a single exit block, sink any loop-invariant values that
   2332 /// were defined in the preheader but not used inside the loop into the
   2333 /// exit block to reduce register pressure in the loop.
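         ///
         /// For example (a sketch; see the note below on undefined behavior):
         ///
         ///   preheader:
         ///     %d = sdiv i32 %a, %b   ; no uses inside the loop
         ///   ...
         ///   exit:
         ///     ... uses of %d ...
         ///
         /// %d is moved into %exit so it is not live across the loop body.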
   2334 void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
   2335   BasicBlock *ExitBlock = L->getExitBlock();
   2336   if (!ExitBlock) return;
   2337 
   2338   BasicBlock *Preheader = L->getLoopPreheader();
   2339   if (!Preheader) return;
   2340 
   2341   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
   2342   BasicBlock::iterator I(Preheader->getTerminator());
   2343   while (I != Preheader->begin()) {
   2344     --I;
   2345     // New instructions were inserted at the end of the preheader.
   2346     if (isa<PHINode>(I))
   2347       break;
   2348 
   2349     // Don't move instructions which might have side effects, since the side
   2350     // effects need to complete before instructions inside the loop.  Also don't
   2351     // move instructions which might read memory, since the loop may modify
   2352     // memory. Note that it's okay if the instruction might have undefined
   2353     // behavior: LoopSimplify guarantees that the preheader dominates the exit
   2354     // block.
   2355     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
   2356       continue;
   2357 
   2358     // Skip debug info intrinsics.
   2359     if (isa<DbgInfoIntrinsic>(I))
   2360       continue;
   2361 
   2362     // Skip eh pad instructions.
   2363     if (I->isEHPad())
   2364       continue;
   2365 
   2366     // Don't sink alloca: we never want to sink static alloca's out of the
   2367     // entry block, and correctly sinking dynamic alloca's requires
   2368     // checks for stacksave/stackrestore intrinsics.
   2369     // FIXME: Refactor this check somehow?
   2370     if (isa<AllocaInst>(I))
   2371       continue;
   2372 
   2373     // Determine if there is a use in or before the loop (direct or
   2374     // otherwise).
   2375     bool UsedInLoop = false;
   2376     for (Use &U : I->uses()) {
   2377       Instruction *User = cast<Instruction>(U.getUser());
   2378       BasicBlock *UseBB = User->getParent();
   2379       if (PHINode *P = dyn_cast<PHINode>(User)) {
   2380         unsigned i =
   2381           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
   2382         UseBB = P->getIncomingBlock(i);
   2383       }
   2384       if (UseBB == Preheader || L->contains(UseBB)) {
   2385         UsedInLoop = true;
   2386         break;
   2387       }
   2388     }
   2389 
   2390     // If there is, the def must remain in the preheader.
   2391     if (UsedInLoop)
   2392       continue;
   2393 
    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Step the iterator off ToMove before it is moved, skipping over any
      // debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      // If only debug info intrinsics remain above, this is the last
      // instruction worth visiting.
      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
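  // (For instance, a counter such as 'for (float F = 0.0; F != 100.0;
  // F += 1.0)' can typically be rewritten in terms of an integer induction
  // variable; exactly which cases are handled is up to rewriteNonIntegerIVs.)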
  rewriteNonIntegerIVs(L);

  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
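  // Canonical mode is disabled so that, while simplification is still in
  // progress, expansion reproduces expressions in their existing form rather
  // than rewriting them in terms of a canonical induction variable.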
  Rewriter.disableCanonicalMode();
  simplifyAndExtend(L, Rewriter, LI);

  // Check to see if this loop has a computable loop-invariant execution count.
  // If so, this means that we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of those expressions.
  //
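  // For instance, if the loop runs for N iterations and a user below the
  // loop reads the final value of a counter that steps by one per iteration,
  // that user can be rewritten to use a value computed from N directly,
  // dropping its dependence on the loop.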
  if (ReplaceExitValue != NeverRepl &&
      !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
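  // (Two IVs are congruent if, for example, they start at the same value and
  // step by the same amount each iteration; such cycles get merged into one.)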
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.  We can currently only handle loops with a single exit.
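  // (For example, an exit test of the shape 'i < n' is typically replaced by
  // an equality comparison of an induction variable against the value it
  // holds once the trip count is exhausted.)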
  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
      needsLFTR(L, DT)) {
    PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
    if (IndVar) {
      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead,
      // any pass that uses the SCEVExpander must enforce these dependencies
      // itself. This does not work well for loop passes because SCEVExpander
      // makes assumptions about all loops, while LoopPassManager only forces
      // the current loop to be simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
                                        Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // the trip count and therefore can further simplify exit values in addition
  // to rewriteLoopExitValues.
  rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute the trip count.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
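    // The recomputed count may be expressed at a different bit width than the
    // original; truncate the wider of the two so they compare directly.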
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
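    // TLI and TTI are optional for this pass: fall back to null when the
    // corresponding analyses were not scheduled.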
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

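// Either entry point can be exercised from the command line with a standard
// LLVM tools build: 'opt -passes=indvars -S in.ll' drives the new pass
// manager path above, while 'opt -indvars -S in.ll' runs this legacy pass.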
Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}