Home | History | Annotate | Download | only in Scalar
      1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This transformation analyzes and transforms the induction variables (and
     11 // computations derived from them) into simpler forms suitable for subsequent
     12 // analysis and transformation.
     13 //
     14 // If the trip count of a loop is computable, this pass also makes the following
     15 // changes:
     16 //   1. The exit condition for the loop is canonicalized to compare the
     17 //      induction value against the exit value.  This turns loops like:
     18 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
     19 //   2. Any use outside of the loop of an expression derived from the indvar
     20 //      is changed to compute the derived value outside of the loop, eliminating
     21 //      the dependence on the exit value of the induction variable.  If the only
     22 //      purpose of the loop is to compute the exit value of some derived
     23 //      expression, this transformation will make the loop dead.
     24 //
     25 //===----------------------------------------------------------------------===//
     26 
     27 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
     28 #include "llvm/Transforms/Scalar.h"
     29 #include "llvm/ADT/SmallVector.h"
     30 #include "llvm/ADT/Statistic.h"
     31 #include "llvm/Analysis/GlobalsModRef.h"
     32 #include "llvm/Analysis/LoopInfo.h"
     33 #include "llvm/Analysis/LoopPass.h"
     34 #include "llvm/Analysis/LoopPassManager.h"
     35 #include "llvm/Analysis/ScalarEvolutionExpander.h"
     36 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
     37 #include "llvm/Analysis/TargetLibraryInfo.h"
     38 #include "llvm/Analysis/TargetTransformInfo.h"
     39 #include "llvm/IR/BasicBlock.h"
     40 #include "llvm/IR/CFG.h"
     41 #include "llvm/IR/Constants.h"
     42 #include "llvm/IR/DataLayout.h"
     43 #include "llvm/IR/Dominators.h"
     44 #include "llvm/IR/Instructions.h"
     45 #include "llvm/IR/IntrinsicInst.h"
     46 #include "llvm/IR/LLVMContext.h"
     47 #include "llvm/IR/PatternMatch.h"
     48 #include "llvm/IR/Type.h"
     49 #include "llvm/Support/CommandLine.h"
     50 #include "llvm/Support/Debug.h"
     51 #include "llvm/Support/raw_ostream.h"
     52 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     53 #include "llvm/Transforms/Utils/Local.h"
     54 #include "llvm/Transforms/Utils/LoopUtils.h"
     55 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
     56 using namespace llvm;
     57 
     58 #define DEBUG_TYPE "indvars"
     59 
     60 STATISTIC(NumWidened     , "Number of indvars widened");
     61 STATISTIC(NumReplaced    , "Number of exit values replaced");
     62 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
     63 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
     64 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
     65 
     66 // Trip count verification can be enabled by default under NDEBUG if we
     67 // implement a strong expression equivalence checker in SCEV. Until then, we
     68 // use the verify-indvars flag, which may assert in some cases.
     69 static cl::opt<bool> VerifyIndvars(
     70   "verify-indvars", cl::Hidden,
     71   cl::desc("Verify the ScalarEvolution result after running indvars"));
     72 
     73 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };
     74 
     75 static cl::opt<ReplaceExitVal> ReplaceExitValue(
     76     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
     77     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
     78     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
     79                clEnumValN(OnlyCheapRepl, "cheap",
     80                           "only replace exit value when the cost is cheap"),
     81                clEnumValN(AlwaysRepl, "always",
     82                           "always replace exit value whenever possible"),
     83                clEnumValEnd));
     84 
     85 namespace {
     86 struct RewritePhi;
     87 
     88 class IndVarSimplify {
     89   LoopInfo *LI;
     90   ScalarEvolution *SE;
     91   DominatorTree *DT;
     92   const DataLayout &DL;
     93   TargetLibraryInfo *TLI;
     94   const TargetTransformInfo *TTI;
     95 
     96   SmallVector<WeakVH, 16> DeadInsts;
     97   bool Changed = false;
     98 
     99   bool isValidRewrite(Value *FromVal, Value *ToVal);
    100 
    101   void handleFloatingPointIV(Loop *L, PHINode *PH);
    102   void rewriteNonIntegerIVs(Loop *L);
    103 
    104   void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
    105 
    106   bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
    107   void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
    108   void rewriteFirstIterationLoopExitValues(Loop *L);
    109 
    110   Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
    111                                    PHINode *IndVar, SCEVExpander &Rewriter);
    112 
    113   void sinkUnusedInvariants(Loop *L);
    114 
    115   Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L,
    116                             Instruction *InsertPt, Type *Ty);
    117 
    118 public:
    119   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
    120                  const DataLayout &DL, TargetLibraryInfo *TLI,
    121                  TargetTransformInfo *TTI)
    122       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
    123 
    124   bool run(Loop *L);
    125 };
    126 }
    127 
    128 /// Return true if the SCEV expansion generated by the rewriter can replace the
    129 /// original value. SCEV guarantees that it produces the same value, but the way
    130 /// it is produced may be illegal IR.  Ideally, this function will only be
    131 /// called for verification.
    132 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
    133   // If an SCEV expression subsumed multiple pointers, its expansion could
    134   // reassociate the GEP changing the base pointer. This is illegal because the
    135   // final address produced by a GEP chain must be inbounds relative to its
    136   // underlying object. Otherwise basic alias analysis, among other things,
    137   // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
    138   // producing an expression involving multiple pointers. Until then, we must
    139   // bail out here.
    140   //
    141   // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
    142   // because it understands lcssa phis while SCEV does not.
    143   Value *FromPtr = FromVal;
    144   Value *ToPtr = ToVal;
    145   if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    146     FromPtr = GEP->getPointerOperand();
    147   }
    148   if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    149     ToPtr = GEP->getPointerOperand();
    150   }
    151   if (FromPtr != FromVal || ToPtr != ToVal) {
    152     // Quickly check the common case
    153     if (FromPtr == ToPtr)
    154       return true;
    155 
    156     // SCEV may have rewritten an expression that produces the GEP's pointer
    157     // operand. That's ok as long as the pointer operand has the same base
    158     // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    159     // base of a recurrence. This handles the case in which SCEV expansion
    160     // converts a pointer type recurrence into a nonrecurrent pointer base
    161     // indexed by an integer recurrence.
    162 
    163     // If the GEP base pointer is a vector of pointers, abort.
    164     if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
    165       return false;
    166 
    167     const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    168     const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    169     if (FromBase == ToBase)
    170       return true;
    171 
    172     DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
    173           << *FromBase << " != " << *ToBase << "\n");
    174 
    175     return false;
    176   }
    177   return true;
    178 }
    179 
    180 /// Determine the insertion point for this user. By default, insert immediately
    181 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
    182 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
    183 /// common dominator for the incoming blocks.
    184 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
    185                                           DominatorTree *DT, LoopInfo *LI) {
    186   PHINode *PHI = dyn_cast<PHINode>(User);
    187   if (!PHI)
    188     return User;
    189 
    190   Instruction *InsertPt = nullptr;
    191   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    192     if (PHI->getIncomingValue(i) != Def)
    193       continue;
    194 
    195     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
    196     if (!InsertPt) {
    197       InsertPt = InsertBB->getTerminator();
    198       continue;
    199     }
    200     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    201     InsertPt = InsertBB->getTerminator();
    202   }
    203   assert(InsertPt && "Missing phi operand");
    204 
    205   auto *DefI = dyn_cast<Instruction>(Def);
    206   if (!DefI)
    207     return InsertPt;
    208 
    209   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
    210 
    211   auto *L = LI->getLoopFor(DefI->getParent());
    212   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
    213 
    214   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    215     if (LI->getLoopFor(DTN->getBlock()) == L)
    216       return DTN->getBlock()->getTerminator();
    217 
    218   llvm_unreachable("DefI dominates InsertPt!");
    219 }
    220 
    221 //===----------------------------------------------------------------------===//
    222 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
    223 //===----------------------------------------------------------------------===//
    224 
    225 /// Convert APF to an integer, if possible.
    226 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
    227   bool isExact = false;
    228   // See if we can convert this to an int64_t
    229   uint64_t UIntVal;
    230   if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
    231                            &isExact) != APFloat::opOK || !isExact)
    232     return false;
    233   IntVal = UIntVal;
    234   return true;
    235 }
    236 
    237 /// If the loop has floating induction variable then insert corresponding
    238 /// integer induction variable if possible.
    239 /// For example,
    240 /// for(double i = 0; i < 10000; ++i)
    241 ///   bar(i)
    242 /// is converted into
    243 /// for(int i = 0; i < 10000; ++i)
    244 ///   bar((double)i);
    245 ///
    246 void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
    247   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
    248   unsigned BackEdge     = IncomingEdge^1;
    249 
    250   // Check incoming value.
    251   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
    252 
    253   int64_t InitValue;
    254   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    255     return;
    256 
    257   // Check IV increment. Reject this PN if increment operation is not
    258   // an add or increment value can not be represented by an integer.
    259   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
    260   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
    261 
    262   // If this is not an add of the PHI with a constantfp, or if the constant fp
    263   // is not an integer, bail out.
    264   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
    265   int64_t IncValue;
    266   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
    267       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    268     return;
    269 
    270   // Check Incr uses. One user is PN and the other user is an exit condition
    271   // used by the conditional terminator.
    272   Value::user_iterator IncrUse = Incr->user_begin();
    273   Instruction *U1 = cast<Instruction>(*IncrUse++);
    274   if (IncrUse == Incr->user_end()) return;
    275   Instruction *U2 = cast<Instruction>(*IncrUse++);
    276   if (IncrUse != Incr->user_end()) return;
    277 
    278   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
    279   // only used by a branch, we can't transform it.
    280   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
    281   if (!Compare)
    282     Compare = dyn_cast<FCmpInst>(U2);
    283   if (!Compare || !Compare->hasOneUse() ||
    284       !isa<BranchInst>(Compare->user_back()))
    285     return;
    286 
    287   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
    288 
    289   // We need to verify that the branch actually controls the iteration count
    290   // of the loop.  If not, the new IV can overflow and no one will notice.
    291   // The branch block must be in the loop and one of the successors must be out
    292   // of the loop.
    293   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
    294   if (!L->contains(TheBr->getParent()) ||
    295       (L->contains(TheBr->getSuccessor(0)) &&
    296        L->contains(TheBr->getSuccessor(1))))
    297     return;
    298 
    299 
    300   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
    301   // transform it.
    302   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
    303   int64_t ExitValue;
    304   if (ExitValueVal == nullptr ||
    305       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    306     return;
    307 
    308   // Find new predicate for integer comparison.
    309   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
    310   switch (Compare->getPredicate()) {
    311   default: return;  // Unknown comparison.
    312   case CmpInst::FCMP_OEQ:
    313   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
    314   case CmpInst::FCMP_ONE:
    315   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
    316   case CmpInst::FCMP_OGT:
    317   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
    318   case CmpInst::FCMP_OGE:
    319   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
    320   case CmpInst::FCMP_OLT:
    321   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
    322   case CmpInst::FCMP_OLE:
    323   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
    324   }
    325 
    326   // We convert the floating point induction variable to a signed i32 value if
    327   // we can.  This is only safe if the comparison will not overflow in a way
    328   // that won't be trapped by the integer equivalent operations.  Check for this
    329   // now.
    330   // TODO: We could use i64 if it is native and the range requires it.
    331 
    332   // The start/stride/exit values must all fit in signed i32.
    333   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    334     return;
    335 
    336   // If not actually striding (add x, 0.0), avoid touching the code.
    337   if (IncValue == 0)
    338     return;
    339 
    340   // Positive and negative strides have different safety conditions.
    341   if (IncValue > 0) {
    342     // If we have a positive stride, we require the init to be less than the
    343     // exit value.
    344     if (InitValue >= ExitValue)
    345       return;
    346 
    347     uint32_t Range = uint32_t(ExitValue-InitValue);
    348     // Check for infinite loop, either:
    349     // while (i <= Exit) or until (i > Exit)
    350     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
    351       if (++Range == 0) return;  // Range overflows.
    352     }
    353 
    354     unsigned Leftover = Range % uint32_t(IncValue);
    355 
    356     // If this is an equality comparison, we require that the strided value
    357     // exactly land on the exit value, otherwise the IV condition will wrap
    358     // around and do things the fp IV wouldn't.
    359     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
    360         Leftover != 0)
    361       return;
    362 
    363     // If the stride would wrap around the i32 before exiting, we can't
    364     // transform the IV.
    365     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
    366       return;
    367 
    368   } else {
    369     // If we have a negative stride, we require the init to be greater than the
    370     // exit value.
    371     if (InitValue <= ExitValue)
    372       return;
    373 
    374     uint32_t Range = uint32_t(InitValue-ExitValue);
    375     // Check for infinite loop, either:
    376     // while (i >= Exit) or until (i < Exit)
    377     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
    378       if (++Range == 0) return;  // Range overflows.
    379     }
    380 
    381     unsigned Leftover = Range % uint32_t(-IncValue);
    382 
    383     // If this is an equality comparison, we require that the strided value
    384     // exactly land on the exit value, otherwise the IV condition will wrap
    385     // around and do things the fp IV wouldn't.
    386     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
    387         Leftover != 0)
    388       return;
    389 
    390     // If the stride would wrap around the i32 before exiting, we can't
    391     // transform the IV.
    392     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
    393       return;
    394   }
    395 
    396   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
    397 
    398   // Insert new integer induction variable.
    399   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
    400   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
    401                       PN->getIncomingBlock(IncomingEdge));
    402 
    403   Value *NewAdd =
    404     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
    405                               Incr->getName()+".int", Incr);
    406   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
    407 
    408   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
    409                                       ConstantInt::get(Int32Ty, ExitValue),
    410                                       Compare->getName());
    411 
    412   // In the following deletions, PN may become dead and may be deleted.
    413   // Use a WeakVH to observe whether this happens.
    414   WeakVH WeakPH = PN;
    415 
    416   // Delete the old floating point exit comparison.  The branch starts using the
    417   // new comparison.
    418   NewCompare->takeName(Compare);
    419   Compare->replaceAllUsesWith(NewCompare);
    420   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
    421 
    422   // Delete the old floating point increment.
    423   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
    424   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
    425 
    426   // If the FP induction variable still has uses, this is because something else
    427   // in the loop uses its value.  In order to canonicalize the induction
    428   // variable, we chose to eliminate the IV and rewrite it in terms of an
    429   // int->fp cast.
    430   //
    431   // We give preference to sitofp over uitofp because it is faster on most
    432   // platforms.
    433   if (WeakPH) {
    434     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
    435                                  &*PN->getParent()->getFirstInsertionPt());
    436     PN->replaceAllUsesWith(Conv);
    437     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
    438   }
    439   Changed = true;
    440 }
    441 
    442 void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
    443   // First step.  Check to see if there are any floating-point recurrences.
    444   // If there are, change them into integer recurrences, permitting analysis by
    445   // the SCEV routines.
    446   //
    447   BasicBlock *Header = L->getHeader();
    448 
    449   SmallVector<WeakVH, 8> PHIs;
    450   for (BasicBlock::iterator I = Header->begin();
    451        PHINode *PN = dyn_cast<PHINode>(I); ++I)
    452     PHIs.push_back(PN);
    453 
    454   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    455     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
    456       handleFloatingPointIV(L, PN);
    457 
    458   // If the loop previously had floating-point IV, ScalarEvolution
    459   // may not have been able to compute a trip count. Now that we've done some
    460   // re-writing, the trip count may be computable.
    461   if (Changed)
    462     SE->forgetLoop(L);
    463 }
    464 
    465 namespace {
    466 // Collect information about PHI nodes which can be transformed in
    467 // rewriteLoopExitValues.
    468 struct RewritePhi {
    469   PHINode *PN;
    470   unsigned Ith;  // Ith incoming value.
    471   Value *Val;    // Exit value after expansion.
    472   bool HighCost; // High Cost when expansion.
    473 
    474   RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
    475       : PN(P), Ith(I), Val(V), HighCost(H) {}
    476 };
    477 }
    478 
    479 Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
    480                                           Loop *L, Instruction *InsertPt,
    481                                           Type *ResultTy) {
    482   // Before expanding S into an expensive LLVM expression, see if we can use an
    483   // already existing value as the expansion for S.
    484   if (Value *ExistingValue = Rewriter.findExistingExpansion(S, InsertPt, L))
    485     if (ExistingValue->getType() == ResultTy)
    486       return ExistingValue;
    487 
    488   // We didn't find anything, fall back to using SCEVExpander.
    489   return Rewriter.expandCodeFor(S, ResultTy, InsertPt);
    490 }
    491 
    492 //===----------------------------------------------------------------------===//
    493 // rewriteLoopExitValues - Optimize IV users outside the loop.
    494 // As a side effect, reduces the amount of IV processing within the loop.
    495 //===----------------------------------------------------------------------===//
    496 
    497 /// Check to see if this loop has a computable loop-invariant execution count.
    498 /// If so, this means that we can compute the final value of any expressions
    499 /// that are recurrent in the loop, and substitute the exit values from the loop
    500 /// into any instructions outside of the loop that use the final values of the
    501 /// current expressions.
    502 ///
    503 /// This is mostly redundant with the regular IndVarSimplify activities that
    504 /// happen later, except that it's more powerful in some cases, because it's
    505 /// able to brute-force evaluate arbitrary instructions as long as they have
    506 /// constant operands at the beginning of the loop.
    507 void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
    508   // Check a pre-condition.
    509   assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");
    510 
    511   SmallVector<BasicBlock*, 8> ExitBlocks;
    512   L->getUniqueExitBlocks(ExitBlocks);
    513 
    514   SmallVector<RewritePhi, 8> RewritePhiSet;
    515   // Find all values that are computed inside the loop, but used outside of it.
    516   // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
    517   // the exit blocks of the loop to find them.
    518   for (BasicBlock *ExitBB : ExitBlocks) {
    519     // If there are no PHI nodes in this exit block, then no values defined
    520     // inside the loop are used on this path, skip it.
    521     PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    522     if (!PN) continue;
    523 
    524     unsigned NumPreds = PN->getNumIncomingValues();
    525 
    526     // Iterate over all of the PHI nodes.
    527     BasicBlock::iterator BBI = ExitBB->begin();
    528     while ((PN = dyn_cast<PHINode>(BBI++))) {
    529       if (PN->use_empty())
    530         continue; // dead use, don't replace it
    531 
    532       if (!SE->isSCEVable(PN->getType()))
    533         continue;
    534 
    535       // It's necessary to tell ScalarEvolution about this explicitly so that
    536       // it can walk the def-use list and forget all SCEVs, as it may not be
    537       // watching the PHI itself. Once the new exit value is in place, there
    538       // may not be a def-use connection between the loop and every instruction
    539       // which got a SCEVAddRecExpr for that loop.
    540       SE->forgetValue(PN);
    541 
    542       // Iterate over all of the values in all the PHI nodes.
    543       for (unsigned i = 0; i != NumPreds; ++i) {
    544         // If the value being merged in is not integer or is not defined
    545         // in the loop, skip it.
    546         Value *InVal = PN->getIncomingValue(i);
    547         if (!isa<Instruction>(InVal))
    548           continue;
    549 
    550         // If this pred is for a subloop, not L itself, skip it.
    551         if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
    552           continue; // The Block is in a subloop, skip it.
    553 
    554         // Check that InVal is defined in the loop.
    555         Instruction *Inst = cast<Instruction>(InVal);
    556         if (!L->contains(Inst))
    557           continue;
    558 
    559         // Okay, this instruction has a user outside of the current loop
    560         // and varies predictably *inside* the loop.  Evaluate the value it
    561         // contains when the loop exits, if possible.
    562         const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
    563         if (!SE->isLoopInvariant(ExitValue, L) ||
    564             !isSafeToExpand(ExitValue, *SE))
    565           continue;
    566 
    567         // Computing the value outside of the loop brings no benefit if :
    568         //  - it is definitely used inside the loop in a way which can not be
    569         //    optimized away.
    570         //  - no use outside of the loop can take advantage of hoisting the
    571         //    computation out of the loop
    572         if (ExitValue->getSCEVType()>=scMulExpr) {
    573           unsigned NumHardInternalUses = 0;
    574           unsigned NumSoftExternalUses = 0;
    575           unsigned NumUses = 0;
    576           for (auto IB = Inst->user_begin(), IE = Inst->user_end();
    577                IB != IE && NumUses <= 6; ++IB) {
    578             Instruction *UseInstr = cast<Instruction>(*IB);
    579             unsigned Opc = UseInstr->getOpcode();
    580             NumUses++;
    581             if (L->contains(UseInstr)) {
    582               if (Opc == Instruction::Call || Opc == Instruction::Ret)
    583                 NumHardInternalUses++;
    584             } else {
    585               if (Opc == Instruction::PHI) {
    586                 // Do not count the Phi as a use. LCSSA may have inserted
    587                 // plenty of trivial ones.
    588                 NumUses--;
    589                 for (auto PB = UseInstr->user_begin(),
    590                           PE = UseInstr->user_end();
    591                      PB != PE && NumUses <= 6; ++PB, ++NumUses) {
    592                   unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
    593                   if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
    594                     NumSoftExternalUses++;
    595                 }
    596                 continue;
    597               }
    598               if (Opc != Instruction::Call && Opc != Instruction::Ret)
    599                 NumSoftExternalUses++;
    600             }
    601           }
    602           if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
    603             continue;
    604         }
    605 
    606         bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
    607         Value *ExitVal =
    608             expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType());
    609 
    610         DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
    611                      << "  LoopVal = " << *Inst << "\n");
    612 
    613         if (!isValidRewrite(Inst, ExitVal)) {
    614           DeadInsts.push_back(ExitVal);
    615           continue;
    616         }
    617 
    618         // Collect all the candidate PHINodes to be rewritten.
    619         RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
    620       }
    621     }
    622   }
    623 
    624   bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
    625 
    626   // Transformation.
    627   for (const RewritePhi &Phi : RewritePhiSet) {
    628     PHINode *PN = Phi.PN;
    629     Value *ExitVal = Phi.Val;
    630 
    631     // Only do the rewrite when the ExitValue can be expanded cheaply.
    632     // If LoopCanBeDel is true, rewrite exit value aggressively.
    633     if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
    634       DeadInsts.push_back(ExitVal);
    635       continue;
    636     }
    637 
    638     Changed = true;
    639     ++NumReplaced;
    640     Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    641     PN->setIncomingValue(Phi.Ith, ExitVal);
    642 
    643     // If this instruction is dead now, delete it. Don't do it now to avoid
    644     // invalidating iterators.
    645     if (isInstructionTriviallyDead(Inst, TLI))
    646       DeadInsts.push_back(Inst);
    647 
    648     // Replace PN with ExitVal if that is legal and does not break LCSSA.
    649     if (PN->getNumIncomingValues() == 1 &&
    650         LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
    651       PN->replaceAllUsesWith(ExitVal);
    652       PN->eraseFromParent();
    653     }
    654   }
    655 
    656   // The insertion point instruction may have been deleted; clear it out
    657   // so that the rewriter doesn't trip over it later.
    658   Rewriter.clearInsertPoint();
    659 }
    660 
    661 //===---------------------------------------------------------------------===//
    662 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
    663 // they will exit at the first iteration.
    664 //===---------------------------------------------------------------------===//
    665 
    666 /// Check to see if this loop has loop invariant conditions which lead to loop
    667 /// exits. If so, we know that if the exit path is taken, it is at the first
    668 /// loop iteration. This lets us predict exit values of PHI nodes that live in
    669 /// loop header.
    670 void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
    671   // Verify the input to the pass is already in LCSSA form.
    672   assert(L->isLCSSAForm(*DT));
    673 
    674   SmallVector<BasicBlock *, 8> ExitBlocks;
    675   L->getUniqueExitBlocks(ExitBlocks);
    676   auto *LoopHeader = L->getHeader();
    677   assert(LoopHeader && "Invalid loop");
    678 
    679   for (auto *ExitBB : ExitBlocks) {
    680     BasicBlock::iterator BBI = ExitBB->begin();
    681     // If there are no more PHI nodes in this exit block, then no more
    682     // values defined inside the loop are used on this path.
    683     while (auto *PN = dyn_cast<PHINode>(BBI++)) {
    684       for (unsigned IncomingValIdx = 0, E = PN->getNumIncomingValues();
    685           IncomingValIdx != E; ++IncomingValIdx) {
    686         auto *IncomingBB = PN->getIncomingBlock(IncomingValIdx);
    687 
    688         // We currently only support loop exits from loop header. If the
    689         // incoming block is not loop header, we need to recursively check
    690         // all conditions starting from loop header are loop invariants.
    691         // Additional support might be added in the future.
    692         if (IncomingBB != LoopHeader)
    693           continue;
    694 
    695         // Get condition that leads to the exit path.
    696         auto *TermInst = IncomingBB->getTerminator();
    697 
    698         Value *Cond = nullptr;
    699         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
    700           // Must be a conditional branch, otherwise the block
    701           // should not be in the loop.
    702           Cond = BI->getCondition();
    703         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
    704           Cond = SI->getCondition();
    705         else
    706           continue;
    707 
    708         if (!L->isLoopInvariant(Cond))
    709           continue;
    710 
    711         auto *ExitVal =
    712             dyn_cast<PHINode>(PN->getIncomingValue(IncomingValIdx));
    713 
    714         // Only deal with PHIs.
    715         if (!ExitVal)
    716           continue;
    717 
    718         // If ExitVal is a PHI on the loop header, then we know its
    719         // value along this exit because the exit can only be taken
    720         // on the first iteration.
    721         auto *LoopPreheader = L->getLoopPreheader();
    722         assert(LoopPreheader && "Invalid loop");
    723         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
    724         if (PreheaderIdx != -1) {
    725           assert(ExitVal->getParent() == LoopHeader &&
    726                  "ExitVal must be in loop header");
    727           PN->setIncomingValue(IncomingValIdx,
    728               ExitVal->getIncomingValue(PreheaderIdx));
    729         }
    730       }
    731     }
    732   }
    733 }
    734 
    735 /// Check whether it is possible to delete the loop after rewriting exit
    736 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
    737 /// aggressively.
    738 bool IndVarSimplify::canLoopBeDeleted(
    739     Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
    740 
    741   BasicBlock *Preheader = L->getLoopPreheader();
    742   // If there is no preheader, the loop will not be deleted.
    743   if (!Preheader)
    744     return false;
    745 
    746   // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
    747   // We obviate multiple ExitingBlocks case for simplicity.
    748   // TODO: If we see testcase with multiple ExitingBlocks can be deleted
    749   // after exit value rewriting, we can enhance the logic here.
    750   SmallVector<BasicBlock *, 4> ExitingBlocks;
    751   L->getExitingBlocks(ExitingBlocks);
    752   SmallVector<BasicBlock *, 8> ExitBlocks;
    753   L->getUniqueExitBlocks(ExitBlocks);
    754   if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    755     return false;
    756 
    757   BasicBlock *ExitBlock = ExitBlocks[0];
    758   BasicBlock::iterator BI = ExitBlock->begin();
    759   while (PHINode *P = dyn_cast<PHINode>(BI)) {
    760     Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
    761 
    762     // If the Incoming value of P is found in RewritePhiSet, we know it
    763     // could be rewritten to use a loop invariant value in transformation
    764     // phase later. Skip it in the loop invariant check below.
    765     bool found = false;
    766     for (const RewritePhi &Phi : RewritePhiSet) {
    767       unsigned i = Phi.Ith;
    768       if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
    769         found = true;
    770         break;
    771       }
    772     }
    773 
    774     Instruction *I;
    775     if (!found && (I = dyn_cast<Instruction>(Incoming)))
    776       if (!L->hasLoopInvariantOperands(I))
    777         return false;
    778 
    779     ++BI;
    780   }
    781 
    782   for (auto *BB : L->blocks())
    783     if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
    784       return false;
    785 
    786   return true;
    787 }
    788 
    789 //===----------------------------------------------------------------------===//
    790 //  IV Widening - Extend the width of an IV to cover its widest uses.
    791 //===----------------------------------------------------------------------===//
    792 
    793 namespace {
    794 // Collect information about induction variables that are used by sign/zero
    795 // extend operations. This information is recorded by CollectExtend and provides
    796 // the input to WidenIV.
    797 struct WideIVInfo {
    798   PHINode *NarrowIV = nullptr;
    799   Type *WidestNativeType = nullptr; // Widest integer type created [sz]ext
    800   bool IsSigned = false;            // Was a sext user seen before a zext?
    801 };
    802 }
    803 
    804 /// Update information about the induction variable that is extended by this
    805 /// sign or zero extend operation. This is used to determine the final width of
    806 /// the IV before actually widening it.
    807 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
    808                         const TargetTransformInfo *TTI) {
    809   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
    810   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    811     return;
    812 
    813   Type *Ty = Cast->getType();
    814   uint64_t Width = SE->getTypeSizeInBits(Ty);
    815   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    816     return;
    817 
    818   // Cast is either an sext or zext up to this point.
    819   // We should not widen an indvar if arithmetics on the wider indvar are more
    820   // expensive than those on the narrower indvar. We check only the cost of ADD
    821   // because at least an ADD is required to increment the induction variable. We
    822   // could compute more comprehensively the cost of all instructions on the
    823   // induction variable when necessary.
    824   if (TTI &&
    825       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
    826           TTI->getArithmeticInstrCost(Instruction::Add,
    827                                       Cast->getOperand(0)->getType())) {
    828     return;
    829   }
    830 
    831   if (!WI.WidestNativeType) {
    832     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    833     WI.IsSigned = IsSigned;
    834     return;
    835   }
    836 
    837   // We extend the IV to satisfy the sign of its first user, arbitrarily.
    838   if (WI.IsSigned != IsSigned)
    839     return;
    840 
    841   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    842     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    843 }
    844 
    845 namespace {
    846 
    847 /// Record a link in the Narrow IV def-use chain along with the WideIV that
    848 /// computes the same value as the Narrow IV def.  This avoids caching Use*
    849 /// pointers.
    850 struct NarrowIVDefUse {
    851   Instruction *NarrowDef = nullptr;
    852   Instruction *NarrowUse = nullptr;
    853   Instruction *WideDef = nullptr;
    854 
    855   // True if the narrow def is never negative.  Tracking this information lets
    856   // us use a sign extension instead of a zero extension or vice versa, when
    857   // profitable and legal.
    858   bool NeverNegative = false;
    859 
    860   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
    861                  bool NeverNegative)
    862       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
    863         NeverNegative(NeverNegative) {}
    864 };
    865 
    866 /// The goal of this transform is to remove sign and zero extends without
    867 /// creating any new induction variables. To do this, it creates a new phi of
    868 /// the wider type and redirects all users, either removing extends or inserting
    869 /// truncs whenever we stop propagating the type.
    870 ///
    871 class WidenIV {
    872   // Parameters
    873   PHINode *OrigPhi;
    874   Type *WideType;
    875   bool IsSigned;
    876 
    877   // Context
    878   LoopInfo        *LI;
    879   Loop            *L;
    880   ScalarEvolution *SE;
    881   DominatorTree   *DT;
    882 
    883   // Result
    884   PHINode *WidePhi;
    885   Instruction *WideInc;
    886   const SCEV *WideIncExpr;
    887   SmallVectorImpl<WeakVH> &DeadInsts;
    888 
    889   SmallPtrSet<Instruction*,16> Widened;
    890   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
    891 
    892 public:
    893   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
    894           ScalarEvolution *SEv, DominatorTree *DTree,
    895           SmallVectorImpl<WeakVH> &DI) :
    896     OrigPhi(WI.NarrowIV),
    897     WideType(WI.WidestNativeType),
    898     IsSigned(WI.IsSigned),
    899     LI(LInfo),
    900     L(LI->getLoopFor(OrigPhi->getParent())),
    901     SE(SEv),
    902     DT(DTree),
    903     WidePhi(nullptr),
    904     WideInc(nullptr),
    905     WideIncExpr(nullptr),
    906     DeadInsts(DI) {
    907     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    908   }
    909 
    910   PHINode *createWideIV(SCEVExpander &Rewriter);
    911 
    912 protected:
    913   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
    914                           Instruction *Use);
    915 
    916   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
    917   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
    918                                      const SCEVAddRecExpr *WideAR);
    919   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
    920 
    921   const SCEVAddRecExpr *getWideRecurrence(Instruction *NarrowUse);
    922 
    923   const SCEVAddRecExpr* getExtendedOperandRecurrence(NarrowIVDefUse DU);
    924 
    925   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
    926                               unsigned OpCode) const;
    927 
    928   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
    929 
    930   bool widenLoopCompare(NarrowIVDefUse DU);
    931 
    932   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
    933 };
    934 } // anonymous namespace
    935 
    936 /// Perform a quick domtree based check for loop invariance assuming that V is
    937 /// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
    938 /// purpose.
    939 static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
    940   Instruction *Inst = dyn_cast<Instruction>(V);
    941   if (!Inst)
    942     return true;
    943 
    944   return DT->properlyDominates(Inst->getParent(), L->getHeader());
    945 }
    946 
    947 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
    948                                  bool IsSigned, Instruction *Use) {
    949   // Set the debug location and conservative insertion point.
    950   IRBuilder<> Builder(Use);
    951   // Hoist the insertion point into loop preheaders as far as possible.
    952   for (const Loop *L = LI->getLoopFor(Use->getParent());
    953        L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
    954        L = L->getParentLoop())
    955     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
    956 
    957   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
    958                     Builder.CreateZExt(NarrowOper, WideType);
    959 }
    960 
    961 /// Instantiate a wide operation to replace a narrow operation. This only needs
    962 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
    963 /// 0 for any operation we decide not to clone.
    964 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
    965                                   const SCEVAddRecExpr *WideAR) {
    966   unsigned Opcode = DU.NarrowUse->getOpcode();
    967   switch (Opcode) {
    968   default:
    969     return nullptr;
    970   case Instruction::Add:
    971   case Instruction::Mul:
    972   case Instruction::UDiv:
    973   case Instruction::Sub:
    974     return cloneArithmeticIVUser(DU, WideAR);
    975 
    976   case Instruction::And:
    977   case Instruction::Or:
    978   case Instruction::Xor:
    979   case Instruction::Shl:
    980   case Instruction::LShr:
    981   case Instruction::AShr:
    982     return cloneBitwiseIVUser(DU);
    983   }
    984 }
    985 
    986 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
    987   Instruction *NarrowUse = DU.NarrowUse;
    988   Instruction *NarrowDef = DU.NarrowDef;
    989   Instruction *WideDef = DU.WideDef;
    990 
    991   DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
    992 
    993   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
    994   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
    995   // invariant and will be folded or hoisted. If it actually comes from a
    996   // widened IV, it should be removed during a future call to widenIVUse.
    997   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
    998                    ? WideDef
    999                    : createExtendInst(NarrowUse->getOperand(0), WideType,
   1000                                       IsSigned, NarrowUse);
   1001   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
   1002                    ? WideDef
   1003                    : createExtendInst(NarrowUse->getOperand(1), WideType,
   1004                                       IsSigned, NarrowUse);
   1005 
   1006   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
   1007   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
   1008                                         NarrowBO->getName());
   1009   IRBuilder<> Builder(NarrowUse);
   1010   Builder.Insert(WideBO);
   1011   WideBO->copyIRFlags(NarrowBO);
   1012   return WideBO;
   1013 }
   1014 
   1015 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
   1016                                             const SCEVAddRecExpr *WideAR) {
   1017   Instruction *NarrowUse = DU.NarrowUse;
   1018   Instruction *NarrowDef = DU.NarrowDef;
   1019   Instruction *WideDef = DU.WideDef;
   1020 
   1021   DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
   1022 
   1023   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
   1024 
   1025   // We're trying to find X such that
   1026   //
   1027   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
   1028   //
   1029   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
   1030   // and check using SCEV if any of them are correct.
   1031 
   1032   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
   1033   // correct solution to X.
   1034   auto GuessNonIVOperand = [&](bool SignExt) {
   1035     const SCEV *WideLHS;
   1036     const SCEV *WideRHS;
   1037 
   1038     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
   1039       if (SignExt)
   1040         return SE->getSignExtendExpr(S, Ty);
   1041       return SE->getZeroExtendExpr(S, Ty);
   1042     };
   1043 
   1044     if (IVOpIdx == 0) {
   1045       WideLHS = SE->getSCEV(WideDef);
   1046       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
   1047       WideRHS = GetExtend(NarrowRHS, WideType);
   1048     } else {
   1049       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
   1050       WideLHS = GetExtend(NarrowLHS, WideType);
   1051       WideRHS = SE->getSCEV(WideDef);
   1052     }
   1053 
   1054     // WideUse is "WideDef `op.wide` X" as described in the comment.
   1055     const SCEV *WideUse = nullptr;
   1056 
   1057     switch (NarrowUse->getOpcode()) {
   1058     default:
   1059       llvm_unreachable("No other possibility!");
   1060 
   1061     case Instruction::Add:
   1062       WideUse = SE->getAddExpr(WideLHS, WideRHS);
   1063       break;
   1064 
   1065     case Instruction::Mul:
   1066       WideUse = SE->getMulExpr(WideLHS, WideRHS);
   1067       break;
   1068 
   1069     case Instruction::UDiv:
   1070       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
   1071       break;
   1072 
   1073     case Instruction::Sub:
   1074       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
   1075       break;
   1076     }
   1077 
   1078     return WideUse == WideAR;
   1079   };
   1080 
   1081   bool SignExtend = IsSigned;
   1082   if (!GuessNonIVOperand(SignExtend)) {
   1083     SignExtend = !SignExtend;
   1084     if (!GuessNonIVOperand(SignExtend))
   1085       return nullptr;
   1086   }
   1087 
   1088   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
   1089                    ? WideDef
   1090                    : createExtendInst(NarrowUse->getOperand(0), WideType,
   1091                                       SignExtend, NarrowUse);
   1092   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
   1093                    ? WideDef
   1094                    : createExtendInst(NarrowUse->getOperand(1), WideType,
   1095                                       SignExtend, NarrowUse);
   1096 
   1097   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
   1098   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
   1099                                         NarrowBO->getName());
   1100 
   1101   IRBuilder<> Builder(NarrowUse);
   1102   Builder.Insert(WideBO);
   1103   WideBO->copyIRFlags(NarrowBO);
   1104   return WideBO;
   1105 }
   1106 
   1107 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
   1108                                      unsigned OpCode) const {
   1109   if (OpCode == Instruction::Add)
   1110     return SE->getAddExpr(LHS, RHS);
   1111   if (OpCode == Instruction::Sub)
   1112     return SE->getMinusSCEV(LHS, RHS);
   1113   if (OpCode == Instruction::Mul)
   1114     return SE->getMulExpr(LHS, RHS);
   1115 
   1116   llvm_unreachable("Unsupported opcode.");
   1117 }
   1118 
   1119 /// No-wrap operations can transfer sign extension of their result to their
   1120 /// operands. Generate the SCEV value for the widened operation without
   1121 /// actually modifying the IR yet. If the expression after extending the
   1122 /// operands is an AddRec for this loop, return it.
   1123 const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
   1124 
   1125   // Handle the common case of add<nsw/nuw>
   1126   const unsigned OpCode = DU.NarrowUse->getOpcode();
   1127   // Only Add/Sub/Mul instructions supported yet.
   1128   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
   1129       OpCode != Instruction::Mul)
   1130     return nullptr;
   1131 
   1132   // One operand (NarrowDef) has already been extended to WideDef. Now determine
   1133   // if extending the other will lead to a recurrence.
   1134   const unsigned ExtendOperIdx =
   1135       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
   1136   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
   1137 
   1138   const SCEV *ExtendOperExpr = nullptr;
   1139   const OverflowingBinaryOperator *OBO =
   1140     cast<OverflowingBinaryOperator>(DU.NarrowUse);
   1141   if (IsSigned && OBO->hasNoSignedWrap())
   1142     ExtendOperExpr = SE->getSignExtendExpr(
   1143       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
   1144   else if(!IsSigned && OBO->hasNoUnsignedWrap())
   1145     ExtendOperExpr = SE->getZeroExtendExpr(
   1146       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
   1147   else
   1148     return nullptr;
   1149 
   1150   // When creating this SCEV expr, don't apply the current operations NSW or NUW
   1151   // flags. This instruction may be guarded by control flow that the no-wrap
   1152   // behavior depends on. Non-control-equivalent instructions can be mapped to
   1153   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
   1154   // semantics to those operations.
   1155   const SCEV *lhs = SE->getSCEV(DU.WideDef);
   1156   const SCEV *rhs = ExtendOperExpr;
   1157 
   1158   // Let's swap operands to the initial order for the case of non-commutative
   1159   // operations, like SUB. See PR21014.
   1160   if (ExtendOperIdx == 0)
   1161     std::swap(lhs, rhs);
   1162   const SCEVAddRecExpr *AddRec =
   1163       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
   1164 
   1165   if (!AddRec || AddRec->getLoop() != L)
   1166     return nullptr;
   1167   return AddRec;
   1168 }
   1169 
   1170 /// Is this instruction potentially interesting for further simplification after
   1171 /// widening it's type? In other words, can the extend be safely hoisted out of
   1172 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
   1173 /// so, return the sign or zero extended recurrence. Otherwise return NULL.
   1174 const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) {
   1175   if (!SE->isSCEVable(NarrowUse->getType()))
   1176     return nullptr;
   1177 
   1178   const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
   1179   if (SE->getTypeSizeInBits(NarrowExpr->getType())
   1180       >= SE->getTypeSizeInBits(WideType)) {
   1181     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
   1182     // index. So don't follow this use.
   1183     return nullptr;
   1184   }
   1185 
   1186   const SCEV *WideExpr = IsSigned ?
   1187     SE->getSignExtendExpr(NarrowExpr, WideType) :
   1188     SE->getZeroExtendExpr(NarrowExpr, WideType);
   1189   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
   1190   if (!AddRec || AddRec->getLoop() != L)
   1191     return nullptr;
   1192   return AddRec;
   1193 }
   1194 
   1195 /// This IV user cannot be widen. Replace this use of the original narrow IV
   1196 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
   1197 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
   1198   DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
   1199         << " for user " << *DU.NarrowUse << "\n");
   1200   IRBuilder<> Builder(
   1201       getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
   1202   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
   1203   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
   1204 }
   1205 
   1206 /// If the narrow use is a compare instruction, then widen the compare
   1207 //  (and possibly the other operand).  The extend operation is hoisted into the
   1208 // loop preheader as far as possible.
   1209 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
   1210   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
   1211   if (!Cmp)
   1212     return false;
   1213 
   1214   // We can legally widen the comparison in the following two cases:
   1215   //
   1216   //  - The signedness of the IV extension and comparison match
   1217   //
   1218   //  - The narrow IV is always positive (and thus its sign extension is equal
   1219   //    to its zero extension).  For instance, let's say we're zero extending
   1220   //    %narrow for the following use
   1221   //
   1222   //      icmp slt i32 %narrow, %val   ... (A)
   1223   //
   1224   //    and %narrow is always positive.  Then
   1225   //
   1226   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
   1227   //          == icmp slt i32 zext(%narrow), sext(%val)
   1228 
   1229   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
   1230     return false;
   1231 
   1232   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
   1233   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
   1234   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
   1235   assert (CastWidth <= IVWidth && "Unexpected width while widening compare.");
   1236 
   1237   // Widen the compare instruction.
   1238   IRBuilder<> Builder(
   1239       getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
   1240   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
   1241 
   1242   // Widen the other operand of the compare, if necessary.
   1243   if (CastWidth < IVWidth) {
   1244     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
   1245     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
   1246   }
   1247   return true;
   1248 }
   1249 
   1250 /// Determine whether an individual user of the narrow IV can be widened. If so,
   1251 /// return the wide clone of the user.
   1252 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
   1253 
   1254   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
   1255   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
   1256     if (LI->getLoopFor(UsePhi->getParent()) != L) {
   1257       // For LCSSA phis, sink the truncate outside the loop.
   1258       // After SimplifyCFG most loop exit targets have a single predecessor.
   1259       // Otherwise fall back to a truncate within the loop.
   1260       if (UsePhi->getNumOperands() != 1)
   1261         truncateIVUse(DU, DT, LI);
   1262       else {
   1263         // Widening the PHI requires us to insert a trunc.  The logical place
   1264         // for this trunc is in the same BB as the PHI.  This is not possible if
   1265         // the BB is terminated by a catchswitch.
   1266         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
   1267           return nullptr;
   1268 
   1269         PHINode *WidePhi =
   1270           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
   1271                           UsePhi);
   1272         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
   1273         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
   1274         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
   1275         UsePhi->replaceAllUsesWith(Trunc);
   1276         DeadInsts.emplace_back(UsePhi);
   1277         DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
   1278               << " to " << *WidePhi << "\n");
   1279       }
   1280       return nullptr;
   1281     }
   1282   }
   1283   // Our raison d'etre! Eliminate sign and zero extension.
   1284   if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
   1285     Value *NewDef = DU.WideDef;
   1286     if (DU.NarrowUse->getType() != WideType) {
   1287       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
   1288       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
   1289       if (CastWidth < IVWidth) {
   1290         // The cast isn't as wide as the IV, so insert a Trunc.
   1291         IRBuilder<> Builder(DU.NarrowUse);
   1292         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
   1293       }
   1294       else {
   1295         // A wider extend was hidden behind a narrower one. This may induce
   1296         // another round of IV widening in which the intermediate IV becomes
   1297         // dead. It should be very rare.
   1298         DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
   1299               << " not wide enough to subsume " << *DU.NarrowUse << "\n");
   1300         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
   1301         NewDef = DU.NarrowUse;
   1302       }
   1303     }
   1304     if (NewDef != DU.NarrowUse) {
   1305       DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
   1306             << " replaced by " << *DU.WideDef << "\n");
   1307       ++NumElimExt;
   1308       DU.NarrowUse->replaceAllUsesWith(NewDef);
   1309       DeadInsts.emplace_back(DU.NarrowUse);
   1310     }
   1311     // Now that the extend is gone, we want to expose it's uses for potential
   1312     // further simplification. We don't need to directly inform SimplifyIVUsers
   1313     // of the new users, because their parent IV will be processed later as a
   1314     // new loop phi. If we preserved IVUsers analysis, we would also want to
   1315     // push the uses of WideDef here.
   1316 
   1317     // No further widening is needed. The deceased [sz]ext had done it for us.
   1318     return nullptr;
   1319   }
   1320 
   1321   // Does this user itself evaluate to a recurrence after widening?
   1322   const SCEVAddRecExpr *WideAddRec = getWideRecurrence(DU.NarrowUse);
   1323   if (!WideAddRec)
   1324     WideAddRec = getExtendedOperandRecurrence(DU);
   1325 
   1326   if (!WideAddRec) {
   1327     // If use is a loop condition, try to promote the condition instead of
   1328     // truncating the IV first.
   1329     if (widenLoopCompare(DU))
   1330       return nullptr;
   1331 
   1332     // This user does not evaluate to a recurence after widening, so don't
   1333     // follow it. Instead insert a Trunc to kill off the original use,
   1334     // eventually isolating the original narrow IV so it can be removed.
   1335     truncateIVUse(DU, DT, LI);
   1336     return nullptr;
   1337   }
   1338   // Assume block terminators cannot evaluate to a recurrence. We can't to
   1339   // insert a Trunc after a terminator if there happens to be a critical edge.
   1340   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
   1341          "SCEV is not expected to evaluate a block terminator");
   1342 
   1343   // Reuse the IV increment that SCEVExpander created as long as it dominates
   1344   // NarrowUse.
   1345   Instruction *WideUse = nullptr;
   1346   if (WideAddRec == WideIncExpr && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
   1347     WideUse = WideInc;
   1348   else {
   1349     WideUse = cloneIVUser(DU, WideAddRec);
   1350     if (!WideUse)
   1351       return nullptr;
   1352   }
   1353   // Evaluation of WideAddRec ensured that the narrow expression could be
   1354   // extended outside the loop without overflow. This suggests that the wide use
   1355   // evaluates to the same expression as the extended narrow use, but doesn't
   1356   // absolutely guarantee it. Hence the following failsafe check. In rare cases
   1357   // where it fails, we simply throw away the newly created wide use.
   1358   if (WideAddRec != SE->getSCEV(WideUse)) {
   1359     DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
   1360           << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
   1361     DeadInsts.emplace_back(WideUse);
   1362     return nullptr;
   1363   }
   1364 
   1365   // Returning WideUse pushes it on the worklist.
   1366   return WideUse;
   1367 }
   1368 
   1369 /// Add eligible users of NarrowDef to NarrowIVUsers.
   1370 ///
   1371 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
   1372   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
   1373   bool NeverNegative =
   1374       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
   1375                            SE->getConstant(NarrowSCEV->getType(), 0));
   1376   for (User *U : NarrowDef->users()) {
   1377     Instruction *NarrowUser = cast<Instruction>(U);
   1378 
   1379     // Handle data flow merges and bizarre phi cycles.
   1380     if (!Widened.insert(NarrowUser).second)
   1381       continue;
   1382 
   1383     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, NeverNegative);
   1384   }
   1385 }
   1386 
   1387 /// Process a single induction variable. First use the SCEVExpander to create a
   1388 /// wide induction variable that evaluates to the same recurrence as the
   1389 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
   1390 /// def-use chain. After widenIVUse has processed all interesting IV users, the
   1391 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
   1392 ///
   1393 /// It would be simpler to delete uses as they are processed, but we must avoid
   1394 /// invalidating SCEV expressions.
   1395 ///
   1396 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
   1397   // Is this phi an induction variable?
   1398   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
   1399   if (!AddRec)
   1400     return nullptr;
   1401 
   1402   // Widen the induction variable expression.
   1403   const SCEV *WideIVExpr = IsSigned ?
   1404     SE->getSignExtendExpr(AddRec, WideType) :
   1405     SE->getZeroExtendExpr(AddRec, WideType);
   1406 
   1407   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
   1408          "Expect the new IV expression to preserve its type");
   1409 
   1410   // Can the IV be extended outside the loop without overflow?
   1411   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
   1412   if (!AddRec || AddRec->getLoop() != L)
   1413     return nullptr;
   1414 
   1415   // An AddRec must have loop-invariant operands. Since this AddRec is
   1416   // materialized by a loop header phi, the expression cannot have any post-loop
   1417   // operands, so they must dominate the loop header.
   1418   assert(
   1419       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
   1420       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
   1421       "Loop header phi recurrence inputs do not dominate the loop");
   1422 
   1423   // The rewriter provides a value for the desired IV expression. This may
   1424   // either find an existing phi or materialize a new one. Either way, we
   1425   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
   1426   // of the phi-SCC dominates the loop entry.
   1427   Instruction *InsertPt = &L->getHeader()->front();
   1428   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
   1429 
   1430   // Remembering the WideIV increment generated by SCEVExpander allows
   1431   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
   1432   // employ a general reuse mechanism because the call above is the only call to
   1433   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
   1434   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
   1435     WideInc =
   1436       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
   1437     WideIncExpr = SE->getSCEV(WideInc);
   1438   }
   1439 
   1440   DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
   1441   ++NumWidened;
   1442 
   1443   // Traverse the def-use chain using a worklist starting at the original IV.
   1444   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
   1445 
   1446   Widened.insert(OrigPhi);
   1447   pushNarrowIVUsers(OrigPhi, WidePhi);
   1448 
   1449   while (!NarrowIVUsers.empty()) {
   1450     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
   1451 
   1452     // Process a def-use edge. This may replace the use, so don't hold a
   1453     // use_iterator across it.
   1454     Instruction *WideUse = widenIVUse(DU, Rewriter);
   1455 
   1456     // Follow all def-use edges from the previous narrow use.
   1457     if (WideUse)
   1458       pushNarrowIVUsers(DU.NarrowUse, WideUse);
   1459 
   1460     // widenIVUse may have removed the def-use edge.
   1461     if (DU.NarrowDef->use_empty())
   1462       DeadInsts.emplace_back(DU.NarrowDef);
   1463   }
   1464   return WidePhi;
   1465 }
   1466 
   1467 //===----------------------------------------------------------------------===//
   1468 //  Live IV Reduction - Minimize IVs live across the loop.
   1469 //===----------------------------------------------------------------------===//
   1470 
   1471 
   1472 //===----------------------------------------------------------------------===//
   1473 //  Simplification of IV users based on SCEV evaluation.
   1474 //===----------------------------------------------------------------------===//
   1475 
   1476 namespace {
   1477 class IndVarSimplifyVisitor : public IVVisitor {
   1478   ScalarEvolution *SE;
   1479   const TargetTransformInfo *TTI;
   1480   PHINode *IVPhi;
   1481 
   1482 public:
   1483   WideIVInfo WI;
   1484 
   1485   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
   1486                         const TargetTransformInfo *TTI,
   1487                         const DominatorTree *DTree)
   1488     : SE(SCEV), TTI(TTI), IVPhi(IV) {
   1489     DT = DTree;
   1490     WI.NarrowIV = IVPhi;
   1491   }
   1492 
   1493   // Implement the interface used by simplifyUsersOfIV.
   1494   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
   1495 };
   1496 }
   1497 
   1498 /// Iteratively perform simplification on a worklist of IV users. Each
   1499 /// successive simplification may push more users which may themselves be
   1500 /// candidates for simplification.
   1501 ///
   1502 /// Sign/Zero extend elimination is interleaved with IV simplification.
   1503 ///
   1504 void IndVarSimplify::simplifyAndExtend(Loop *L,
   1505                                        SCEVExpander &Rewriter,
   1506                                        LoopInfo *LI) {
   1507   SmallVector<WideIVInfo, 8> WideIVs;
   1508 
   1509   SmallVector<PHINode*, 8> LoopPhis;
   1510   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
   1511     LoopPhis.push_back(cast<PHINode>(I));
   1512   }
   1513   // Each round of simplification iterates through the SimplifyIVUsers worklist
   1514   // for all current phis, then determines whether any IVs can be
   1515   // widened. Widening adds new phis to LoopPhis, inducing another round of
   1516   // simplification on the wide IVs.
   1517   while (!LoopPhis.empty()) {
   1518     // Evaluate as many IV expressions as possible before widening any IVs. This
   1519     // forces SCEV to set no-wrap flags before evaluating sign/zero
   1520     // extension. The first time SCEV attempts to normalize sign/zero extension,
   1521     // the result becomes final. So for the most predictable results, we delay
   1522     // evaluation of sign/zero extend evaluation until needed, and avoid running
   1523     // other SCEV based analysis prior to simplifyAndExtend.
   1524     do {
   1525       PHINode *CurrIV = LoopPhis.pop_back_val();
   1526 
   1527       // Information about sign/zero extensions of CurrIV.
   1528       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
   1529 
   1530       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor);
   1531 
   1532       if (Visitor.WI.WidestNativeType) {
   1533         WideIVs.push_back(Visitor.WI);
   1534       }
   1535     } while(!LoopPhis.empty());
   1536 
   1537     for (; !WideIVs.empty(); WideIVs.pop_back()) {
   1538       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
   1539       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
   1540         Changed = true;
   1541         LoopPhis.push_back(WidePhi);
   1542       }
   1543     }
   1544   }
   1545 }
   1546 
   1547 //===----------------------------------------------------------------------===//
   1548 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
   1549 //===----------------------------------------------------------------------===//
   1550 
   1551 /// Return true if this loop's backedge taken count expression can be safely and
   1552 /// cheaply expanded into an instruction sequence that can be used by
   1553 /// linearFunctionTestReplace.
   1554 ///
   1555 /// TODO: This fails for pointer-type loop counters with greater than one byte
   1556 /// strides, consequently preventing LFTR from running. For the purpose of LFTR
   1557 /// we could skip this check in the case that the LFTR loop counter (chosen by
   1558 /// FindLoopCounter) is also pointer type. Instead, we could directly convert
   1559 /// the loop test to an inequality test by checking the target data's alignment
   1560 /// of element types (given that the initial pointer value originates from or is
   1561 /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
   1562 /// However, we don't yet have a strong motivation for converting loop tests
   1563 /// into inequality tests.
   1564 static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
   1565                                         SCEVExpander &Rewriter) {
   1566   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   1567   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
   1568       BackedgeTakenCount->isZero())
   1569     return false;
   1570 
   1571   if (!L->getExitingBlock())
   1572     return false;
   1573 
   1574   // Can't rewrite non-branch yet.
   1575   if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
   1576     return false;
   1577 
   1578   if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
   1579     return false;
   1580 
   1581   return true;
   1582 }
   1583 
   1584 /// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
   1585 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
   1586   Instruction *IncI = dyn_cast<Instruction>(IncV);
   1587   if (!IncI)
   1588     return nullptr;
   1589 
   1590   switch (IncI->getOpcode()) {
   1591   case Instruction::Add:
   1592   case Instruction::Sub:
   1593     break;
   1594   case Instruction::GetElementPtr:
   1595     // An IV counter must preserve its type.
   1596     if (IncI->getNumOperands() == 2)
   1597       break;
   1598   default:
   1599     return nullptr;
   1600   }
   1601 
   1602   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
   1603   if (Phi && Phi->getParent() == L->getHeader()) {
   1604     if (isLoopInvariant(IncI->getOperand(1), L, DT))
   1605       return Phi;
   1606     return nullptr;
   1607   }
   1608   if (IncI->getOpcode() == Instruction::GetElementPtr)
   1609     return nullptr;
   1610 
   1611   // Allow add/sub to be commuted.
   1612   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
   1613   if (Phi && Phi->getParent() == L->getHeader()) {
   1614     if (isLoopInvariant(IncI->getOperand(0), L, DT))
   1615       return Phi;
   1616   }
   1617   return nullptr;
   1618 }
   1619 
   1620 /// Return the compare guarding the loop latch, or NULL for unrecognized tests.
   1621 static ICmpInst *getLoopTest(Loop *L) {
   1622   assert(L->getExitingBlock() && "expected loop exit");
   1623 
   1624   BasicBlock *LatchBlock = L->getLoopLatch();
   1625   // Don't bother with LFTR if the loop is not properly simplified.
   1626   if (!LatchBlock)
   1627     return nullptr;
   1628 
   1629   BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1630   assert(BI && "expected exit branch");
   1631 
   1632   return dyn_cast<ICmpInst>(BI->getCondition());
   1633 }
   1634 
   1635 /// linearFunctionTestReplace policy. Return true unless we can show that the
   1636 /// current exit test is already sufficiently canonical.
   1637 static bool needsLFTR(Loop *L, DominatorTree *DT) {
   1638   // Do LFTR to simplify the exit condition to an ICMP.
   1639   ICmpInst *Cond = getLoopTest(L);
   1640   if (!Cond)
   1641     return true;
   1642 
   1643   // Do LFTR to simplify the exit ICMP to EQ/NE
   1644   ICmpInst::Predicate Pred = Cond->getPredicate();
   1645   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
   1646     return true;
   1647 
   1648   // Look for a loop invariant RHS
   1649   Value *LHS = Cond->getOperand(0);
   1650   Value *RHS = Cond->getOperand(1);
   1651   if (!isLoopInvariant(RHS, L, DT)) {
   1652     if (!isLoopInvariant(LHS, L, DT))
   1653       return true;
   1654     std::swap(LHS, RHS);
   1655   }
   1656   // Look for a simple IV counter LHS
   1657   PHINode *Phi = dyn_cast<PHINode>(LHS);
   1658   if (!Phi)
   1659     Phi = getLoopPhiForCounter(LHS, L, DT);
   1660 
   1661   if (!Phi)
   1662     return true;
   1663 
   1664   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
   1665   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
   1666   if (Idx < 0)
   1667     return true;
   1668 
   1669   // Do LFTR if the exit condition's IV is *not* a simple counter.
   1670   Value *IncV = Phi->getIncomingValue(Idx);
   1671   return Phi != getLoopPhiForCounter(IncV, L, DT);
   1672 }
   1673 
   1674 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
   1675 /// down to checking that all operands are constant and listing instructions
   1676 /// that may hide undef.
   1677 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
   1678                                unsigned Depth) {
   1679   if (isa<Constant>(V))
   1680     return !isa<UndefValue>(V);
   1681 
   1682   if (Depth >= 6)
   1683     return false;
   1684 
   1685   // Conservatively handle non-constant non-instructions. For example, Arguments
   1686   // may be undef.
   1687   Instruction *I = dyn_cast<Instruction>(V);
   1688   if (!I)
   1689     return false;
   1690 
   1691   // Load and return values may be undef.
   1692   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
   1693     return false;
   1694 
   1695   // Optimistically handle other instructions.
   1696   for (Value *Op : I->operands()) {
   1697     if (!Visited.insert(Op).second)
   1698       continue;
   1699     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
   1700       return false;
   1701   }
   1702   return true;
   1703 }
   1704 
   1705 /// Return true if the given value is concrete. We must prove that undef can
   1706 /// never reach it.
   1707 ///
   1708 /// TODO: If we decide that this is a good approach to checking for undef, we
   1709 /// may factor it into a common location.
   1710 static bool hasConcreteDef(Value *V) {
   1711   SmallPtrSet<Value*, 8> Visited;
   1712   Visited.insert(V);
   1713   return hasConcreteDefImpl(V, Visited, 0);
   1714 }
   1715 
   1716 /// Return true if this IV has any uses other than the (soon to be rewritten)
   1717 /// loop exit test.
   1718 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
   1719   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
   1720   Value *IncV = Phi->getIncomingValue(LatchIdx);
   1721 
   1722   for (User *U : Phi->users())
   1723     if (U != Cond && U != IncV) return false;
   1724 
   1725   for (User *U : IncV->users())
   1726     if (U != Cond && U != Phi) return false;
   1727   return true;
   1728 }
   1729 
   1730 /// Find an affine IV in canonical form.
   1731 ///
   1732 /// BECount may be an i8* pointer type. The pointer difference is already
   1733 /// valid count without scaling the address stride, so it remains a pointer
   1734 /// expression as far as SCEV is concerned.
   1735 ///
   1736 /// Currently only valid for LFTR. See the comments on hasConcreteDef below.
   1737 ///
   1738 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
   1739 ///
   1740 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
   1741 /// This is difficult in general for SCEV because of potential overflow. But we
   1742 /// could at least handle constant BECounts.
   1743 static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
   1744                                 ScalarEvolution *SE, DominatorTree *DT) {
   1745   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
   1746 
   1747   Value *Cond =
   1748     cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
   1749 
   1750   // Loop over all of the PHI nodes, looking for a simple counter.
   1751   PHINode *BestPhi = nullptr;
   1752   const SCEV *BestInit = nullptr;
   1753   BasicBlock *LatchBlock = L->getLoopLatch();
   1754   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
   1755   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
   1756 
   1757   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
   1758     PHINode *Phi = cast<PHINode>(I);
   1759     if (!SE->isSCEVable(Phi->getType()))
   1760       continue;
   1761 
   1762     // Avoid comparing an integer IV against a pointer Limit.
   1763     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
   1764       continue;
   1765 
   1766     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
   1767     if (!AR || AR->getLoop() != L || !AR->isAffine())
   1768       continue;
   1769 
   1770     // AR may be a pointer type, while BECount is an integer type.
   1771     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
   1772     // AR may not be a narrower type, or we may never exit.
   1773     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
   1774     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
   1775       continue;
   1776 
   1777     const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
   1778     if (!Step || !Step->isOne())
   1779       continue;
   1780 
   1781     int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
   1782     Value *IncV = Phi->getIncomingValue(LatchIdx);
   1783     if (getLoopPhiForCounter(IncV, L, DT) != Phi)
   1784       continue;
   1785 
   1786     // Avoid reusing a potentially undef value to compute other values that may
   1787     // have originally had a concrete definition.
   1788     if (!hasConcreteDef(Phi)) {
   1789       // We explicitly allow unknown phis as long as they are already used by
   1790       // the loop test. In this case we assume that performing LFTR could not
   1791       // increase the number of undef users.
   1792       if (ICmpInst *Cond = getLoopTest(L)) {
   1793         if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
   1794             Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
   1795           continue;
   1796         }
   1797       }
   1798     }
   1799     const SCEV *Init = AR->getStart();
   1800 
   1801     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
   1802       // Don't force a live loop counter if another IV can be used.
   1803       if (AlmostDeadIV(Phi, LatchBlock, Cond))
   1804         continue;
   1805 
   1806       // Prefer to count-from-zero. This is a more "canonical" counter form. It
   1807       // also prefers integer to pointer IVs.
   1808       if (BestInit->isZero() != Init->isZero()) {
   1809         if (BestInit->isZero())
   1810           continue;
   1811       }
   1812       // If two IVs both count from zero or both count from nonzero then the
   1813       // narrower is likely a dead phi that has been widened. Use the wider phi
   1814       // to allow the other to be eliminated.
   1815       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
   1816         continue;
   1817     }
   1818     BestPhi = Phi;
   1819     BestInit = Init;
   1820   }
   1821   return BestPhi;
   1822 }
   1823 
   1824 /// Help linearFunctionTestReplace by generating a value that holds the RHS of
   1825 /// the new loop test.
   1826 static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
   1827                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
   1828   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
   1829   assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
   1830   const SCEV *IVInit = AR->getStart();
   1831 
   1832   // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
   1833   // finds a valid pointer IV. Sign extend BECount in order to materialize a
   1834   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
   1835   // the existing GEPs whenever possible.
   1836   if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
   1837     // IVOffset will be the new GEP offset that is interpreted by GEP as a
   1838     // signed value. IVCount on the other hand represents the loop trip count,
   1839     // which is an unsigned value. FindLoopCounter only allows induction
   1840     // variables that have a positive unit stride of one. This means we don't
   1841     // have to handle the case of negative offsets (yet) and just need to zero
   1842     // extend IVCount.
   1843     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
   1844     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
   1845 
   1846     // Expand the code for the iteration count.
   1847     assert(SE->isLoopInvariant(IVOffset, L) &&
   1848            "Computed iteration count is not loop invariant!");
   1849     BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1850     Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
   1851 
   1852     Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
   1853     assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
   1854     // We could handle pointer IVs other than i8*, but we need to compensate for
   1855     // gep index scaling. See canExpandBackedgeTakenCount comments.
   1856     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
   1857                              cast<PointerType>(GEPBase->getType())
   1858                                  ->getElementType())->isOne() &&
   1859            "unit stride pointer IV must be i8*");
   1860 
   1861     IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
   1862     return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
   1863   } else {
   1864     // In any other case, convert both IVInit and IVCount to integers before
   1865     // comparing. This may result in SCEV expension of pointers, but in practice
   1866     // SCEV will fold the pointer arithmetic away as such:
   1867     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
   1868     //
   1869     // Valid Cases: (1) both integers is most common; (2) both may be pointers
   1870     // for simple memset-style loops.
   1871     //
   1872     // IVInit integer and IVCount pointer would only occur if a canonical IV
   1873     // were generated on top of case #2, which is not expected.
   1874 
   1875     const SCEV *IVLimit = nullptr;
   1876     // For unit stride, IVCount = Start + BECount with 2's complement overflow.
   1877     // For non-zero Start, compute IVCount here.
   1878     if (AR->getStart()->isZero())
   1879       IVLimit = IVCount;
   1880     else {
   1881       assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
   1882       const SCEV *IVInit = AR->getStart();
   1883 
   1884       // For integer IVs, truncate the IV before computing IVInit + BECount.
   1885       if (SE->getTypeSizeInBits(IVInit->getType())
   1886           > SE->getTypeSizeInBits(IVCount->getType()))
   1887         IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
   1888 
   1889       IVLimit = SE->getAddExpr(IVInit, IVCount);
   1890     }
   1891     // Expand the code for the iteration count.
   1892     BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1893     IRBuilder<> Builder(BI);
   1894     assert(SE->isLoopInvariant(IVLimit, L) &&
   1895            "Computed iteration count is not loop invariant!");
   1896     // Ensure that we generate the same type as IndVar, or a smaller integer
   1897     // type. In the presence of null pointer values, we have an integer type
   1898     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
   1899     Type *LimitTy = IVCount->getType()->isPointerTy() ?
   1900       IndVar->getType() : IVCount->getType();
   1901     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
   1902   }
   1903 }
   1904 
   1905 /// This method rewrites the exit condition of the loop to be a canonical !=
   1906 /// comparison against the incremented loop induction variable.  This pass is
   1907 /// able to rewrite the exit tests of any loop where the SCEV analysis can
   1908 /// determine a loop-invariant trip count of the loop, which is actually a much
   1909 /// broader range than just linear tests.
   1910 Value *IndVarSimplify::
   1911 linearFunctionTestReplace(Loop *L,
   1912                           const SCEV *BackedgeTakenCount,
   1913                           PHINode *IndVar,
   1914                           SCEVExpander &Rewriter) {
   1915   assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
   1916 
   1917   // Initialize CmpIndVar and IVCount to their preincremented values.
   1918   Value *CmpIndVar = IndVar;
   1919   const SCEV *IVCount = BackedgeTakenCount;
   1920 
   1921   // If the exiting block is the same as the backedge block, we prefer to
   1922   // compare against the post-incremented value, otherwise we must compare
   1923   // against the preincremented value.
   1924   if (L->getExitingBlock() == L->getLoopLatch()) {
   1925     // Add one to the "backedge-taken" count to get the trip count.
   1926     // This addition may overflow, which is valid as long as the comparison is
   1927     // truncated to BackedgeTakenCount->getType().
   1928     IVCount = SE->getAddExpr(BackedgeTakenCount,
   1929                              SE->getOne(BackedgeTakenCount->getType()));
   1930     // The BackedgeTaken expression contains the number of times that the
   1931     // backedge branches to the loop header.  This is one less than the
   1932     // number of times the loop executes, so use the incremented indvar.
   1933     CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
   1934   }
   1935 
   1936   Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
   1937   assert(ExitCnt->getType()->isPointerTy() ==
   1938              IndVar->getType()->isPointerTy() &&
   1939          "genLoopLimit missed a cast");
   1940 
   1941   // Insert a new icmp_ne or icmp_eq instruction before the branch.
   1942   BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1943   ICmpInst::Predicate P;
   1944   if (L->contains(BI->getSuccessor(0)))
   1945     P = ICmpInst::ICMP_NE;
   1946   else
   1947     P = ICmpInst::ICMP_EQ;
   1948 
   1949   DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
   1950                << "      LHS:" << *CmpIndVar << '\n'
   1951                << "       op:\t"
   1952                << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
   1953                << "      RHS:\t" << *ExitCnt << "\n"
   1954                << "  IVCount:\t" << *IVCount << "\n");
   1955 
   1956   IRBuilder<> Builder(BI);
   1957 
   1958   // LFTR can ignore IV overflow and truncate to the width of
   1959   // BECount. This avoids materializing the add(zext(add)) expression.
   1960   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
   1961   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
   1962   if (CmpIndVarSize > ExitCntSize) {
   1963     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
   1964     const SCEV *ARStart = AR->getStart();
   1965     const SCEV *ARStep = AR->getStepRecurrence(*SE);
   1966     // For constant IVCount, avoid truncation.
   1967     if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
   1968       const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
   1969       APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
   1970       // Note that the post-inc value of BackedgeTakenCount may have overflowed
   1971       // above such that IVCount is now zero.
   1972       if (IVCount != BackedgeTakenCount && Count == 0) {
   1973         Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
   1974         ++Count;
   1975       }
   1976       else
   1977         Count = Count.zext(CmpIndVarSize);
   1978       APInt NewLimit;
   1979       if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
   1980         NewLimit = Start - Count;
   1981       else
   1982         NewLimit = Start + Count;
   1983       ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
   1984 
   1985       DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
   1986     } else {
   1987       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
   1988                                       "lftr.wideiv");
   1989     }
   1990   }
   1991   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
   1992   Value *OrigCond = BI->getCondition();
   1993   // It's tempting to use replaceAllUsesWith here to fully replace the old
   1994   // comparison, but that's not immediately safe, since users of the old
   1995   // comparison may not be dominated by the new comparison. Instead, just
   1996   // update the branch to use the new comparison; in the common case this
   1997   // will make old comparison dead.
   1998   BI->setCondition(Cond);
   1999   DeadInsts.push_back(OrigCond);
   2000 
   2001   ++NumLFTR;
   2002   Changed = true;
   2003   return Cond;
   2004 }
   2005 
   2006 //===----------------------------------------------------------------------===//
   2007 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
   2008 //===----------------------------------------------------------------------===//
   2009 
   2010 /// If there's a single exit block, sink any loop-invariant values that
   2011 /// were defined in the preheader but not used inside the loop into the
   2012 /// exit block to reduce register pressure in the loop.
   2013 void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
   2014   BasicBlock *ExitBlock = L->getExitBlock();
   2015   if (!ExitBlock) return;
   2016 
   2017   BasicBlock *Preheader = L->getLoopPreheader();
   2018   if (!Preheader) return;
   2019 
   2020   Instruction *InsertPt = &*ExitBlock->getFirstInsertionPt();
   2021   BasicBlock::iterator I(Preheader->getTerminator());
   2022   while (I != Preheader->begin()) {
   2023     --I;
   2024     // New instructions were inserted at the end of the preheader.
   2025     if (isa<PHINode>(I))
   2026       break;
   2027 
   2028     // Don't move instructions which might have side effects, since the side
   2029     // effects need to complete before instructions inside the loop.  Also don't
   2030     // move instructions which might read memory, since the loop may modify
   2031     // memory. Note that it's okay if the instruction might have undefined
   2032     // behavior: LoopSimplify guarantees that the preheader dominates the exit
   2033     // block.
   2034     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
   2035       continue;
   2036 
   2037     // Skip debug info intrinsics.
   2038     if (isa<DbgInfoIntrinsic>(I))
   2039       continue;
   2040 
   2041     // Skip eh pad instructions.
   2042     if (I->isEHPad())
   2043       continue;
   2044 
   2045     // Don't sink alloca: we never want to sink static alloca's out of the
   2046     // entry block, and correctly sinking dynamic alloca's requires
   2047     // checks for stacksave/stackrestore intrinsics.
   2048     // FIXME: Refactor this check somehow?
   2049     if (isa<AllocaInst>(I))
   2050       continue;
   2051 
   2052     // Determine if there is a use in or before the loop (direct or
   2053     // otherwise).
   2054     bool UsedInLoop = false;
   2055     for (Use &U : I->uses()) {
   2056       Instruction *User = cast<Instruction>(U.getUser());
   2057       BasicBlock *UseBB = User->getParent();
   2058       if (PHINode *P = dyn_cast<PHINode>(User)) {
   2059         unsigned i =
   2060           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
   2061         UseBB = P->getIncomingBlock(i);
   2062       }
   2063       if (UseBB == Preheader || L->contains(UseBB)) {
   2064         UsedInLoop = true;
   2065         break;
   2066       }
   2067     }
   2068 
   2069     // If there is, the def must remain in the preheader.
   2070     if (UsedInLoop)
   2071       continue;
   2072 
   2073     // Otherwise, sink it to the exit block.
   2074     Instruction *ToMove = &*I;
   2075     bool Done = false;
   2076 
   2077     if (I != Preheader->begin()) {
   2078       // Skip debug info intrinsics.
   2079       do {
   2080         --I;
   2081       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
   2082 
   2083       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
   2084         Done = true;
   2085     } else {
   2086       Done = true;
   2087     }
   2088 
   2089     ToMove->moveBefore(InsertPt);
   2090     if (Done) break;
   2091     InsertPt = ToMove;
   2092   }
   2093 }
   2094 
   2095 //===----------------------------------------------------------------------===//
   2096 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
   2097 //===----------------------------------------------------------------------===//
   2098 
   2099 bool IndVarSimplify::run(Loop *L) {
   2100   // We need (and expect!) the incoming loop to be in LCSSA.
   2101   assert(L->isRecursivelyLCSSAForm(*DT) && "LCSSA required to run indvars!");
   2102 
   2103   // If LoopSimplify form is not available, stay out of trouble. Some notes:
   2104   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
   2105   //    canonicalization can be a pessimization without LSR to "clean up"
   2106   //    afterwards.
   2107   //  - We depend on having a preheader; in particular,
   2108   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
   2109   //    and we're in trouble if we can't find the induction variable even when
   2110   //    we've manually inserted one.
   2111   if (!L->isLoopSimplifyForm())
   2112     return false;
   2113 
   2114   // If there are any floating-point recurrences, attempt to
   2115   // transform them to use integer recurrences.
   2116   rewriteNonIntegerIVs(L);
   2117 
   2118   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   2119 
   2120   // Create a rewriter object which we'll use to transform the code with.
   2121   SCEVExpander Rewriter(*SE, DL, "indvars");
   2122 #ifndef NDEBUG
   2123   Rewriter.setDebugType(DEBUG_TYPE);
   2124 #endif
   2125 
   2126   // Eliminate redundant IV users.
   2127   //
   2128   // Simplification works best when run before other consumers of SCEV. We
   2129   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
   2130   // other expressions involving loop IVs have been evaluated. This helps SCEV
   2131   // set no-wrap flags before normalizing sign/zero extension.
   2132   Rewriter.disableCanonicalMode();
   2133   simplifyAndExtend(L, Rewriter, LI);
   2134 
   2135   // Check to see if this loop has a computable loop-invariant execution count.
   2136   // If so, this means that we can compute the final value of any expressions
   2137   // that are recurrent in the loop, and substitute the exit values from the
   2138   // loop into any instructions outside of the loop that use the final values of
   2139   // the current expressions.
   2140   //
   2141   if (ReplaceExitValue != NeverRepl &&
   2142       !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
   2143     rewriteLoopExitValues(L, Rewriter);
   2144 
   2145   // Eliminate redundant IV cycles.
   2146   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
   2147 
   2148   // If we have a trip count expression, rewrite the loop's exit condition
   2149   // using it.  We can currently only handle loops with a single exit.
   2150   if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
   2151     PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
   2152     if (IndVar) {
   2153       // Check preconditions for proper SCEVExpander operation. SCEV does not
   2154       // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
   2155       // pass that uses the SCEVExpander must do it. This does not work well for
   2156       // loop passes because SCEVExpander makes assumptions about all loops,
   2157       // while LoopPassManager only forces the current loop to be simplified.
   2158       //
   2159       // FIXME: SCEV expansion has no way to bail out, so the caller must
   2160       // explicitly check any assumptions made by SCEV. Brittle.
   2161       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
   2162       if (!AR || AR->getLoop()->getLoopPreheader())
   2163         (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
   2164                                         Rewriter);
   2165     }
   2166   }
   2167   // Clear the rewriter cache, because values that are in the rewriter's cache
   2168   // can be deleted in the loop below, causing the AssertingVH in the cache to
   2169   // trigger.
   2170   Rewriter.clear();
   2171 
   2172   // Now that we're done iterating through lists, clean up any instructions
   2173   // which are now dead.
   2174   while (!DeadInsts.empty())
   2175     if (Instruction *Inst =
   2176             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
   2177       RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
   2178 
   2179   // The Rewriter may not be used from this point on.
   2180 
   2181   // Loop-invariant instructions in the preheader that aren't used in the
   2182   // loop may be sunk below the loop to reduce register pressure.
   2183   sinkUnusedInvariants(L);
   2184 
   2185   // rewriteFirstIterationLoopExitValues does not rely on the computation of
   2186   // trip count and therefore can further simplify exit values in addition to
   2187   // rewriteLoopExitValues.
   2188   rewriteFirstIterationLoopExitValues(L);
   2189 
   2190   // Clean up dead instructions.
   2191   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
   2192 
   2193   // Check a post-condition.
   2194   assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");
   2195 
   2196   // Verify that LFTR, and any other change have not interfered with SCEV's
   2197   // ability to compute trip count.
   2198 #ifndef NDEBUG
   2199   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
   2200     SE->forgetLoop(L);
   2201     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
   2202     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
   2203         SE->getTypeSizeInBits(NewBECount->getType()))
   2204       NewBECount = SE->getTruncateOrNoop(NewBECount,
   2205                                          BackedgeTakenCount->getType());
   2206     else
   2207       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
   2208                                                  NewBECount->getType());
   2209     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
   2210   }
   2211 #endif
   2212 
   2213   return Changed;
   2214 }
   2215 
   2216 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, AnalysisManager<Loop> &AM) {
   2217   auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
   2218   Function *F = L.getHeader()->getParent();
   2219   const DataLayout &DL = F->getParent()->getDataLayout();
   2220 
   2221   auto *LI = FAM.getCachedResult<LoopAnalysis>(*F);
   2222   auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
   2223   auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
   2224 
   2225   assert((LI && SE && DT) &&
   2226          "Analyses required for indvarsimplify not available!");
   2227 
   2228   // Optional analyses.
   2229   auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F);
   2230   auto *TLI = FAM.getCachedResult<TargetLibraryAnalysis>(*F);
   2231 
   2232   IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
   2233   if (!IVS.run(&L))
   2234     return PreservedAnalyses::all();
   2235 
   2236   // FIXME: This should also 'preserve the CFG'.
   2237   return getLoopPassPreservedAnalyses();
   2238 }
   2239 
   2240 namespace {
   2241 struct IndVarSimplifyLegacyPass : public LoopPass {
   2242   static char ID; // Pass identification, replacement for typeid
   2243   IndVarSimplifyLegacyPass() : LoopPass(ID) {
   2244     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
   2245   }
   2246 
   2247   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
   2248     if (skipLoop(L))
   2249       return false;
   2250 
   2251     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   2252     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
   2253     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   2254     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
   2255     auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
   2256     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
   2257     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
   2258     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
   2259 
   2260     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
   2261     return IVS.run(L);
   2262   }
   2263 
   2264   void getAnalysisUsage(AnalysisUsage &AU) const override {
   2265     AU.setPreservesCFG();
   2266     getLoopAnalysisUsage(AU);
   2267   }
   2268 };
   2269 }
   2270 
   2271 char IndVarSimplifyLegacyPass::ID = 0;
   2272 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
   2273                       "Induction Variable Simplification", false, false)
   2274 INITIALIZE_PASS_DEPENDENCY(LoopPass)
   2275 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
   2276                     "Induction Variable Simplification", false, false)
   2277 
   2278 Pass *llvm::createIndVarSimplifyPass() {
   2279   return new IndVarSimplifyLegacyPass();
   2280 }
   2281