Home | History | Annotate | Download | only in Scalar
      1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This transformation analyzes and transforms the induction variables (and
     11 // computations derived from them) into simpler forms suitable for subsequent
     12 // analysis and transformation.
     13 //
     14 // If the trip count of a loop is computable, this pass also makes the following
     15 // changes:
     16 //   1. The exit condition for the loop is canonicalized to compare the
     17 //      induction value against the exit value.  This turns loops like:
     18 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
     19 //   2. Any use outside of the loop of an expression derived from the indvar
     20 //      is changed to compute the derived value outside of the loop, eliminating
     21 //      the dependence on the exit value of the induction variable.  If the only
     22 //      purpose of the loop is to compute the exit value of some derived
     23 //      expression, this transformation will make the loop dead.
     24 //
     25 //===----------------------------------------------------------------------===//
     26 
     27 #define DEBUG_TYPE "indvars"
     28 #include "llvm/Transforms/Scalar.h"
     29 #include "llvm/BasicBlock.h"
     30 #include "llvm/Constants.h"
     31 #include "llvm/Instructions.h"
     32 #include "llvm/IntrinsicInst.h"
     33 #include "llvm/LLVMContext.h"
     34 #include "llvm/Type.h"
     35 #include "llvm/Analysis/Dominators.h"
     36 #include "llvm/Analysis/IVUsers.h"
     37 #include "llvm/Analysis/ScalarEvolutionExpander.h"
     38 #include "llvm/Analysis/LoopInfo.h"
     39 #include "llvm/Analysis/LoopPass.h"
     40 #include "llvm/Support/CFG.h"
     41 #include "llvm/Support/CommandLine.h"
     42 #include "llvm/Support/Debug.h"
     43 #include "llvm/Support/raw_ostream.h"
     44 #include "llvm/Transforms/Utils/Local.h"
     45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     46 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
     47 #include "llvm/Target/TargetData.h"
     48 #include "llvm/ADT/DenseMap.h"
     49 #include "llvm/ADT/SmallVector.h"
     50 #include "llvm/ADT/Statistic.h"
     51 using namespace llvm;
     52 
     53 STATISTIC(NumRemoved     , "Number of aux indvars removed");
     54 STATISTIC(NumWidened     , "Number of indvars widened");
     55 STATISTIC(NumInserted    , "Number of canonical indvars added");
     56 STATISTIC(NumReplaced    , "Number of exit values replaced");
     57 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
     58 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
     59 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
     60 
     61 namespace llvm {
     62   cl::opt<bool> EnableIVRewrite(
     63     "enable-iv-rewrite", cl::Hidden,
     64     cl::desc("Enable canonical induction variable rewriting"));
     65 
     66   // Trip count verification can be enabled by default under NDEBUG if we
     67   // implement a strong expression equivalence checker in SCEV. Until then, we
     68   // use the verify-indvars flag, which may assert in some cases.
     69   cl::opt<bool> VerifyIndvars(
     70     "verify-indvars", cl::Hidden,
     71     cl::desc("Verify the ScalarEvolution result after running indvars"));
     72 }
     73 
     74 namespace {
     75   class IndVarSimplify : public LoopPass {
     76     IVUsers         *IU;
     77     LoopInfo        *LI;
     78     ScalarEvolution *SE;
     79     DominatorTree   *DT;
     80     TargetData      *TD;
     81 
     82     SmallVector<WeakVH, 16> DeadInsts;
     83     bool Changed;
     84   public:
     85 
     86     static char ID; // Pass identification, replacement for typeid
     87     IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
     88                        Changed(false) {
     89       initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
     90     }
     91 
     92     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
     93 
     94     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     95       AU.addRequired<DominatorTree>();
     96       AU.addRequired<LoopInfo>();
     97       AU.addRequired<ScalarEvolution>();
     98       AU.addRequiredID(LoopSimplifyID);
     99       AU.addRequiredID(LCSSAID);
    100       if (EnableIVRewrite)
    101         AU.addRequired<IVUsers>();
    102       AU.addPreserved<ScalarEvolution>();
    103       AU.addPreservedID(LoopSimplifyID);
    104       AU.addPreservedID(LCSSAID);
    105       if (EnableIVRewrite)
    106         AU.addPreserved<IVUsers>();
    107       AU.setPreservesCFG();
    108     }
    109 
    110   private:
    111     virtual void releaseMemory() {
    112       DeadInsts.clear();
    113     }
    114 
    115     bool isValidRewrite(Value *FromVal, Value *ToVal);
    116 
    117     void HandleFloatingPointIV(Loop *L, PHINode *PH);
    118     void RewriteNonIntegerIVs(Loop *L);
    119 
    120     void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
    121 
    122     void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
    123 
    124     void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
    125 
    126     Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
    127                                      PHINode *IndVar, SCEVExpander &Rewriter);
    128 
    129     void SinkUnusedInvariants(Loop *L);
    130   };
    131 }
    132 
    133 char IndVarSimplify::ID = 0;
    134 INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
    135                 "Induction Variable Simplification", false, false)
    136 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
    137 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
    138 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
    139 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
    140 INITIALIZE_PASS_DEPENDENCY(LCSSA)
    141 INITIALIZE_PASS_DEPENDENCY(IVUsers)
    142 INITIALIZE_PASS_END(IndVarSimplify, "indvars",
    143                 "Induction Variable Simplification", false, false)
    144 
    145 Pass *llvm::createIndVarSimplifyPass() {
    146   return new IndVarSimplify();
    147 }
    148 
    149 /// isValidRewrite - Return true if the SCEV expansion generated by the
    150 /// rewriter can replace the original value. SCEV guarantees that it
    151 /// produces the same value, but the way it is produced may be illegal IR.
    152 /// Ideally, this function will only be called for verification.
    153 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
    154   // If an SCEV expression subsumed multiple pointers, its expansion could
    155   // reassociate the GEP changing the base pointer. This is illegal because the
    156   // final address produced by a GEP chain must be inbounds relative to its
    157   // underlying object. Otherwise basic alias analysis, among other things,
    158   // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
    159   // producing an expression involving multiple pointers. Until then, we must
    160   // bail out here.
    161   //
    162   // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
    163   // because it understands lcssa phis while SCEV does not.
    164   Value *FromPtr = FromVal;
    165   Value *ToPtr = ToVal;
    166   if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
    167     FromPtr = GEP->getPointerOperand();
    168   }
    169   if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
    170     ToPtr = GEP->getPointerOperand();
    171   }
    172   if (FromPtr != FromVal || ToPtr != ToVal) {
    173     // Quickly check the common case
    174     if (FromPtr == ToPtr)
    175       return true;
    176 
    177     // SCEV may have rewritten an expression that produces the GEP's pointer
    178     // operand. That's ok as long as the pointer operand has the same base
    179     // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    180     // base of a recurrence. This handles the case in which SCEV expansion
    181     // converts a pointer type recurrence into a nonrecurrent pointer base
    182     // indexed by an integer recurrence.
    183     const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    184     const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    185     if (FromBase == ToBase)
    186       return true;
    187 
    188     DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
    189           << *FromBase << " != " << *ToBase << "\n");
    190 
    191     return false;
    192   }
    193   return true;
    194 }
    195 
    196 /// Determine the insertion point for this user. By default, insert immediately
    197 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
    198 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
    199 /// common dominator for the incoming blocks.
    200 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
    201                                           DominatorTree *DT) {
    202   PHINode *PHI = dyn_cast<PHINode>(User);
    203   if (!PHI)
    204     return User;
    205 
    206   Instruction *InsertPt = 0;
    207   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    208     if (PHI->getIncomingValue(i) != Def)
    209       continue;
    210 
    211     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
    212     if (!InsertPt) {
    213       InsertPt = InsertBB->getTerminator();
    214       continue;
    215     }
    216     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    217     InsertPt = InsertBB->getTerminator();
    218   }
    219   assert(InsertPt && "Missing phi operand");
    220   assert((!isa<Instruction>(Def) ||
    221           DT->dominates(cast<Instruction>(Def), InsertPt)) &&
    222          "def does not dominate all uses");
    223   return InsertPt;
    224 }
    225 
    226 //===----------------------------------------------------------------------===//
    227 // RewriteNonIntegerIVs and helpers. Prefer integer IVs.
    228 //===----------------------------------------------------------------------===//
    229 
    230 /// ConvertToSInt - Convert APF to an integer, if possible.
    231 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
    232   bool isExact = false;
    233   if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
    234     return false;
    235   // See if we can convert this to an int64_t
    236   uint64_t UIntVal;
    237   if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
    238                            &isExact) != APFloat::opOK || !isExact)
    239     return false;
    240   IntVal = UIntVal;
    241   return true;
    242 }
    243 
    244 /// HandleFloatingPointIV - If the loop has floating induction variable
    245 /// then insert corresponding integer induction variable if possible.
    246 /// For example,
    247 /// for(double i = 0; i < 10000; ++i)
    248 ///   bar(i)
    249 /// is converted into
    250 /// for(int i = 0; i < 10000; ++i)
    251 ///   bar((double)i);
    252 ///
    253 void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
    254   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
    255   unsigned BackEdge     = IncomingEdge^1;
    256 
    257   // Check incoming value.
    258   ConstantFP *InitValueVal =
    259     dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
    260 
    261   int64_t InitValue;
    262   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    263     return;
    264 
    265   // Check IV increment. Reject this PN if increment operation is not
    266   // an add or increment value can not be represented by an integer.
    267   BinaryOperator *Incr =
    268     dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
    269   if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
    270 
    271   // If this is not an add of the PHI with a constantfp, or if the constant fp
    272   // is not an integer, bail out.
    273   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
    274   int64_t IncValue;
    275   if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
    276       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    277     return;
    278 
    279   // Check Incr uses. One user is PN and the other user is an exit condition
    280   // used by the conditional terminator.
    281   Value::use_iterator IncrUse = Incr->use_begin();
    282   Instruction *U1 = cast<Instruction>(*IncrUse++);
    283   if (IncrUse == Incr->use_end()) return;
    284   Instruction *U2 = cast<Instruction>(*IncrUse++);
    285   if (IncrUse != Incr->use_end()) return;
    286 
    287   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
    288   // only used by a branch, we can't transform it.
    289   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
    290   if (!Compare)
    291     Compare = dyn_cast<FCmpInst>(U2);
    292   if (Compare == 0 || !Compare->hasOneUse() ||
    293       !isa<BranchInst>(Compare->use_back()))
    294     return;
    295 
    296   BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
    297 
    298   // We need to verify that the branch actually controls the iteration count
    299   // of the loop.  If not, the new IV can overflow and no one will notice.
    300   // The branch block must be in the loop and one of the successors must be out
    301   // of the loop.
    302   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
    303   if (!L->contains(TheBr->getParent()) ||
    304       (L->contains(TheBr->getSuccessor(0)) &&
    305        L->contains(TheBr->getSuccessor(1))))
    306     return;
    307 
    308 
    309   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
    310   // transform it.
    311   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
    312   int64_t ExitValue;
    313   if (ExitValueVal == 0 ||
    314       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    315     return;
    316 
    317   // Find new predicate for integer comparison.
    318   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
    319   switch (Compare->getPredicate()) {
    320   default: return;  // Unknown comparison.
    321   case CmpInst::FCMP_OEQ:
    322   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
    323   case CmpInst::FCMP_ONE:
    324   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
    325   case CmpInst::FCMP_OGT:
    326   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
    327   case CmpInst::FCMP_OGE:
    328   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
    329   case CmpInst::FCMP_OLT:
    330   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
    331   case CmpInst::FCMP_OLE:
    332   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
    333   }
    334 
    335   // We convert the floating point induction variable to a signed i32 value if
    336   // we can.  This is only safe if the comparison will not overflow in a way
    337   // that won't be trapped by the integer equivalent operations.  Check for this
    338   // now.
    339   // TODO: We could use i64 if it is native and the range requires it.
    340 
    341   // The start/stride/exit values must all fit in signed i32.
    342   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    343     return;
    344 
    345   // If not actually striding (add x, 0.0), avoid touching the code.
    346   if (IncValue == 0)
    347     return;
    348 
    349   // Positive and negative strides have different safety conditions.
    350   if (IncValue > 0) {
    351     // If we have a positive stride, we require the init to be less than the
    352     // exit value.
    353     if (InitValue >= ExitValue)
    354       return;
    355 
    356     uint32_t Range = uint32_t(ExitValue-InitValue);
    357     // Check for infinite loop, either:
    358     // while (i <= Exit) or until (i > Exit)
    359     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
    360       if (++Range == 0) return;  // Range overflows.
    361     }
    362 
    363     unsigned Leftover = Range % uint32_t(IncValue);
    364 
    365     // If this is an equality comparison, we require that the strided value
    366     // exactly land on the exit value, otherwise the IV condition will wrap
    367     // around and do things the fp IV wouldn't.
    368     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
    369         Leftover != 0)
    370       return;
    371 
    372     // If the stride would wrap around the i32 before exiting, we can't
    373     // transform the IV.
    374     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
    375       return;
    376 
    377   } else {
    378     // If we have a negative stride, we require the init to be greater than the
    379     // exit value.
    380     if (InitValue <= ExitValue)
    381       return;
    382 
    383     uint32_t Range = uint32_t(InitValue-ExitValue);
    384     // Check for infinite loop, either:
    385     // while (i >= Exit) or until (i < Exit)
    386     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
    387       if (++Range == 0) return;  // Range overflows.
    388     }
    389 
    390     unsigned Leftover = Range % uint32_t(-IncValue);
    391 
    392     // If this is an equality comparison, we require that the strided value
    393     // exactly land on the exit value, otherwise the IV condition will wrap
    394     // around and do things the fp IV wouldn't.
    395     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
    396         Leftover != 0)
    397       return;
    398 
    399     // If the stride would wrap around the i32 before exiting, we can't
    400     // transform the IV.
    401     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
    402       return;
    403   }
    404 
    405   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
    406 
    407   // Insert new integer induction variable.
    408   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
    409   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
    410                       PN->getIncomingBlock(IncomingEdge));
    411 
    412   Value *NewAdd =
    413     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
    414                               Incr->getName()+".int", Incr);
    415   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
    416 
    417   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
    418                                       ConstantInt::get(Int32Ty, ExitValue),
    419                                       Compare->getName());
    420 
    421   // In the following deletions, PN may become dead and may be deleted.
    422   // Use a WeakVH to observe whether this happens.
    423   WeakVH WeakPH = PN;
    424 
    425   // Delete the old floating point exit comparison.  The branch starts using the
    426   // new comparison.
    427   NewCompare->takeName(Compare);
    428   Compare->replaceAllUsesWith(NewCompare);
    429   RecursivelyDeleteTriviallyDeadInstructions(Compare);
    430 
    431   // Delete the old floating point increment.
    432   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
    433   RecursivelyDeleteTriviallyDeadInstructions(Incr);
    434 
    435   // If the FP induction variable still has uses, this is because something else
    436   // in the loop uses its value.  In order to canonicalize the induction
    437   // variable, we chose to eliminate the IV and rewrite it in terms of an
    438   // int->fp cast.
    439   //
    440   // We give preference to sitofp over uitofp because it is faster on most
    441   // platforms.
    442   if (WeakPH) {
    443     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
    444                                  PN->getParent()->getFirstInsertionPt());
    445     PN->replaceAllUsesWith(Conv);
    446     RecursivelyDeleteTriviallyDeadInstructions(PN);
    447   }
    448 
    449   // Add a new IVUsers entry for the newly-created integer PHI.
    450   if (IU)
    451     IU->AddUsersIfInteresting(NewPHI);
    452 
    453   Changed = true;
    454 }
    455 
    456 void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
    457   // First step.  Check to see if there are any floating-point recurrences.
    458   // If there are, change them into integer recurrences, permitting analysis by
    459   // the SCEV routines.
    460   //
    461   BasicBlock *Header = L->getHeader();
    462 
    463   SmallVector<WeakVH, 8> PHIs;
    464   for (BasicBlock::iterator I = Header->begin();
    465        PHINode *PN = dyn_cast<PHINode>(I); ++I)
    466     PHIs.push_back(PN);
    467 
    468   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    469     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
    470       HandleFloatingPointIV(L, PN);
    471 
    472   // If the loop previously had floating-point IV, ScalarEvolution
    473   // may not have been able to compute a trip count. Now that we've done some
    474   // re-writing, the trip count may be computable.
    475   if (Changed)
    476     SE->forgetLoop(L);
    477 }
    478 
    479 //===----------------------------------------------------------------------===//
    480 // RewriteLoopExitValues - Optimize IV users outside the loop.
    481 // As a side effect, reduces the amount of IV processing within the loop.
    482 //===----------------------------------------------------------------------===//
    483 
    484 /// RewriteLoopExitValues - Check to see if this loop has a computable
    485 /// loop-invariant execution count.  If so, this means that we can compute the
    486 /// final value of any expressions that are recurrent in the loop, and
    487 /// substitute the exit values from the loop into any instructions outside of
    488 /// the loop that use the final values of the current expressions.
    489 ///
    490 /// This is mostly redundant with the regular IndVarSimplify activities that
    491 /// happen later, except that it's more powerful in some cases, because it's
    492 /// able to brute-force evaluate arbitrary instructions as long as they have
    493 /// constant operands at the beginning of the loop.
    494 void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
    495   // Verify the input to the pass in already in LCSSA form.
    496   assert(L->isLCSSAForm(*DT));
    497 
    498   SmallVector<BasicBlock*, 8> ExitBlocks;
    499   L->getUniqueExitBlocks(ExitBlocks);
    500 
    501   // Find all values that are computed inside the loop, but used outside of it.
    502   // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
    503   // the exit blocks of the loop to find them.
    504   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    505     BasicBlock *ExitBB = ExitBlocks[i];
    506 
    507     // If there are no PHI nodes in this exit block, then no values defined
    508     // inside the loop are used on this path, skip it.
    509     PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    510     if (!PN) continue;
    511 
    512     unsigned NumPreds = PN->getNumIncomingValues();
    513 
    514     // Iterate over all of the PHI nodes.
    515     BasicBlock::iterator BBI = ExitBB->begin();
    516     while ((PN = dyn_cast<PHINode>(BBI++))) {
    517       if (PN->use_empty())
    518         continue; // dead use, don't replace it
    519 
    520       // SCEV only supports integer expressions for now.
    521       if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
    522         continue;
    523 
    524       // It's necessary to tell ScalarEvolution about this explicitly so that
    525       // it can walk the def-use list and forget all SCEVs, as it may not be
    526       // watching the PHI itself. Once the new exit value is in place, there
    527       // may not be a def-use connection between the loop and every instruction
    528       // which got a SCEVAddRecExpr for that loop.
    529       SE->forgetValue(PN);
    530 
    531       // Iterate over all of the values in all the PHI nodes.
    532       for (unsigned i = 0; i != NumPreds; ++i) {
    533         // If the value being merged in is not integer or is not defined
    534         // in the loop, skip it.
    535         Value *InVal = PN->getIncomingValue(i);
    536         if (!isa<Instruction>(InVal))
    537           continue;
    538 
    539         // If this pred is for a subloop, not L itself, skip it.
    540         if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
    541           continue; // The Block is in a subloop, skip it.
    542 
    543         // Check that InVal is defined in the loop.
    544         Instruction *Inst = cast<Instruction>(InVal);
    545         if (!L->contains(Inst))
    546           continue;
    547 
    548         // Okay, this instruction has a user outside of the current loop
    549         // and varies predictably *inside* the loop.  Evaluate the value it
    550         // contains when the loop exits, if possible.
    551         const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
    552         if (!SE->isLoopInvariant(ExitValue, L))
    553           continue;
    554 
    555         Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
    556 
    557         DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
    558                      << "  LoopVal = " << *Inst << "\n");
    559 
    560         if (!isValidRewrite(Inst, ExitVal)) {
    561           DeadInsts.push_back(ExitVal);
    562           continue;
    563         }
    564         Changed = true;
    565         ++NumReplaced;
    566 
    567         PN->setIncomingValue(i, ExitVal);
    568 
    569         // If this instruction is dead now, delete it.
    570         RecursivelyDeleteTriviallyDeadInstructions(Inst);
    571 
    572         if (NumPreds == 1) {
    573           // Completely replace a single-pred PHI. This is safe, because the
    574           // NewVal won't be variant in the loop, so we don't need an LCSSA phi
    575           // node anymore.
    576           PN->replaceAllUsesWith(ExitVal);
    577           RecursivelyDeleteTriviallyDeadInstructions(PN);
    578         }
    579       }
    580       if (NumPreds != 1) {
    581         // Clone the PHI and delete the original one. This lets IVUsers and
    582         // any other maps purge the original user from their records.
    583         PHINode *NewPN = cast<PHINode>(PN->clone());
    584         NewPN->takeName(PN);
    585         NewPN->insertBefore(PN);
    586         PN->replaceAllUsesWith(NewPN);
    587         PN->eraseFromParent();
    588       }
    589     }
    590   }
    591 
    592   // The insertion point instruction may have been deleted; clear it out
    593   // so that the rewriter doesn't trip over it later.
    594   Rewriter.clearInsertPoint();
    595 }
    596 
    597 //===----------------------------------------------------------------------===//
    598 //  Rewrite IV users based on a canonical IV.
    599 //  Only for use with -enable-iv-rewrite.
    600 //===----------------------------------------------------------------------===//
    601 
    602 /// FIXME: It is an extremely bad idea to indvar substitute anything more
    603 /// complex than affine induction variables.  Doing so will put expensive
    604 /// polynomial evaluations inside of the loop, and the str reduction pass
    605 /// currently can only reduce affine polynomials.  For now just disable
    606 /// indvar subst on anything more complex than an affine addrec, unless
    607 /// it can be expanded to a trivial value.
    608 static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
    609   // Loop-invariant values are safe.
    610   if (SE->isLoopInvariant(S, L)) return true;
    611 
    612   // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
    613   // to transform them into efficient code.
    614   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
    615     return AR->isAffine();
    616 
    617   // An add is safe it all its operands are safe.
    618   if (const SCEVCommutativeExpr *Commutative
    619       = dyn_cast<SCEVCommutativeExpr>(S)) {
    620     for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
    621          E = Commutative->op_end(); I != E; ++I)
    622       if (!isSafe(*I, L, SE)) return false;
    623     return true;
    624   }
    625 
    626   // A cast is safe if its operand is.
    627   if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
    628     return isSafe(C->getOperand(), L, SE);
    629 
    630   // A udiv is safe if its operands are.
    631   if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
    632     return isSafe(UD->getLHS(), L, SE) &&
    633            isSafe(UD->getRHS(), L, SE);
    634 
    635   // SCEVUnknown is always safe.
    636   if (isa<SCEVUnknown>(S))
    637     return true;
    638 
    639   // Nothing else is safe.
    640   return false;
    641 }
    642 
    643 void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
    644   // Rewrite all induction variable expressions in terms of the canonical
    645   // induction variable.
    646   //
    647   // If there were induction variables of other sizes or offsets, manually
    648   // add the offsets to the primary induction variable and cast, avoiding
    649   // the need for the code evaluation methods to insert induction variables
    650   // of different sizes.
    651   for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
    652     Value *Op = UI->getOperandValToReplace();
    653     Type *UseTy = Op->getType();
    654     Instruction *User = UI->getUser();
    655 
    656     // Compute the final addrec to expand into code.
    657     const SCEV *AR = IU->getReplacementExpr(*UI);
    658 
    659     // Evaluate the expression out of the loop, if possible.
    660     if (!L->contains(UI->getUser())) {
    661       const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
    662       if (SE->isLoopInvariant(ExitVal, L))
    663         AR = ExitVal;
    664     }
    665 
    666     // FIXME: It is an extremely bad idea to indvar substitute anything more
    667     // complex than affine induction variables.  Doing so will put expensive
    668     // polynomial evaluations inside of the loop, and the str reduction pass
    669     // currently can only reduce affine polynomials.  For now just disable
    670     // indvar subst on anything more complex than an affine addrec, unless
    671     // it can be expanded to a trivial value.
    672     if (!isSafe(AR, L, SE))
    673       continue;
    674 
    675     // Determine the insertion point for this user. By default, insert
    676     // immediately before the user. The SCEVExpander class will automatically
    677     // hoist loop invariants out of the loop. For PHI nodes, there may be
    678     // multiple uses, so compute the nearest common dominator for the
    679     // incoming blocks.
    680     Instruction *InsertPt = getInsertPointForUses(User, Op, DT);
    681 
    682     // Now expand it into actual Instructions and patch it into place.
    683     Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
    684 
    685     DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
    686                  << "   into = " << *NewVal << "\n");
    687 
    688     if (!isValidRewrite(Op, NewVal)) {
    689       DeadInsts.push_back(NewVal);
    690       continue;
    691     }
    692     // Inform ScalarEvolution that this value is changing. The change doesn't
    693     // affect its value, but it does potentially affect which use lists the
    694     // value will be on after the replacement, which affects ScalarEvolution's
    695     // ability to walk use lists and drop dangling pointers when a value is
    696     // deleted.
    697     SE->forgetValue(User);
    698 
    699     // Patch the new value into place.
    700     if (Op->hasName())
    701       NewVal->takeName(Op);
    702     if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
    703       NewValI->setDebugLoc(User->getDebugLoc());
    704     User->replaceUsesOfWith(Op, NewVal);
    705     UI->setOperandValToReplace(NewVal);
    706 
    707     ++NumRemoved;
    708     Changed = true;
    709 
    710     // The old value may be dead now.
    711     DeadInsts.push_back(Op);
    712   }
    713 }
    714 
    715 //===----------------------------------------------------------------------===//
    716 //  IV Widening - Extend the width of an IV to cover its widest uses.
    717 //===----------------------------------------------------------------------===//
    718 
    719 namespace {
    720   // Collect information about induction variables that are used by sign/zero
    721   // extend operations. This information is recorded by CollectExtend and
    722   // provides the input to WidenIV.
    723   struct WideIVInfo {
    724     PHINode *NarrowIV;
    725     Type *WidestNativeType; // Widest integer type created [sz]ext
    726     bool IsSigned;          // Was an sext user seen before a zext?
    727 
    728     WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
    729   };
    730 
    731   class WideIVVisitor : public IVVisitor {
    732     ScalarEvolution *SE;
    733     const TargetData *TD;
    734 
    735   public:
    736     WideIVInfo WI;
    737 
    738     WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
    739                   const TargetData *TData) :
    740       SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
    741 
    742     // Implement the interface used by simplifyUsersOfIV.
    743     virtual void visitCast(CastInst *Cast);
    744   };
    745 }
    746 
    747 /// visitCast - Update information about the induction variable that is
    748 /// extended by this sign or zero extend operation. This is used to determine
    749 /// the final width of the IV before actually widening it.
    750 void WideIVVisitor::visitCast(CastInst *Cast) {
    751   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
    752   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    753     return;
    754 
    755   Type *Ty = Cast->getType();
    756   uint64_t Width = SE->getTypeSizeInBits(Ty);
    757   if (TD && !TD->isLegalInteger(Width))
    758     return;
    759 
    760   if (!WI.WidestNativeType) {
    761     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    762     WI.IsSigned = IsSigned;
    763     return;
    764   }
    765 
    766   // We extend the IV to satisfy the sign of its first user, arbitrarily.
    767   if (WI.IsSigned != IsSigned)
    768     return;
    769 
    770   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    771     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    772 }
    773 
    774 namespace {
    775 
    776 /// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the
    777 /// WideIV that computes the same value as the Narrow IV def.  This avoids
    778 /// caching Use* pointers.
    779 struct NarrowIVDefUse {
    780   Instruction *NarrowDef;
    781   Instruction *NarrowUse;
    782   Instruction *WideDef;
    783 
    784   NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
    785 
    786   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
    787     NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
    788 };
    789 
    790 /// WidenIV - The goal of this transform is to remove sign and zero extends
    791 /// without creating any new induction variables. To do this, it creates a new
    792 /// phi of the wider type and redirects all users, either removing extends or
    793 /// inserting truncs whenever we stop propagating the type.
    794 ///
    795 class WidenIV {
    796   // Parameters
    797   PHINode *OrigPhi;
    798   Type *WideType;
    799   bool IsSigned;
    800 
    801   // Context
    802   LoopInfo        *LI;
    803   Loop            *L;
    804   ScalarEvolution *SE;
    805   DominatorTree   *DT;
    806 
    807   // Result
    808   PHINode *WidePhi;
    809   Instruction *WideInc;
    810   const SCEV *WideIncExpr;
    811   SmallVectorImpl<WeakVH> &DeadInsts;
    812 
    813   SmallPtrSet<Instruction*,16> Widened;
    814   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
    815 
    816 public:
    817   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
    818           ScalarEvolution *SEv, DominatorTree *DTree,
    819           SmallVectorImpl<WeakVH> &DI) :
    820     OrigPhi(WI.NarrowIV),
    821     WideType(WI.WidestNativeType),
    822     IsSigned(WI.IsSigned),
    823     LI(LInfo),
    824     L(LI->getLoopFor(OrigPhi->getParent())),
    825     SE(SEv),
    826     DT(DTree),
    827     WidePhi(0),
    828     WideInc(0),
    829     WideIncExpr(0),
    830     DeadInsts(DI) {
    831     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    832   }
    833 
    834   PHINode *CreateWideIV(SCEVExpander &Rewriter);
    835 
    836 protected:
    837   Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
    838                    Instruction *Use);
    839 
    840   Instruction *CloneIVUser(NarrowIVDefUse DU);
    841 
    842   const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
    843 
    844   const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
    845 
    846   Instruction *WidenIVUse(NarrowIVDefUse DU);
    847 
    848   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
    849 };
    850 } // anonymous namespace
    851 
    852 /// isLoopInvariant - Perform a quick domtree based check for loop invariance
    853 /// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
    854 /// gratuitous for this purpose.
    855 static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
    856   Instruction *Inst = dyn_cast<Instruction>(V);
    857   if (!Inst)
    858     return true;
    859 
    860   return DT->properlyDominates(Inst->getParent(), L->getHeader());
    861 }
    862 
    863 Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
    864                           Instruction *Use) {
    865   // Set the debug location and conservative insertion point.
    866   IRBuilder<> Builder(Use);
    867   // Hoist the insertion point into loop preheaders as far as possible.
    868   for (const Loop *L = LI->getLoopFor(Use->getParent());
    869        L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
    870        L = L->getParentLoop())
    871     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
    872 
    873   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
    874                     Builder.CreateZExt(NarrowOper, WideType);
    875 }
    876 
    877 /// CloneIVUser - Instantiate a wide operation to replace a narrow
    878 /// operation. This only needs to handle operations that can evaluation to
    879 /// SCEVAddRec. It can safely return 0 for any operation we decide not to clone.
    880 Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
    881   unsigned Opcode = DU.NarrowUse->getOpcode();
    882   switch (Opcode) {
    883   default:
    884     return 0;
    885   case Instruction::Add:
    886   case Instruction::Mul:
    887   case Instruction::UDiv:
    888   case Instruction::Sub:
    889   case Instruction::And:
    890   case Instruction::Or:
    891   case Instruction::Xor:
    892   case Instruction::Shl:
    893   case Instruction::LShr:
    894   case Instruction::AShr:
    895     DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
    896 
    897     // Replace NarrowDef operands with WideDef. Otherwise, we don't know
    898     // anything about the narrow operand yet so must insert a [sz]ext. It is
    899     // probably loop invariant and will be folded or hoisted. If it actually
    900     // comes from a widened IV, it should be removed during a future call to
    901     // WidenIVUse.
    902     Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
    903       getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
    904     Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
    905       getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
    906 
    907     BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
    908     BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
    909                                                     LHS, RHS,
    910                                                     NarrowBO->getName());
    911     IRBuilder<> Builder(DU.NarrowUse);
    912     Builder.Insert(WideBO);
    913     if (const OverflowingBinaryOperator *OBO =
    914         dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
    915       if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
    916       if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
    917     }
    918     return WideBO;
    919   }
    920   llvm_unreachable(0);
    921 }
    922 
    923 /// No-wrap operations can transfer sign extension of their result to their
    924 /// operands. Generate the SCEV value for the widened operation without
    925 /// actually modifying the IR yet. If the expression after extending the
    926 /// operands is an AddRec for this loop, return it.
    927 const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
    928   // Handle the common case of add<nsw/nuw>
    929   if (DU.NarrowUse->getOpcode() != Instruction::Add)
    930     return 0;
    931 
    932   // One operand (NarrowDef) has already been extended to WideDef. Now determine
    933   // if extending the other will lead to a recurrence.
    934   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
    935   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
    936 
    937   const SCEV *ExtendOperExpr = 0;
    938   const OverflowingBinaryOperator *OBO =
    939     cast<OverflowingBinaryOperator>(DU.NarrowUse);
    940   if (IsSigned && OBO->hasNoSignedWrap())
    941     ExtendOperExpr = SE->getSignExtendExpr(
    942       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
    943   else if(!IsSigned && OBO->hasNoUnsignedWrap())
    944     ExtendOperExpr = SE->getZeroExtendExpr(
    945       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
    946   else
    947     return 0;
    948 
    949   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
    950     SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
    951                    IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
    952 
    953   if (!AddRec || AddRec->getLoop() != L)
    954     return 0;
    955   return AddRec;
    956 }
    957 
    958 /// GetWideRecurrence - Is this instruction potentially interesting from
    959 /// IVUsers' perspective after widening it's type? In other words, can the
    960 /// extend be safely hoisted out of the loop with SCEV reducing the value to a
    961 /// recurrence on the same loop. If so, return the sign or zero extended
    962 /// recurrence. Otherwise return NULL.
    963 const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
    964   if (!SE->isSCEVable(NarrowUse->getType()))
    965     return 0;
    966 
    967   const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
    968   if (SE->getTypeSizeInBits(NarrowExpr->getType())
    969       >= SE->getTypeSizeInBits(WideType)) {
    970     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    971     // index. So don't follow this use.
    972     return 0;
    973   }
    974 
    975   const SCEV *WideExpr = IsSigned ?
    976     SE->getSignExtendExpr(NarrowExpr, WideType) :
    977     SE->getZeroExtendExpr(NarrowExpr, WideType);
    978   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
    979   if (!AddRec || AddRec->getLoop() != L)
    980     return 0;
    981   return AddRec;
    982 }
    983 
    984 /// WidenIVUse - Determine whether an individual user of the narrow IV can be
    985 /// widened. If so, return the wide clone of the user.
    986 Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
    987 
    988   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
    989   if (isa<PHINode>(DU.NarrowUse) &&
    990       LI->getLoopFor(DU.NarrowUse->getParent()) != L)
    991     return 0;
    992 
    993   // Our raison d'etre! Eliminate sign and zero extension.
    994   if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
    995     Value *NewDef = DU.WideDef;
    996     if (DU.NarrowUse->getType() != WideType) {
    997       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
    998       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
    999       if (CastWidth < IVWidth) {
   1000         // The cast isn't as wide as the IV, so insert a Trunc.
   1001         IRBuilder<> Builder(DU.NarrowUse);
   1002         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
   1003       }
   1004       else {
   1005         // A wider extend was hidden behind a narrower one. This may induce
   1006         // another round of IV widening in which the intermediate IV becomes
   1007         // dead. It should be very rare.
   1008         DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
   1009               << " not wide enough to subsume " << *DU.NarrowUse << "\n");
   1010         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
   1011         NewDef = DU.NarrowUse;
   1012       }
   1013     }
   1014     if (NewDef != DU.NarrowUse) {
   1015       DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
   1016             << " replaced by " << *DU.WideDef << "\n");
   1017       ++NumElimExt;
   1018       DU.NarrowUse->replaceAllUsesWith(NewDef);
   1019       DeadInsts.push_back(DU.NarrowUse);
   1020     }
   1021     // Now that the extend is gone, we want to expose it's uses for potential
   1022     // further simplification. We don't need to directly inform SimplifyIVUsers
   1023     // of the new users, because their parent IV will be processed later as a
   1024     // new loop phi. If we preserved IVUsers analysis, we would also want to
   1025     // push the uses of WideDef here.
   1026 
   1027     // No further widening is needed. The deceased [sz]ext had done it for us.
   1028     return 0;
   1029   }
   1030 
   1031   // Does this user itself evaluate to a recurrence after widening?
   1032   const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
   1033   if (!WideAddRec) {
   1034       WideAddRec = GetExtendedOperandRecurrence(DU);
   1035   }
   1036   if (!WideAddRec) {
   1037     // This user does not evaluate to a recurence after widening, so don't
   1038     // follow it. Instead insert a Trunc to kill off the original use,
   1039     // eventually isolating the original narrow IV so it can be removed.
   1040     IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
   1041     Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
   1042     DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
   1043     return 0;
   1044   }
   1045   // Assume block terminators cannot evaluate to a recurrence. We can't to
   1046   // insert a Trunc after a terminator if there happens to be a critical edge.
   1047   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
   1048          "SCEV is not expected to evaluate a block terminator");
   1049 
   1050   // Reuse the IV increment that SCEVExpander created as long as it dominates
   1051   // NarrowUse.
   1052   Instruction *WideUse = 0;
   1053   if (WideAddRec == WideIncExpr
   1054       && SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT))
   1055     WideUse = WideInc;
   1056   else {
   1057     WideUse = CloneIVUser(DU);
   1058     if (!WideUse)
   1059       return 0;
   1060   }
   1061   // Evaluation of WideAddRec ensured that the narrow expression could be
   1062   // extended outside the loop without overflow. This suggests that the wide use
   1063   // evaluates to the same expression as the extended narrow use, but doesn't
   1064   // absolutely guarantee it. Hence the following failsafe check. In rare cases
   1065   // where it fails, we simply throw away the newly created wide use.
   1066   if (WideAddRec != SE->getSCEV(WideUse)) {
   1067     DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
   1068           << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
   1069     DeadInsts.push_back(WideUse);
   1070     return 0;
   1071   }
   1072 
   1073   // Returning WideUse pushes it on the worklist.
   1074   return WideUse;
   1075 }
   1076 
   1077 /// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
   1078 ///
   1079 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
   1080   for (Value::use_iterator UI = NarrowDef->use_begin(),
   1081          UE = NarrowDef->use_end(); UI != UE; ++UI) {
   1082     Instruction *NarrowUse = cast<Instruction>(*UI);
   1083 
   1084     // Handle data flow merges and bizarre phi cycles.
   1085     if (!Widened.insert(NarrowUse))
   1086       continue;
   1087 
   1088     NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
   1089   }
   1090 }
   1091 
   1092 /// CreateWideIV - Process a single induction variable. First use the
   1093 /// SCEVExpander to create a wide induction variable that evaluates to the same
   1094 /// recurrence as the original narrow IV. Then use a worklist to forward
   1095 /// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
   1096 /// interesting IV users, the narrow IV will be isolated for removal by
   1097 /// DeleteDeadPHIs.
   1098 ///
   1099 /// It would be simpler to delete uses as they are processed, but we must avoid
   1100 /// invalidating SCEV expressions.
   1101 ///
   1102 PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
   1103   // Is this phi an induction variable?
   1104   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
   1105   if (!AddRec)
   1106     return NULL;
   1107 
   1108   // Widen the induction variable expression.
   1109   const SCEV *WideIVExpr = IsSigned ?
   1110     SE->getSignExtendExpr(AddRec, WideType) :
   1111     SE->getZeroExtendExpr(AddRec, WideType);
   1112 
   1113   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
   1114          "Expect the new IV expression to preserve its type");
   1115 
   1116   // Can the IV be extended outside the loop without overflow?
   1117   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
   1118   if (!AddRec || AddRec->getLoop() != L)
   1119     return NULL;
   1120 
   1121   // An AddRec must have loop-invariant operands. Since this AddRec is
   1122   // materialized by a loop header phi, the expression cannot have any post-loop
   1123   // operands, so they must dominate the loop header.
   1124   assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
   1125          SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
   1126          && "Loop header phi recurrence inputs do not dominate the loop");
   1127 
   1128   // The rewriter provides a value for the desired IV expression. This may
   1129   // either find an existing phi or materialize a new one. Either way, we
   1130   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
   1131   // of the phi-SCC dominates the loop entry.
   1132   Instruction *InsertPt = L->getHeader()->begin();
   1133   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
   1134 
   1135   // Remembering the WideIV increment generated by SCEVExpander allows
   1136   // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
   1137   // employ a general reuse mechanism because the call above is the only call to
   1138   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
   1139   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
   1140     WideInc =
   1141       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
   1142     WideIncExpr = SE->getSCEV(WideInc);
   1143   }
   1144 
   1145   DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
   1146   ++NumWidened;
   1147 
   1148   // Traverse the def-use chain using a worklist starting at the original IV.
   1149   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
   1150 
   1151   Widened.insert(OrigPhi);
   1152   pushNarrowIVUsers(OrigPhi, WidePhi);
   1153 
   1154   while (!NarrowIVUsers.empty()) {
   1155     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
   1156 
   1157     // Process a def-use edge. This may replace the use, so don't hold a
   1158     // use_iterator across it.
   1159     Instruction *WideUse = WidenIVUse(DU);
   1160 
   1161     // Follow all def-use edges from the previous narrow use.
   1162     if (WideUse)
   1163       pushNarrowIVUsers(DU.NarrowUse, WideUse);
   1164 
   1165     // WidenIVUse may have removed the def-use edge.
   1166     if (DU.NarrowDef->use_empty())
   1167       DeadInsts.push_back(DU.NarrowDef);
   1168   }
   1169   return WidePhi;
   1170 }
   1171 
   1172 //===----------------------------------------------------------------------===//
   1173 //  Simplification of IV users based on SCEV evaluation.
   1174 //===----------------------------------------------------------------------===//
   1175 
   1176 
   1177 /// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
   1178 /// users. Each successive simplification may push more users which may
   1179 /// themselves be candidates for simplification.
   1180 ///
   1181 /// Sign/Zero extend elimination is interleaved with IV simplification.
   1182 ///
   1183 void IndVarSimplify::SimplifyAndExtend(Loop *L,
   1184                                        SCEVExpander &Rewriter,
   1185                                        LPPassManager &LPM) {
   1186   SmallVector<WideIVInfo, 8> WideIVs;
   1187 
   1188   SmallVector<PHINode*, 8> LoopPhis;
   1189   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
   1190     LoopPhis.push_back(cast<PHINode>(I));
   1191   }
   1192   // Each round of simplification iterates through the SimplifyIVUsers worklist
   1193   // for all current phis, then determines whether any IVs can be
   1194   // widened. Widening adds new phis to LoopPhis, inducing another round of
   1195   // simplification on the wide IVs.
   1196   while (!LoopPhis.empty()) {
   1197     // Evaluate as many IV expressions as possible before widening any IVs. This
   1198     // forces SCEV to set no-wrap flags before evaluating sign/zero
   1199     // extension. The first time SCEV attempts to normalize sign/zero extension,
   1200     // the result becomes final. So for the most predictable results, we delay
   1201     // evaluation of sign/zero extend evaluation until needed, and avoid running
   1202     // other SCEV based analysis prior to SimplifyAndExtend.
   1203     do {
   1204       PHINode *CurrIV = LoopPhis.pop_back_val();
   1205 
   1206       // Information about sign/zero extensions of CurrIV.
   1207       WideIVVisitor WIV(CurrIV, SE, TD);
   1208 
   1209       Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
   1210 
   1211       if (WIV.WI.WidestNativeType) {
   1212         WideIVs.push_back(WIV.WI);
   1213       }
   1214     } while(!LoopPhis.empty());
   1215 
   1216     for (; !WideIVs.empty(); WideIVs.pop_back()) {
   1217       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
   1218       if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
   1219         Changed = true;
   1220         LoopPhis.push_back(WidePhi);
   1221       }
   1222     }
   1223   }
   1224 }
   1225 
   1226 //===----------------------------------------------------------------------===//
   1227 //  LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
   1228 //===----------------------------------------------------------------------===//
   1229 
   1230 /// Check for expressions that ScalarEvolution generates to compute
   1231 /// BackedgeTakenInfo. If these expressions have not been reduced, then
   1232 /// expanding them may incur additional cost (albeit in the loop preheader).
   1233 static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
   1234                                 ScalarEvolution *SE) {
   1235   // If the backedge-taken count is a UDiv, it's very likely a UDiv that
   1236   // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
   1237   // precise expression, rather than a UDiv from the user's code. If we can't
   1238   // find a UDiv in the code with some simple searching, assume the former and
   1239   // forego rewriting the loop.
   1240   if (isa<SCEVUDivExpr>(S)) {
   1241     ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
   1242     if (!OrigCond) return true;
   1243     const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
   1244     R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
   1245     if (R != S) {
   1246       const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
   1247       L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
   1248       if (L != S)
   1249         return true;
   1250     }
   1251   }
   1252 
   1253   if (EnableIVRewrite)
   1254     return false;
   1255 
   1256   // Recurse past add expressions, which commonly occur in the
   1257   // BackedgeTakenCount. They may already exist in program code, and if not,
   1258   // they are not too expensive rematerialize.
   1259   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
   1260     for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
   1261          I != E; ++I) {
   1262       if (isHighCostExpansion(*I, BI, SE))
   1263         return true;
   1264     }
   1265     return false;
   1266   }
   1267 
   1268   // HowManyLessThans uses a Max expression whenever the loop is not guarded by
   1269   // the exit condition.
   1270   if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
   1271     return true;
   1272 
   1273   // If we haven't recognized an expensive SCEV patter, assume its an expression
   1274   // produced by program code.
   1275   return false;
   1276 }
   1277 
   1278 /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
   1279 /// count expression can be safely and cheaply expanded into an instruction
   1280 /// sequence that can be used by LinearFunctionTestReplace.
   1281 static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
   1282   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   1283   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
   1284       BackedgeTakenCount->isZero())
   1285     return false;
   1286 
   1287   if (!L->getExitingBlock())
   1288     return false;
   1289 
   1290   // Can't rewrite non-branch yet.
   1291   BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1292   if (!BI)
   1293     return false;
   1294 
   1295   if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
   1296     return false;
   1297 
   1298   return true;
   1299 }
   1300 
   1301 /// getBackedgeIVType - Get the widest type used by the loop test after peeking
   1302 /// through Truncs.
   1303 ///
   1304 /// TODO: Unnecessary when ForceLFTR is removed.
   1305 static Type *getBackedgeIVType(Loop *L) {
   1306   if (!L->getExitingBlock())
   1307     return 0;
   1308 
   1309   // Can't rewrite non-branch yet.
   1310   BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1311   if (!BI)
   1312     return 0;
   1313 
   1314   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
   1315   if (!Cond)
   1316     return 0;
   1317 
   1318   Type *Ty = 0;
   1319   for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
   1320       OI != OE; ++OI) {
   1321     assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
   1322     TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
   1323     if (!Trunc)
   1324       continue;
   1325 
   1326     return Trunc->getSrcTy();
   1327   }
   1328   return Ty;
   1329 }
   1330 
   1331 /// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
   1332 /// invariant value to the phi.
   1333 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
   1334   Instruction *IncI = dyn_cast<Instruction>(IncV);
   1335   if (!IncI)
   1336     return 0;
   1337 
   1338   switch (IncI->getOpcode()) {
   1339   case Instruction::Add:
   1340   case Instruction::Sub:
   1341     break;
   1342   case Instruction::GetElementPtr:
   1343     // An IV counter must preserve its type.
   1344     if (IncI->getNumOperands() == 2)
   1345       break;
   1346   default:
   1347     return 0;
   1348   }
   1349 
   1350   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
   1351   if (Phi && Phi->getParent() == L->getHeader()) {
   1352     if (isLoopInvariant(IncI->getOperand(1), L, DT))
   1353       return Phi;
   1354     return 0;
   1355   }
   1356   if (IncI->getOpcode() == Instruction::GetElementPtr)
   1357     return 0;
   1358 
   1359   // Allow add/sub to be commuted.
   1360   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
   1361   if (Phi && Phi->getParent() == L->getHeader()) {
   1362     if (isLoopInvariant(IncI->getOperand(0), L, DT))
   1363       return Phi;
   1364   }
   1365   return 0;
   1366 }
   1367 
   1368 /// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
   1369 /// that the current exit test is already sufficiently canonical.
   1370 static bool needsLFTR(Loop *L, DominatorTree *DT) {
   1371   assert(L->getExitingBlock() && "expected loop exit");
   1372 
   1373   BasicBlock *LatchBlock = L->getLoopLatch();
   1374   // Don't bother with LFTR if the loop is not properly simplified.
   1375   if (!LatchBlock)
   1376     return false;
   1377 
   1378   BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1379   assert(BI && "expected exit branch");
   1380 
   1381   // Do LFTR to simplify the exit condition to an ICMP.
   1382   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
   1383   if (!Cond)
   1384     return true;
   1385 
   1386   // Do LFTR to simplify the exit ICMP to EQ/NE
   1387   ICmpInst::Predicate Pred = Cond->getPredicate();
   1388   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
   1389     return true;
   1390 
   1391   // Look for a loop invariant RHS
   1392   Value *LHS = Cond->getOperand(0);
   1393   Value *RHS = Cond->getOperand(1);
   1394   if (!isLoopInvariant(RHS, L, DT)) {
   1395     if (!isLoopInvariant(LHS, L, DT))
   1396       return true;
   1397     std::swap(LHS, RHS);
   1398   }
   1399   // Look for a simple IV counter LHS
   1400   PHINode *Phi = dyn_cast<PHINode>(LHS);
   1401   if (!Phi)
   1402     Phi = getLoopPhiForCounter(LHS, L, DT);
   1403 
   1404   if (!Phi)
   1405     return true;
   1406 
   1407   // Do LFTR if the exit condition's IV is *not* a simple counter.
   1408   Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch());
   1409   return Phi != getLoopPhiForCounter(IncV, L, DT);
   1410 }
   1411 
   1412 /// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
   1413 /// be rewritten) loop exit test.
   1414 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
   1415   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
   1416   Value *IncV = Phi->getIncomingValue(LatchIdx);
   1417 
   1418   for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
   1419        UI != UE; ++UI) {
   1420     if (*UI != Cond && *UI != IncV) return false;
   1421   }
   1422 
   1423   for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
   1424        UI != UE; ++UI) {
   1425     if (*UI != Cond && *UI != Phi) return false;
   1426   }
   1427   return true;
   1428 }
   1429 
   1430 /// FindLoopCounter - Find an affine IV in canonical form.
   1431 ///
   1432 /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
   1433 ///
   1434 /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
   1435 /// This is difficult in general for SCEV because of potential overflow. But we
   1436 /// could at least handle constant BECounts.
   1437 static PHINode *
   1438 FindLoopCounter(Loop *L, const SCEV *BECount,
   1439                 ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
   1440   // I'm not sure how BECount could be a pointer type, but we definitely don't
   1441   // want to LFTR that.
   1442   if (BECount->getType()->isPointerTy())
   1443     return 0;
   1444 
   1445   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
   1446 
   1447   Value *Cond =
   1448     cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
   1449 
   1450   // Loop over all of the PHI nodes, looking for a simple counter.
   1451   PHINode *BestPhi = 0;
   1452   const SCEV *BestInit = 0;
   1453   BasicBlock *LatchBlock = L->getLoopLatch();
   1454   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
   1455 
   1456   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
   1457     PHINode *Phi = cast<PHINode>(I);
   1458     if (!SE->isSCEVable(Phi->getType()))
   1459       continue;
   1460 
   1461     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
   1462     if (!AR || AR->getLoop() != L || !AR->isAffine())
   1463       continue;
   1464 
   1465     // AR may be a pointer type, while BECount is an integer type.
   1466     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
   1467     // AR may not be a narrower type, or we may never exit.
   1468     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
   1469     if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
   1470       continue;
   1471 
   1472     const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
   1473     if (!Step || !Step->isOne())
   1474       continue;
   1475 
   1476     int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
   1477     Value *IncV = Phi->getIncomingValue(LatchIdx);
   1478     if (getLoopPhiForCounter(IncV, L, DT) != Phi)
   1479       continue;
   1480 
   1481     const SCEV *Init = AR->getStart();
   1482 
   1483     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
   1484       // Don't force a live loop counter if another IV can be used.
   1485       if (AlmostDeadIV(Phi, LatchBlock, Cond))
   1486         continue;
   1487 
   1488       // Prefer to count-from-zero. This is a more "canonical" counter form. It
   1489       // also prefers integer to pointer IVs.
   1490       if (BestInit->isZero() != Init->isZero()) {
   1491         if (BestInit->isZero())
   1492           continue;
   1493       }
   1494       // If two IVs both count from zero or both count from nonzero then the
   1495       // narrower is likely a dead phi that has been widened. Use the wider phi
   1496       // to allow the other to be eliminated.
   1497       if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
   1498         continue;
   1499     }
   1500     BestPhi = Phi;
   1501     BestInit = Init;
   1502   }
   1503   return BestPhi;
   1504 }
   1505 
   1506 /// LinearFunctionTestReplace - This method rewrites the exit condition of the
   1507 /// loop to be a canonical != comparison against the incremented loop induction
   1508 /// variable.  This pass is able to rewrite the exit tests of any loop where the
   1509 /// SCEV analysis can determine a loop-invariant trip count of the loop, which
   1510 /// is actually a much broader range than just linear tests.
   1511 Value *IndVarSimplify::
   1512 LinearFunctionTestReplace(Loop *L,
   1513                           const SCEV *BackedgeTakenCount,
   1514                           PHINode *IndVar,
   1515                           SCEVExpander &Rewriter) {
   1516   assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
   1517   BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
   1518 
   1519   // LFTR can ignore IV overflow and truncate to the width of
   1520   // BECount. This avoids materializing the add(zext(add)) expression.
   1521   Type *CntTy = !EnableIVRewrite ?
   1522     BackedgeTakenCount->getType() : IndVar->getType();
   1523 
   1524   const SCEV *IVLimit = BackedgeTakenCount;
   1525 
   1526   // If the exiting block is not the same as the backedge block, we must compare
   1527   // against the preincremented value, otherwise we prefer to compare against
   1528   // the post-incremented value.
   1529   Value *CmpIndVar;
   1530   if (L->getExitingBlock() == L->getLoopLatch()) {
   1531     // Add one to the "backedge-taken" count to get the trip count.
   1532     // If this addition may overflow, we have to be more pessimistic and
   1533     // cast the induction variable before doing the add.
   1534     const SCEV *N =
   1535       SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
   1536     if (CntTy == IVLimit->getType())
   1537       IVLimit = N;
   1538     else {
   1539       const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
   1540       if ((isa<SCEVConstant>(N) && !N->isZero()) ||
   1541           SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
   1542         // No overflow. Cast the sum.
   1543         IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
   1544       } else {
   1545         // Potential overflow. Cast before doing the add.
   1546         IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
   1547         IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
   1548       }
   1549     }
   1550     // The BackedgeTaken expression contains the number of times that the
   1551     // backedge branches to the loop header.  This is one less than the
   1552     // number of times the loop executes, so use the incremented indvar.
   1553     CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
   1554   } else {
   1555     // We have to use the preincremented value...
   1556     IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
   1557     CmpIndVar = IndVar;
   1558   }
   1559 
   1560   // For unit stride, IVLimit = Start + BECount with 2's complement overflow.
   1561   // So for, non-zero start compute the IVLimit here.
   1562   bool isPtrIV = false;
   1563   Type *CmpTy = CntTy;
   1564   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
   1565   assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
   1566   if (!AR->getStart()->isZero()) {
   1567     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
   1568     const SCEV *IVInit = AR->getStart();
   1569 
   1570     // For pointer types, sign extend BECount in order to materialize a GEP.
   1571     // Note that for without EnableIVRewrite, we never run SCEVExpander on a
   1572     // pointer type, because we must preserve the existing GEPs. Instead we
   1573     // directly generate a GEP later.
   1574     if (IVInit->getType()->isPointerTy()) {
   1575       isPtrIV = true;
   1576       CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
   1577       IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
   1578     }
   1579     // For integer types, truncate the IV before computing IVInit + BECount.
   1580     else {
   1581       if (SE->getTypeSizeInBits(IVInit->getType())
   1582           > SE->getTypeSizeInBits(CmpTy))
   1583         IVInit = SE->getTruncateExpr(IVInit, CmpTy);
   1584 
   1585       IVLimit = SE->getAddExpr(IVInit, IVLimit);
   1586     }
   1587   }
   1588   // Expand the code for the iteration count.
   1589   IRBuilder<> Builder(BI);
   1590 
   1591   assert(SE->isLoopInvariant(IVLimit, L) &&
   1592          "Computed iteration count is not loop invariant!");
   1593   Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
   1594 
   1595   // Create a gep for IVInit + IVLimit from on an existing pointer base.
   1596   assert(isPtrIV == IndVar->getType()->isPointerTy() &&
   1597          "IndVar type must match IVInit type");
   1598   if (isPtrIV) {
   1599       Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
   1600       assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
   1601       assert(SE->getSizeOfExpr(
   1602                cast<PointerType>(IVStart->getType())->getElementType())->isOne()
   1603              && "unit stride pointer IV must be i8*");
   1604 
   1605       Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
   1606       ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
   1607       Builder.SetInsertPoint(BI);
   1608   }
   1609 
   1610   // Insert a new icmp_ne or icmp_eq instruction before the branch.
   1611   ICmpInst::Predicate P;
   1612   if (L->contains(BI->getSuccessor(0)))
   1613     P = ICmpInst::ICMP_NE;
   1614   else
   1615     P = ICmpInst::ICMP_EQ;
   1616 
   1617   DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
   1618                << "      LHS:" << *CmpIndVar << '\n'
   1619                << "       op:\t"
   1620                << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
   1621                << "      RHS:\t" << *ExitCnt << "\n"
   1622                << "     Expr:\t" << *IVLimit << "\n");
   1623 
   1624   if (SE->getTypeSizeInBits(CmpIndVar->getType())
   1625       > SE->getTypeSizeInBits(CmpTy)) {
   1626     CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
   1627   }
   1628 
   1629   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
   1630   Value *OrigCond = BI->getCondition();
   1631   // It's tempting to use replaceAllUsesWith here to fully replace the old
   1632   // comparison, but that's not immediately safe, since users of the old
   1633   // comparison may not be dominated by the new comparison. Instead, just
   1634   // update the branch to use the new comparison; in the common case this
   1635   // will make old comparison dead.
   1636   BI->setCondition(Cond);
   1637   DeadInsts.push_back(OrigCond);
   1638 
   1639   ++NumLFTR;
   1640   Changed = true;
   1641   return Cond;
   1642 }
   1643 
   1644 //===----------------------------------------------------------------------===//
   1645 //  SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
   1646 //===----------------------------------------------------------------------===//
   1647 
   1648 /// If there's a single exit block, sink any loop-invariant values that
   1649 /// were defined in the preheader but not used inside the loop into the
   1650 /// exit block to reduce register pressure in the loop.
   1651 void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
   1652   BasicBlock *ExitBlock = L->getExitBlock();
   1653   if (!ExitBlock) return;
   1654 
   1655   BasicBlock *Preheader = L->getLoopPreheader();
   1656   if (!Preheader) return;
   1657 
   1658   Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
   1659   BasicBlock::iterator I = Preheader->getTerminator();
   1660   while (I != Preheader->begin()) {
   1661     --I;
   1662     // New instructions were inserted at the end of the preheader.
   1663     if (isa<PHINode>(I))
   1664       break;
   1665 
   1666     // Don't move instructions which might have side effects, since the side
   1667     // effects need to complete before instructions inside the loop.  Also don't
   1668     // move instructions which might read memory, since the loop may modify
   1669     // memory. Note that it's okay if the instruction might have undefined
   1670     // behavior: LoopSimplify guarantees that the preheader dominates the exit
   1671     // block.
   1672     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
   1673       continue;
   1674 
   1675     // Skip debug info intrinsics.
   1676     if (isa<DbgInfoIntrinsic>(I))
   1677       continue;
   1678 
   1679     // Skip landingpad instructions.
   1680     if (isa<LandingPadInst>(I))
   1681       continue;
   1682 
   1683     // Don't sink static AllocaInsts out of the entry block, which would
   1684     // turn them into dynamic allocas!
   1685     if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
   1686       if (AI->isStaticAlloca())
   1687         continue;
   1688 
   1689     // Determine if there is a use in or before the loop (direct or
   1690     // otherwise).
   1691     bool UsedInLoop = false;
   1692     for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
   1693          UI != UE; ++UI) {
   1694       User *U = *UI;
   1695       BasicBlock *UseBB = cast<Instruction>(U)->getParent();
   1696       if (PHINode *P = dyn_cast<PHINode>(U)) {
   1697         unsigned i =
   1698           PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
   1699         UseBB = P->getIncomingBlock(i);
   1700       }
   1701       if (UseBB == Preheader || L->contains(UseBB)) {
   1702         UsedInLoop = true;
   1703         break;
   1704       }
   1705     }
   1706 
   1707     // If there is, the def must remain in the preheader.
   1708     if (UsedInLoop)
   1709       continue;
   1710 
   1711     // Otherwise, sink it to the exit block.
   1712     Instruction *ToMove = I;
   1713     bool Done = false;
   1714 
   1715     if (I != Preheader->begin()) {
   1716       // Skip debug info intrinsics.
   1717       do {
   1718         --I;
   1719       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
   1720 
   1721       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
   1722         Done = true;
   1723     } else {
   1724       Done = true;
   1725     }
   1726 
   1727     ToMove->moveBefore(InsertPt);
   1728     if (Done) break;
   1729     InsertPt = ToMove;
   1730   }
   1731 }
   1732 
   1733 //===----------------------------------------------------------------------===//
   1734 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
   1735 //===----------------------------------------------------------------------===//
   1736 
   1737 bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   1738   // If LoopSimplify form is not available, stay out of trouble. Some notes:
   1739   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
   1740   //    canonicalization can be a pessimization without LSR to "clean up"
   1741   //    afterwards.
   1742   //  - We depend on having a preheader; in particular,
   1743   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
   1744   //    and we're in trouble if we can't find the induction variable even when
   1745   //    we've manually inserted one.
   1746   if (!L->isLoopSimplifyForm())
   1747     return false;
   1748 
   1749   if (EnableIVRewrite)
   1750     IU = &getAnalysis<IVUsers>();
   1751   LI = &getAnalysis<LoopInfo>();
   1752   SE = &getAnalysis<ScalarEvolution>();
   1753   DT = &getAnalysis<DominatorTree>();
   1754   TD = getAnalysisIfAvailable<TargetData>();
   1755 
   1756   DeadInsts.clear();
   1757   Changed = false;
   1758 
   1759   // If there are any floating-point recurrences, attempt to
   1760   // transform them to use integer recurrences.
   1761   RewriteNonIntegerIVs(L);
   1762 
   1763   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   1764 
   1765   // Create a rewriter object which we'll use to transform the code with.
   1766   SCEVExpander Rewriter(*SE, "indvars");
   1767 #ifndef NDEBUG
   1768   Rewriter.setDebugType(DEBUG_TYPE);
   1769 #endif
   1770 
   1771   // Eliminate redundant IV users.
   1772   //
   1773   // Simplification works best when run before other consumers of SCEV. We
   1774   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
   1775   // other expressions involving loop IVs have been evaluated. This helps SCEV
   1776   // set no-wrap flags before normalizing sign/zero extension.
   1777   if (!EnableIVRewrite) {
   1778     Rewriter.disableCanonicalMode();
   1779     SimplifyAndExtend(L, Rewriter, LPM);
   1780   }
   1781 
   1782   // Check to see if this loop has a computable loop-invariant execution count.
   1783   // If so, this means that we can compute the final value of any expressions
   1784   // that are recurrent in the loop, and substitute the exit values from the
   1785   // loop into any instructions outside of the loop that use the final values of
   1786   // the current expressions.
   1787   //
   1788   if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
   1789     RewriteLoopExitValues(L, Rewriter);
   1790 
   1791   // Eliminate redundant IV users.
   1792   if (EnableIVRewrite)
   1793     Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts);
   1794 
   1795   // Eliminate redundant IV cycles.
   1796   if (!EnableIVRewrite)
   1797     NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
   1798 
   1799   // Compute the type of the largest recurrence expression, and decide whether
   1800   // a canonical induction variable should be inserted.
   1801   Type *LargestType = 0;
   1802   bool NeedCannIV = false;
   1803   bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
   1804   if (EnableIVRewrite && ExpandBECount) {
   1805     // If we have a known trip count and a single exit block, we'll be
   1806     // rewriting the loop exit test condition below, which requires a
   1807     // canonical induction variable.
   1808     NeedCannIV = true;
   1809     Type *Ty = BackedgeTakenCount->getType();
   1810     if (!EnableIVRewrite) {
   1811       // In this mode, SimplifyIVUsers may have already widened the IV used by
   1812       // the backedge test and inserted a Trunc on the compare's operand. Get
   1813       // the wider type to avoid creating a redundant narrow IV only used by the
   1814       // loop test.
   1815       LargestType = getBackedgeIVType(L);
   1816     }
   1817     if (!LargestType ||
   1818         SE->getTypeSizeInBits(Ty) >
   1819         SE->getTypeSizeInBits(LargestType))
   1820       LargestType = SE->getEffectiveSCEVType(Ty);
   1821   }
   1822   if (EnableIVRewrite) {
   1823     for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
   1824       NeedCannIV = true;
   1825       Type *Ty =
   1826         SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
   1827       if (!LargestType ||
   1828           SE->getTypeSizeInBits(Ty) >
   1829           SE->getTypeSizeInBits(LargestType))
   1830         LargestType = Ty;
   1831     }
   1832   }
   1833 
   1834   // Now that we know the largest of the induction variable expressions
   1835   // in this loop, insert a canonical induction variable of the largest size.
   1836   PHINode *IndVar = 0;
   1837   if (NeedCannIV) {
   1838     // Check to see if the loop already has any canonical-looking induction
   1839     // variables. If any are present and wider than the planned canonical
   1840     // induction variable, temporarily remove them, so that the Rewriter
   1841     // doesn't attempt to reuse them.
   1842     SmallVector<PHINode *, 2> OldCannIVs;
   1843     while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
   1844       if (SE->getTypeSizeInBits(OldCannIV->getType()) >
   1845           SE->getTypeSizeInBits(LargestType))
   1846         OldCannIV->removeFromParent();
   1847       else
   1848         break;
   1849       OldCannIVs.push_back(OldCannIV);
   1850     }
   1851 
   1852     IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
   1853 
   1854     ++NumInserted;
   1855     Changed = true;
   1856     DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
   1857 
   1858     // Now that the official induction variable is established, reinsert
   1859     // any old canonical-looking variables after it so that the IR remains
   1860     // consistent. They will be deleted as part of the dead-PHI deletion at
   1861     // the end of the pass.
   1862     while (!OldCannIVs.empty()) {
   1863       PHINode *OldCannIV = OldCannIVs.pop_back_val();
   1864       OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt());
   1865     }
   1866   }
   1867   else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) {
   1868     IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
   1869   }
   1870   // If we have a trip count expression, rewrite the loop's exit condition
   1871   // using it.  We can currently only handle loops with a single exit.
   1872   Value *NewICmp = 0;
   1873   if (ExpandBECount && IndVar) {
   1874     // Check preconditions for proper SCEVExpander operation. SCEV does not
   1875     // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
   1876     // pass that uses the SCEVExpander must do it. This does not work well for
   1877     // loop passes because SCEVExpander makes assumptions about all loops, while
   1878     // LoopPassManager only forces the current loop to be simplified.
   1879     //
   1880     // FIXME: SCEV expansion has no way to bail out, so the caller must
   1881     // explicitly check any assumptions made by SCEV. Brittle.
   1882     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
   1883     if (!AR || AR->getLoop()->getLoopPreheader())
   1884       NewICmp =
   1885         LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
   1886   }
   1887   // Rewrite IV-derived expressions.
   1888   if (EnableIVRewrite)
   1889     RewriteIVExpressions(L, Rewriter);
   1890 
   1891   // Clear the rewriter cache, because values that are in the rewriter's cache
   1892   // can be deleted in the loop below, causing the AssertingVH in the cache to
   1893   // trigger.
   1894   Rewriter.clear();
   1895 
   1896   // Now that we're done iterating through lists, clean up any instructions
   1897   // which are now dead.
   1898   while (!DeadInsts.empty())
   1899     if (Instruction *Inst =
   1900           dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
   1901       RecursivelyDeleteTriviallyDeadInstructions(Inst);
   1902 
   1903   // The Rewriter may not be used from this point on.
   1904 
   1905   // Loop-invariant instructions in the preheader that aren't used in the
   1906   // loop may be sunk below the loop to reduce register pressure.
   1907   SinkUnusedInvariants(L);
   1908 
   1909   // For completeness, inform IVUsers of the IV use in the newly-created
   1910   // loop exit test instruction.
   1911   if (IU && NewICmp) {
   1912     ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
   1913     if (NewICmpInst)
   1914       IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
   1915   }
   1916   // Clean up dead instructions.
   1917   Changed |= DeleteDeadPHIs(L->getHeader());
   1918   // Check a post-condition.
   1919   assert(L->isLCSSAForm(*DT) &&
   1920          "Indvars did not leave the loop in lcssa form!");
   1921 
   1922   // Verify that LFTR, and any other change have not interfered with SCEV's
   1923   // ability to compute trip count.
   1924 #ifndef NDEBUG
   1925   if (!EnableIVRewrite && VerifyIndvars &&
   1926       !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
   1927     SE->forgetLoop(L);
   1928     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
   1929     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
   1930         SE->getTypeSizeInBits(NewBECount->getType()))
   1931       NewBECount = SE->getTruncateOrNoop(NewBECount,
   1932                                          BackedgeTakenCount->getType());
   1933     else
   1934       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
   1935                                                  NewBECount->getType());
   1936     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
   1937   }
   1938 #endif
   1939 
   1940   return Changed;
   1941 }
   1942