Home | History | Annotate | Download | only in Analysis
      1 //===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains the implementation of the scalar evolution analysis
     11 // engine, which is used primarily to analyze expressions involving induction
     12 // variables in loops.
     13 //
     14 // There are several aspects to this library.  First is the representation of
     15 // scalar expressions, which are represented as subclasses of the SCEV class.
     16 // These classes are used to represent certain types of subexpressions that we
     17 // can handle. We only create one SCEV of a particular shape, so
     18 // pointer-comparisons for equality are legal.
     19 //
     20 // One important aspect of the SCEV objects is that they are never cyclic, even
     21 // if there is a cycle in the dataflow for an expression (ie, a PHI node).  If
     22 // the PHI node is one of the idioms that we can represent (e.g., a polynomial
     23 // recurrence) then we represent it directly as a recurrence node, otherwise we
     24 // represent it as a SCEVUnknown node.
     25 //
     26 // In addition to being able to represent expressions of various types, we also
     27 // have folders that are used to build the *canonical* representation for a
     28 // particular expression.  These folders are capable of using a variety of
     29 // rewrite rules to simplify the expressions.
     30 //
     31 // Once the folders are defined, we can implement the more interesting
     32 // higher-level code, such as the code that recognizes PHI nodes of various
     33 // types, computes the execution count of a loop, etc.
     34 //
     35 // TODO: We should use these routines and value representations to implement
     36 // dependence analysis!
     37 //
     38 //===----------------------------------------------------------------------===//
     39 //
     40 // There are several good references for the techniques used in this analysis.
     41 //
     42 //  Chains of recurrences -- a method to expedite the evaluation
     43 //  of closed-form functions
     44 //  Olaf Bachmann, Paul S. Wang, Eugene V. Zima
     45 //
     46 //  On computational properties of chains of recurrences
     47 //  Eugene V. Zima
     48 //
     49 //  Symbolic Evaluation of Chains of Recurrences for Loop Optimization
     50 //  Robert A. van Engelen
     51 //
     52 //  Efficient Symbolic Analysis for Optimizing Compilers
     53 //  Robert A. van Engelen
     54 //
     55 //  Using the chains of recurrences algebra for data dependence testing and
     56 //  induction variable substitution
     57 //  MS Thesis, Johnie Birch
     58 //
     59 //===----------------------------------------------------------------------===//
     60 
     61 #define DEBUG_TYPE "scalar-evolution"
     62 #include "llvm/Analysis/ScalarEvolution.h"
     63 #include "llvm/ADT/STLExtras.h"
     64 #include "llvm/ADT/SmallPtrSet.h"
     65 #include "llvm/ADT/Statistic.h"
     66 #include "llvm/Analysis/ConstantFolding.h"
     67 #include "llvm/Analysis/Dominators.h"
     68 #include "llvm/Analysis/InstructionSimplify.h"
     69 #include "llvm/Analysis/LoopInfo.h"
     70 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
     71 #include "llvm/Analysis/ValueTracking.h"
     72 #include "llvm/Assembly/Writer.h"
     73 #include "llvm/IR/Constants.h"
     74 #include "llvm/IR/DataLayout.h"
     75 #include "llvm/IR/DerivedTypes.h"
     76 #include "llvm/IR/GlobalAlias.h"
     77 #include "llvm/IR/GlobalVariable.h"
     78 #include "llvm/IR/Instructions.h"
     79 #include "llvm/IR/LLVMContext.h"
     80 #include "llvm/IR/Operator.h"
     81 #include "llvm/Support/CommandLine.h"
     82 #include "llvm/Support/ConstantRange.h"
     83 #include "llvm/Support/Debug.h"
     84 #include "llvm/Support/ErrorHandling.h"
     85 #include "llvm/Support/GetElementPtrTypeIterator.h"
     86 #include "llvm/Support/InstIterator.h"
     87 #include "llvm/Support/MathExtras.h"
     88 #include "llvm/Support/raw_ostream.h"
     89 #include "llvm/Target/TargetLibraryInfo.h"
     90 #include <algorithm>
     91 using namespace llvm;
     92 
     93 STATISTIC(NumArrayLenItCounts,
     94           "Number of trip counts computed with array length");
     95 STATISTIC(NumTripCountsComputed,
     96           "Number of loops with predictable loop counts");
     97 STATISTIC(NumTripCountsNotComputed,
     98           "Number of loops without predictable loop counts");
     99 STATISTIC(NumBruteForceTripCountsComputed,
    100           "Number of loops with trip counts computed by force");
    101 
    102 static cl::opt<unsigned>
    103 MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
    104                         cl::desc("Maximum number of iterations SCEV will "
    105                                  "symbolically execute a constant "
    106                                  "derived loop"),
    107                         cl::init(100));
    108 
    109 // FIXME: Enable this with XDEBUG when the test suite is clean.
    110 static cl::opt<bool>
    111 VerifySCEV("verify-scev",
    112            cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
    113 
    114 INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
    115                 "Scalar Evolution Analysis", false, true)
    116 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
    117 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
    118 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
    119 INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
    120                 "Scalar Evolution Analysis", false, true)
    121 char ScalarEvolution::ID = 0;
    122 
    123 //===----------------------------------------------------------------------===//
    124 //                           SCEV class definitions
    125 //===----------------------------------------------------------------------===//
    126 
    127 //===----------------------------------------------------------------------===//
    128 // Implementation of the SCEV class.
    129 //
    130 
    131 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    132 void SCEV::dump() const {
    133   print(dbgs());
    134   dbgs() << '\n';
    135 }
    136 #endif
    137 
    138 void SCEV::print(raw_ostream &OS) const {
    139   switch (getSCEVType()) {
    140   case scConstant:
    141     WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
    142     return;
    143   case scTruncate: {
    144     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
    145     const SCEV *Op = Trunc->getOperand();
    146     OS << "(trunc " << *Op->getType() << " " << *Op << " to "
    147        << *Trunc->getType() << ")";
    148     return;
    149   }
    150   case scZeroExtend: {
    151     const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
    152     const SCEV *Op = ZExt->getOperand();
    153     OS << "(zext " << *Op->getType() << " " << *Op << " to "
    154        << *ZExt->getType() << ")";
    155     return;
    156   }
    157   case scSignExtend: {
    158     const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
    159     const SCEV *Op = SExt->getOperand();
    160     OS << "(sext " << *Op->getType() << " " << *Op << " to "
    161        << *SExt->getType() << ")";
    162     return;
    163   }
    164   case scAddRecExpr: {
    165     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
    166     OS << "{" << *AR->getOperand(0);
    167     for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
    168       OS << ",+," << *AR->getOperand(i);
    169     OS << "}<";
    170     if (AR->getNoWrapFlags(FlagNUW))
    171       OS << "nuw><";
    172     if (AR->getNoWrapFlags(FlagNSW))
    173       OS << "nsw><";
    174     if (AR->getNoWrapFlags(FlagNW) &&
    175         !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
    176       OS << "nw><";
    177     WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
    178     OS << ">";
    179     return;
    180   }
    181   case scAddExpr:
    182   case scMulExpr:
    183   case scUMaxExpr:
    184   case scSMaxExpr: {
    185     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
    186     const char *OpStr = 0;
    187     switch (NAry->getSCEVType()) {
    188     case scAddExpr: OpStr = " + "; break;
    189     case scMulExpr: OpStr = " * "; break;
    190     case scUMaxExpr: OpStr = " umax "; break;
    191     case scSMaxExpr: OpStr = " smax "; break;
    192     }
    193     OS << "(";
    194     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
    195          I != E; ++I) {
    196       OS << **I;
    197       if (llvm::next(I) != E)
    198         OS << OpStr;
    199     }
    200     OS << ")";
    201     switch (NAry->getSCEVType()) {
    202     case scAddExpr:
    203     case scMulExpr:
    204       if (NAry->getNoWrapFlags(FlagNUW))
    205         OS << "<nuw>";
    206       if (NAry->getNoWrapFlags(FlagNSW))
    207         OS << "<nsw>";
    208     }
    209     return;
    210   }
    211   case scUDivExpr: {
    212     const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
    213     OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
    214     return;
    215   }
    216   case scUnknown: {
    217     const SCEVUnknown *U = cast<SCEVUnknown>(this);
    218     Type *AllocTy;
    219     if (U->isSizeOf(AllocTy)) {
    220       OS << "sizeof(" << *AllocTy << ")";
    221       return;
    222     }
    223     if (U->isAlignOf(AllocTy)) {
    224       OS << "alignof(" << *AllocTy << ")";
    225       return;
    226     }
    227 
    228     Type *CTy;
    229     Constant *FieldNo;
    230     if (U->isOffsetOf(CTy, FieldNo)) {
    231       OS << "offsetof(" << *CTy << ", ";
    232       WriteAsOperand(OS, FieldNo, false);
    233       OS << ")";
    234       return;
    235     }
    236 
    237     // Otherwise just print it normally.
    238     WriteAsOperand(OS, U->getValue(), false);
    239     return;
    240   }
    241   case scCouldNotCompute:
    242     OS << "***COULDNOTCOMPUTE***";
    243     return;
    244   default: break;
    245   }
    246   llvm_unreachable("Unknown SCEV kind!");
    247 }
    248 
    249 Type *SCEV::getType() const {
    250   switch (getSCEVType()) {
    251   case scConstant:
    252     return cast<SCEVConstant>(this)->getType();
    253   case scTruncate:
    254   case scZeroExtend:
    255   case scSignExtend:
    256     return cast<SCEVCastExpr>(this)->getType();
    257   case scAddRecExpr:
    258   case scMulExpr:
    259   case scUMaxExpr:
    260   case scSMaxExpr:
    261     return cast<SCEVNAryExpr>(this)->getType();
    262   case scAddExpr:
    263     return cast<SCEVAddExpr>(this)->getType();
    264   case scUDivExpr:
    265     return cast<SCEVUDivExpr>(this)->getType();
    266   case scUnknown:
    267     return cast<SCEVUnknown>(this)->getType();
    268   case scCouldNotCompute:
    269     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
    270   default:
    271     llvm_unreachable("Unknown SCEV kind!");
    272   }
    273 }
    274 
    275 bool SCEV::isZero() const {
    276   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    277     return SC->getValue()->isZero();
    278   return false;
    279 }
    280 
    281 bool SCEV::isOne() const {
    282   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    283     return SC->getValue()->isOne();
    284   return false;
    285 }
    286 
    287 bool SCEV::isAllOnesValue() const {
    288   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    289     return SC->getValue()->isAllOnesValue();
    290   return false;
    291 }
    292 
    293 /// isNonConstantNegative - Return true if the specified scev is negated, but
    294 /// not a constant.
    295 bool SCEV::isNonConstantNegative() const {
    296   const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
    297   if (!Mul) return false;
    298 
    299   // If there is a constant factor, it will be first.
    300   const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
    301   if (!SC) return false;
    302 
    303   // Return true if the value is negative, this matches things like (-42 * V).
    304   return SC->getValue()->getValue().isNegative();
    305 }
    306 
    307 SCEVCouldNotCompute::SCEVCouldNotCompute() :
    308   SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
    309 
    310 bool SCEVCouldNotCompute::classof(const SCEV *S) {
    311   return S->getSCEVType() == scCouldNotCompute;
    312 }
    313 
    314 const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
    315   FoldingSetNodeID ID;
    316   ID.AddInteger(scConstant);
    317   ID.AddPointer(V);
    318   void *IP = 0;
    319   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    320   SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
    321   UniqueSCEVs.InsertNode(S, IP);
    322   return S;
    323 }
    324 
    325 const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
    326   return getConstant(ConstantInt::get(getContext(), Val));
    327 }
    328 
    329 const SCEV *
    330 ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
    331   IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
    332   return getConstant(ConstantInt::get(ITy, V, isSigned));
    333 }
    334 
    335 SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
    336                            unsigned SCEVTy, const SCEV *op, Type *ty)
    337   : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
    338 
    339 SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
    340                                    const SCEV *op, Type *ty)
    341   : SCEVCastExpr(ID, scTruncate, op, ty) {
    342   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
    343          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
    344          "Cannot truncate non-integer value!");
    345 }
    346 
    347 SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
    348                                        const SCEV *op, Type *ty)
    349   : SCEVCastExpr(ID, scZeroExtend, op, ty) {
    350   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
    351          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
    352          "Cannot zero extend non-integer value!");
    353 }
    354 
    355 SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
    356                                        const SCEV *op, Type *ty)
    357   : SCEVCastExpr(ID, scSignExtend, op, ty) {
    358   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
    359          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
    360          "Cannot sign extend non-integer value!");
    361 }
    362 
    363 void SCEVUnknown::deleted() {
    364   // Clear this SCEVUnknown from various maps.
    365   SE->forgetMemoizedResults(this);
    366 
    367   // Remove this SCEVUnknown from the uniquing map.
    368   SE->UniqueSCEVs.RemoveNode(this);
    369 
    370   // Release the value.
    371   setValPtr(0);
    372 }
    373 
    374 void SCEVUnknown::allUsesReplacedWith(Value *New) {
    375   // Clear this SCEVUnknown from various maps.
    376   SE->forgetMemoizedResults(this);
    377 
    378   // Remove this SCEVUnknown from the uniquing map.
    379   SE->UniqueSCEVs.RemoveNode(this);
    380 
    381   // Update this SCEVUnknown to point to the new value. This is needed
    382   // because there may still be outstanding SCEVs which still point to
    383   // this SCEVUnknown.
    384   setValPtr(New);
    385 }
    386 
    387 bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
    388   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    389     if (VCE->getOpcode() == Instruction::PtrToInt)
    390       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
    391         if (CE->getOpcode() == Instruction::GetElementPtr &&
    392             CE->getOperand(0)->isNullValue() &&
    393             CE->getNumOperands() == 2)
    394           if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
    395             if (CI->isOne()) {
    396               AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
    397                                  ->getElementType();
    398               return true;
    399             }
    400 
    401   return false;
    402 }
    403 
    404 bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
    405   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    406     if (VCE->getOpcode() == Instruction::PtrToInt)
    407       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
    408         if (CE->getOpcode() == Instruction::GetElementPtr &&
    409             CE->getOperand(0)->isNullValue()) {
    410           Type *Ty =
    411             cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
    412           if (StructType *STy = dyn_cast<StructType>(Ty))
    413             if (!STy->isPacked() &&
    414                 CE->getNumOperands() == 3 &&
    415                 CE->getOperand(1)->isNullValue()) {
    416               if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
    417                 if (CI->isOne() &&
    418                     STy->getNumElements() == 2 &&
    419                     STy->getElementType(0)->isIntegerTy(1)) {
    420                   AllocTy = STy->getElementType(1);
    421                   return true;
    422                 }
    423             }
    424         }
    425 
    426   return false;
    427 }
    428 
    429 bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
    430   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    431     if (VCE->getOpcode() == Instruction::PtrToInt)
    432       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
    433         if (CE->getOpcode() == Instruction::GetElementPtr &&
    434             CE->getNumOperands() == 3 &&
    435             CE->getOperand(0)->isNullValue() &&
    436             CE->getOperand(1)->isNullValue()) {
    437           Type *Ty =
    438             cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
    439           // Ignore vector types here so that ScalarEvolutionExpander doesn't
    440           // emit getelementptrs that index into vectors.
    441           if (Ty->isStructTy() || Ty->isArrayTy()) {
    442             CTy = Ty;
    443             FieldNo = CE->getOperand(2);
    444             return true;
    445           }
    446         }
    447 
    448   return false;
    449 }
    450 
    451 //===----------------------------------------------------------------------===//
    452 //                               SCEV Utilities
    453 //===----------------------------------------------------------------------===//
    454 
    455 namespace {
    456   /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
    457   /// than the complexity of the RHS.  This comparator is used to canonicalize
    458   /// expressions.
    459   class SCEVComplexityCompare {
    460     const LoopInfo *const LI;
    461   public:
    462     explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
    463 
    464     // Return true or false if LHS is less than, or at least RHS, respectively.
    465     bool operator()(const SCEV *LHS, const SCEV *RHS) const {
    466       return compare(LHS, RHS) < 0;
    467     }
    468 
    469     // Return negative, zero, or positive, if LHS is less than, equal to, or
    470     // greater than RHS, respectively. A three-way result allows recursive
    471     // comparisons to be more efficient.
    472     int compare(const SCEV *LHS, const SCEV *RHS) const {
    473       // Fast-path: SCEVs are uniqued so we can do a quick equality check.
    474       if (LHS == RHS)
    475         return 0;
    476 
    477       // Primarily, sort the SCEVs by their getSCEVType().
    478       unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
    479       if (LType != RType)
    480         return (int)LType - (int)RType;
    481 
    482       // Aside from the getSCEVType() ordering, the particular ordering
    483       // isn't very important except that it's beneficial to be consistent,
    484       // so that (a + b) and (b + a) don't end up as different expressions.
    485       switch (LType) {
    486       case scUnknown: {
    487         const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
    488         const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
    489 
    490         // Sort SCEVUnknown values with some loose heuristics. TODO: This is
    491         // not as complete as it could be.
    492         const Value *LV = LU->getValue(), *RV = RU->getValue();
    493 
    494         // Order pointer values after integer values. This helps SCEVExpander
    495         // form GEPs.
    496         bool LIsPointer = LV->getType()->isPointerTy(),
    497              RIsPointer = RV->getType()->isPointerTy();
    498         if (LIsPointer != RIsPointer)
    499           return (int)LIsPointer - (int)RIsPointer;
    500 
    501         // Compare getValueID values.
    502         unsigned LID = LV->getValueID(),
    503                  RID = RV->getValueID();
    504         if (LID != RID)
    505           return (int)LID - (int)RID;
    506 
    507         // Sort arguments by their position.
    508         if (const Argument *LA = dyn_cast<Argument>(LV)) {
    509           const Argument *RA = cast<Argument>(RV);
    510           unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
    511           return (int)LArgNo - (int)RArgNo;
    512         }
    513 
    514         // For instructions, compare their loop depth, and their operand
    515         // count.  This is pretty loose.
    516         if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
    517           const Instruction *RInst = cast<Instruction>(RV);
    518 
    519           // Compare loop depths.
    520           const BasicBlock *LParent = LInst->getParent(),
    521                            *RParent = RInst->getParent();
    522           if (LParent != RParent) {
    523             unsigned LDepth = LI->getLoopDepth(LParent),
    524                      RDepth = LI->getLoopDepth(RParent);
    525             if (LDepth != RDepth)
    526               return (int)LDepth - (int)RDepth;
    527           }
    528 
    529           // Compare the number of operands.
    530           unsigned LNumOps = LInst->getNumOperands(),
    531                    RNumOps = RInst->getNumOperands();
    532           return (int)LNumOps - (int)RNumOps;
    533         }
    534 
    535         return 0;
    536       }
    537 
    538       case scConstant: {
    539         const SCEVConstant *LC = cast<SCEVConstant>(LHS);
    540         const SCEVConstant *RC = cast<SCEVConstant>(RHS);
    541 
    542         // Compare constant values.
    543         const APInt &LA = LC->getValue()->getValue();
    544         const APInt &RA = RC->getValue()->getValue();
    545         unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
    546         if (LBitWidth != RBitWidth)
    547           return (int)LBitWidth - (int)RBitWidth;
    548         return LA.ult(RA) ? -1 : 1;
    549       }
    550 
    551       case scAddRecExpr: {
    552         const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
    553         const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
    554 
    555         // Compare addrec loop depths.
    556         const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
    557         if (LLoop != RLoop) {
    558           unsigned LDepth = LLoop->getLoopDepth(),
    559                    RDepth = RLoop->getLoopDepth();
    560           if (LDepth != RDepth)
    561             return (int)LDepth - (int)RDepth;
    562         }
    563 
    564         // Addrec complexity grows with operand count.
    565         unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
    566         if (LNumOps != RNumOps)
    567           return (int)LNumOps - (int)RNumOps;
    568 
    569         // Lexicographically compare.
    570         for (unsigned i = 0; i != LNumOps; ++i) {
    571           long X = compare(LA->getOperand(i), RA->getOperand(i));
    572           if (X != 0)
    573             return X;
    574         }
    575 
    576         return 0;
    577       }
    578 
    579       case scAddExpr:
    580       case scMulExpr:
    581       case scSMaxExpr:
    582       case scUMaxExpr: {
    583         const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
    584         const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
    585 
    586         // Lexicographically compare n-ary expressions.
    587         unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
    588         if (LNumOps != RNumOps)
    589           return (int)LNumOps - (int)RNumOps;
    590 
    591         for (unsigned i = 0; i != LNumOps; ++i) {
    592           if (i >= RNumOps)
    593             return 1;
    594           long X = compare(LC->getOperand(i), RC->getOperand(i));
    595           if (X != 0)
    596             return X;
    597         }
    598         return (int)LNumOps - (int)RNumOps;
    599       }
    600 
    601       case scUDivExpr: {
    602         const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
    603         const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
    604 
    605         // Lexicographically compare udiv expressions.
    606         long X = compare(LC->getLHS(), RC->getLHS());
    607         if (X != 0)
    608           return X;
    609         return compare(LC->getRHS(), RC->getRHS());
    610       }
    611 
    612       case scTruncate:
    613       case scZeroExtend:
    614       case scSignExtend: {
    615         const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
    616         const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
    617 
    618         // Compare cast expressions by operand.
    619         return compare(LC->getOperand(), RC->getOperand());
    620       }
    621 
    622       default:
    623         llvm_unreachable("Unknown SCEV kind!");
    624       }
    625     }
    626   };
    627 }
    628 
    629 /// GroupByComplexity - Given a list of SCEV objects, order them by their
    630 /// complexity, and group objects of the same complexity together by value.
    631 /// When this routine is finished, we know that any duplicates in the vector are
    632 /// consecutive and that complexity is monotonically increasing.
    633 ///
    634 /// Note that we go take special precautions to ensure that we get deterministic
    635 /// results from this routine.  In other words, we don't want the results of
    636 /// this to depend on where the addresses of various SCEV objects happened to
    637 /// land in memory.
    638 ///
    639 static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
    640                               LoopInfo *LI) {
    641   if (Ops.size() < 2) return;  // Noop
    642   if (Ops.size() == 2) {
    643     // This is the common case, which also happens to be trivially simple.
    644     // Special case it.
    645     const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
    646     if (SCEVComplexityCompare(LI)(RHS, LHS))
    647       std::swap(LHS, RHS);
    648     return;
    649   }
    650 
    651   // Do the rough sort by complexity.
    652   std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
    653 
    654   // Now that we are sorted by complexity, group elements of the same
    655   // complexity.  Note that this is, at worst, N^2, but the vector is likely to
    656   // be extremely short in practice.  Note that we take this approach because we
    657   // do not want to depend on the addresses of the objects we are grouping.
    658   for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
    659     const SCEV *S = Ops[i];
    660     unsigned Complexity = S->getSCEVType();
    661 
    662     // If there are any objects of the same complexity and same value as this
    663     // one, group them.
    664     for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
    665       if (Ops[j] == S) { // Found a duplicate.
    666         // Move it to immediately after i'th element.
    667         std::swap(Ops[i+1], Ops[j]);
    668         ++i;   // no need to rescan it.
    669         if (i == e-2) return;  // Done!
    670       }
    671     }
    672   }
    673 }
    674 
    675 
    676 
    677 //===----------------------------------------------------------------------===//
    678 //                      Simple SCEV method implementations
    679 //===----------------------------------------------------------------------===//
    680 
    681 /// BinomialCoefficient - Compute BC(It, K).  The result has width W.
    682 /// Assume, K > 0.
    683 static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
    684                                        ScalarEvolution &SE,
    685                                        Type *ResultTy) {
    686   // Handle the simplest case efficiently.
    687   if (K == 1)
    688     return SE.getTruncateOrZeroExtend(It, ResultTy);
    689 
    690   // We are using the following formula for BC(It, K):
    691   //
    692   //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
    693   //
    694   // Suppose, W is the bitwidth of the return value.  We must be prepared for
    695   // overflow.  Hence, we must assure that the result of our computation is
    696   // equal to the accurate one modulo 2^W.  Unfortunately, division isn't
    697   // safe in modular arithmetic.
    698   //
    699   // However, this code doesn't use exactly that formula; the formula it uses
    700   // is something like the following, where T is the number of factors of 2 in
    701   // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
    702   // exponentiation:
    703   //
    704   //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
    705   //
    706   // This formula is trivially equivalent to the previous formula.  However,
    707   // this formula can be implemented much more efficiently.  The trick is that
    708   // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
    709   // arithmetic.  To do exact division in modular arithmetic, all we have
    710   // to do is multiply by the inverse.  Therefore, this step can be done at
    711   // width W.
    712   //
    713   // The next issue is how to safely do the division by 2^T.  The way this
    714   // is done is by doing the multiplication step at a width of at least W + T
    715   // bits.  This way, the bottom W+T bits of the product are accurate. Then,
    716   // when we perform the division by 2^T (which is equivalent to a right shift
    717   // by T), the bottom W bits are accurate.  Extra bits are okay; they'll get
    718   // truncated out after the division by 2^T.
    719   //
    720   // In comparison to just directly using the first formula, this technique
    721   // is much more efficient; using the first formula requires W * K bits,
    722   // but this formula less than W + K bits. Also, the first formula requires
    723   // a division step, whereas this formula only requires multiplies and shifts.
    724   //
    725   // It doesn't matter whether the subtraction step is done in the calculation
    726   // width or the input iteration count's width; if the subtraction overflows,
    727   // the result must be zero anyway.  We prefer here to do it in the width of
    728   // the induction variable because it helps a lot for certain cases; CodeGen
    729   // isn't smart enough to ignore the overflow, which leads to much less
    730   // efficient code if the width of the subtraction is wider than the native
    731   // register width.
    732   //
    733   // (It's possible to not widen at all by pulling out factors of 2 before
    734   // the multiplication; for example, K=2 can be calculated as
    735   // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
    736   // extra arithmetic, so it's not an obvious win, and it gets
    737   // much more complicated for K > 3.)
    738 
    739   // Protection from insane SCEVs; this bound is conservative,
    740   // but it probably doesn't matter.
    741   if (K > 1000)
    742     return SE.getCouldNotCompute();
    743 
    744   unsigned W = SE.getTypeSizeInBits(ResultTy);
    745 
    746   // Calculate K! / 2^T and T; we divide out the factors of two before
    747   // multiplying for calculating K! / 2^T to avoid overflow.
    748   // Other overflow doesn't matter because we only care about the bottom
    749   // W bits of the result.
    750   APInt OddFactorial(W, 1);
    751   unsigned T = 1;
    752   for (unsigned i = 3; i <= K; ++i) {
    753     APInt Mult(W, i);
    754     unsigned TwoFactors = Mult.countTrailingZeros();
    755     T += TwoFactors;
    756     Mult = Mult.lshr(TwoFactors);
    757     OddFactorial *= Mult;
    758   }
    759 
    760   // We need at least W + T bits for the multiplication step
    761   unsigned CalculationBits = W + T;
    762 
    763   // Calculate 2^T, at width T+W.
    764   APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
    765 
    766   // Calculate the multiplicative inverse of K! / 2^T;
    767   // this multiplication factor will perform the exact division by
    768   // K! / 2^T.
    769   APInt Mod = APInt::getSignedMinValue(W+1);
    770   APInt MultiplyFactor = OddFactorial.zext(W+1);
    771   MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
    772   MultiplyFactor = MultiplyFactor.trunc(W);
    773 
    774   // Calculate the product, at width T+W
    775   IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
    776                                                       CalculationBits);
    777   const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
    778   for (unsigned i = 1; i != K; ++i) {
    779     const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
    780     Dividend = SE.getMulExpr(Dividend,
    781                              SE.getTruncateOrZeroExtend(S, CalculationTy));
    782   }
    783 
    784   // Divide by 2^T
    785   const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
    786 
    787   // Truncate the result, and divide by K! / 2^T.
    788 
    789   return SE.getMulExpr(SE.getConstant(MultiplyFactor),
    790                        SE.getTruncateOrZeroExtend(DivResult, ResultTy));
    791 }
    792 
    793 /// evaluateAtIteration - Return the value of this chain of recurrences at
    794 /// the specified iteration number.  We can evaluate this recurrence by
    795 /// multiplying each element in the chain by the binomial coefficient
    796 /// corresponding to it.  In other words, we can evaluate {A,+,B,+,C,+,D} as:
    797 ///
    798 ///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
    799 ///
    800 /// where BC(It, k) stands for binomial coefficient.
    801 ///
    802 const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
    803                                                 ScalarEvolution &SE) const {
    804   const SCEV *Result = getStart();
    805   for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
    806     // The computation is correct in the face of overflow provided that the
    807     // multiplication is performed _after_ the evaluation of the binomial
    808     // coefficient.
    809     const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
    810     if (isa<SCEVCouldNotCompute>(Coeff))
    811       return Coeff;
    812 
    813     Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
    814   }
    815   return Result;
    816 }
    817 
    818 //===----------------------------------------------------------------------===//
    819 //                    SCEV Expression folder implementations
    820 //===----------------------------------------------------------------------===//
    821 
    822 const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
    823                                              Type *Ty) {
    824   assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
    825          "This is not a truncating conversion!");
    826   assert(isSCEVable(Ty) &&
    827          "This is not a conversion to a SCEVable type!");
    828   Ty = getEffectiveSCEVType(Ty);
    829 
    830   FoldingSetNodeID ID;
    831   ID.AddInteger(scTruncate);
    832   ID.AddPointer(Op);
    833   ID.AddPointer(Ty);
    834   void *IP = 0;
    835   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    836 
    837   // Fold if the operand is constant.
    838   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    839     return getConstant(
    840       cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
    841 
    842   // trunc(trunc(x)) --> trunc(x)
    843   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
    844     return getTruncateExpr(ST->getOperand(), Ty);
    845 
    846   // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
    847   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    848     return getTruncateOrSignExtend(SS->getOperand(), Ty);
    849 
    850   // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
    851   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    852     return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
    853 
    854   // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
    855   // eliminate all the truncates.
    856   if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
    857     SmallVector<const SCEV *, 4> Operands;
    858     bool hasTrunc = false;
    859     for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
    860       const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
    861       hasTrunc = isa<SCEVTruncateExpr>(S);
    862       Operands.push_back(S);
    863     }
    864     if (!hasTrunc)
    865       return getAddExpr(Operands);
    866     UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
    867   }
    868 
    869   // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
    870   // eliminate all the truncates.
    871   if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
    872     SmallVector<const SCEV *, 4> Operands;
    873     bool hasTrunc = false;
    874     for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
    875       const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
    876       hasTrunc = isa<SCEVTruncateExpr>(S);
    877       Operands.push_back(S);
    878     }
    879     if (!hasTrunc)
    880       return getMulExpr(Operands);
    881     UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
    882   }
    883 
    884   // If the input value is a chrec scev, truncate the chrec's operands.
    885   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
    886     SmallVector<const SCEV *, 4> Operands;
    887     for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
    888       Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
    889     return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
    890   }
    891 
    892   // The cast wasn't folded; create an explicit cast node. We can reuse
    893   // the existing insert position since if we get here, we won't have
    894   // made any changes which would invalidate it.
    895   SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
    896                                                  Op, Ty);
    897   UniqueSCEVs.InsertNode(S, IP);
    898   return S;
    899 }
    900 
    901 const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
    902                                                Type *Ty) {
    903   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
    904          "This is not an extending conversion!");
    905   assert(isSCEVable(Ty) &&
    906          "This is not a conversion to a SCEVable type!");
    907   Ty = getEffectiveSCEVType(Ty);
    908 
    909   // Fold if the operand is constant.
    910   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    911     return getConstant(
    912       cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
    913 
    914   // zext(zext(x)) --> zext(x)
    915   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    916     return getZeroExtendExpr(SZ->getOperand(), Ty);
    917 
    918   // Before doing any expensive analysis, check to see if we've already
    919   // computed a SCEV for this Op and Ty.
    920   FoldingSetNodeID ID;
    921   ID.AddInteger(scZeroExtend);
    922   ID.AddPointer(Op);
    923   ID.AddPointer(Ty);
    924   void *IP = 0;
    925   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    926 
    927   // zext(trunc(x)) --> zext(x) or x or trunc(x)
    928   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
    929     // It's possible the bits taken off by the truncate were all zero bits. If
    930     // so, we should be able to simplify this further.
    931     const SCEV *X = ST->getOperand();
    932     ConstantRange CR = getUnsignedRange(X);
    933     unsigned TruncBits = getTypeSizeInBits(ST->getType());
    934     unsigned NewBits = getTypeSizeInBits(Ty);
    935     if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
    936             CR.zextOrTrunc(NewBits)))
    937       return getTruncateOrZeroExtend(X, Ty);
    938   }
    939 
    940   // If the input value is a chrec scev, and we can prove that the value
    941   // did not overflow the old, smaller, value, we can zero extend all of the
    942   // operands (often constants).  This allows analysis of something like
    943   // this:  for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
    944   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
    945     if (AR->isAffine()) {
    946       const SCEV *Start = AR->getStart();
    947       const SCEV *Step = AR->getStepRecurrence(*this);
    948       unsigned BitWidth = getTypeSizeInBits(AR->getType());
    949       const Loop *L = AR->getLoop();
    950 
    951       // If we have special knowledge that this addrec won't overflow,
    952       // we don't need to do any further analysis.
    953       if (AR->getNoWrapFlags(SCEV::FlagNUW))
    954         return getAddRecExpr(getZeroExtendExpr(Start, Ty),
    955                              getZeroExtendExpr(Step, Ty),
    956                              L, AR->getNoWrapFlags());
    957 
    958       // Check whether the backedge-taken count is SCEVCouldNotCompute.
    959       // Note that this serves two purposes: It filters out loops that are
    960       // simply not analyzable, and it covers the case where this code is
    961       // being called from within backedge-taken count analysis, such that
    962       // attempting to ask for the backedge-taken count would likely result
    963       // in infinite recursion. In the later case, the analysis code will
    964       // cope with a conservative value, and it will take care to purge
    965       // that value once it has finished.
    966       const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
    967       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
    968         // Manually compute the final value for AR, checking for
    969         // overflow.
    970 
    971         // Check whether the backedge-taken count can be losslessly casted to
    972         // the addrec's type. The count is always unsigned.
    973         const SCEV *CastedMaxBECount =
    974           getTruncateOrZeroExtend(MaxBECount, Start->getType());
    975         const SCEV *RecastedMaxBECount =
    976           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
    977         if (MaxBECount == RecastedMaxBECount) {
    978           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
    979           // Check whether Start+Step*MaxBECount has no unsigned overflow.
    980           const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
    981           const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
    982           const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
    983           const SCEV *WideMaxBECount =
    984             getZeroExtendExpr(CastedMaxBECount, WideTy);
    985           const SCEV *OperandExtendedAdd =
    986             getAddExpr(WideStart,
    987                        getMulExpr(WideMaxBECount,
    988                                   getZeroExtendExpr(Step, WideTy)));
    989           if (ZAdd == OperandExtendedAdd) {
    990             // Cache knowledge of AR NUW, which is propagated to this AddRec.
    991             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
    992             // Return the expression with the addrec on the outside.
    993             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
    994                                  getZeroExtendExpr(Step, Ty),
    995                                  L, AR->getNoWrapFlags());
    996           }
    997           // Similar to above, only this time treat the step value as signed.
    998           // This covers loops that count down.
    999           OperandExtendedAdd =
   1000             getAddExpr(WideStart,
   1001                        getMulExpr(WideMaxBECount,
   1002                                   getSignExtendExpr(Step, WideTy)));
   1003           if (ZAdd == OperandExtendedAdd) {
   1004             // Cache knowledge of AR NW, which is propagated to this AddRec.
   1005             // Negative step causes unsigned wrap, but it still can't self-wrap.
   1006             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
   1007             // Return the expression with the addrec on the outside.
   1008             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
   1009                                  getSignExtendExpr(Step, Ty),
   1010                                  L, AR->getNoWrapFlags());
   1011           }
   1012         }
   1013 
   1014         // If the backedge is guarded by a comparison with the pre-inc value
   1015         // the addrec is safe. Also, if the entry is guarded by a comparison
   1016         // with the start value and the backedge is guarded by a comparison
   1017         // with the post-inc value, the addrec is safe.
   1018         if (isKnownPositive(Step)) {
   1019           const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
   1020                                       getUnsignedRange(Step).getUnsignedMax());
   1021           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
   1022               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
   1023                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
   1024                                            AR->getPostIncExpr(*this), N))) {
   1025             // Cache knowledge of AR NUW, which is propagated to this AddRec.
   1026             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
   1027             // Return the expression with the addrec on the outside.
   1028             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
   1029                                  getZeroExtendExpr(Step, Ty),
   1030                                  L, AR->getNoWrapFlags());
   1031           }
   1032         } else if (isKnownNegative(Step)) {
   1033           const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
   1034                                       getSignedRange(Step).getSignedMin());
   1035           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
   1036               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
   1037                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
   1038                                            AR->getPostIncExpr(*this), N))) {
   1039             // Cache knowledge of AR NW, which is propagated to this AddRec.
   1040             // Negative step causes unsigned wrap, but it still can't self-wrap.
   1041             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
   1042             // Return the expression with the addrec on the outside.
   1043             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
   1044                                  getSignExtendExpr(Step, Ty),
   1045                                  L, AR->getNoWrapFlags());
   1046           }
   1047         }
   1048       }
   1049     }
   1050 
   1051   // The cast wasn't folded; create an explicit cast node.
   1052   // Recompute the insert position, as it may have been invalidated.
   1053   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   1054   SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
   1055                                                    Op, Ty);
   1056   UniqueSCEVs.InsertNode(S, IP);
   1057   return S;
   1058 }
   1059 
   1060 // Get the limit of a recurrence such that incrementing by Step cannot cause
   1061 // signed overflow as long as the value of the recurrence within the loop does
   1062 // not exceed this limit before incrementing.
   1063 static const SCEV *getOverflowLimitForStep(const SCEV *Step,
   1064                                            ICmpInst::Predicate *Pred,
   1065                                            ScalarEvolution *SE) {
   1066   unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
   1067   if (SE->isKnownPositive(Step)) {
   1068     *Pred = ICmpInst::ICMP_SLT;
   1069     return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
   1070                            SE->getSignedRange(Step).getSignedMax());
   1071   }
   1072   if (SE->isKnownNegative(Step)) {
   1073     *Pred = ICmpInst::ICMP_SGT;
   1074     return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
   1075                        SE->getSignedRange(Step).getSignedMin());
   1076   }
   1077   return 0;
   1078 }
   1079 
   1080 // The recurrence AR has been shown to have no signed wrap. Typically, if we can
   1081 // prove NSW for AR, then we can just as easily prove NSW for its preincrement
   1082 // or postincrement sibling. This allows normalizing a sign extended AddRec as
   1083 // such: {sext(Step + Start),+,Step} => {(Step + sext(Start),+,Step} As a
   1084 // result, the expression "Step + sext(PreIncAR)" is congruent with
   1085 // "sext(PostIncAR)"
   1086 static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
   1087                                             Type *Ty,
   1088                                             ScalarEvolution *SE) {
   1089   const Loop *L = AR->getLoop();
   1090   const SCEV *Start = AR->getStart();
   1091   const SCEV *Step = AR->getStepRecurrence(*SE);
   1092 
   1093   // Check for a simple looking step prior to loop entry.
   1094   const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
   1095   if (!SA)
   1096     return 0;
   1097 
   1098   // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
   1099   // subtraction is expensive. For this purpose, perform a quick and dirty
   1100   // difference, by checking for Step in the operand list.
   1101   SmallVector<const SCEV *, 4> DiffOps;
   1102   for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end();
   1103        I != E; ++I) {
   1104     if (*I != Step)
   1105       DiffOps.push_back(*I);
   1106   }
   1107   if (DiffOps.size() == SA->getNumOperands())
   1108     return 0;
   1109 
   1110   // This is a postinc AR. Check for overflow on the preinc recurrence using the
   1111   // same three conditions that getSignExtendedExpr checks.
   1112 
   1113   // 1. NSW flags on the step increment.
   1114   const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
   1115   const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
   1116     SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
   1117 
   1118   if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
   1119     return PreStart;
   1120 
   1121   // 2. Direct overflow check on the step operation's expression.
   1122   unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
   1123   Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
   1124   const SCEV *OperandExtendedStart =
   1125     SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
   1126                    SE->getSignExtendExpr(Step, WideTy));
   1127   if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
   1128     // Cache knowledge of PreAR NSW.
   1129     if (PreAR)
   1130       const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
   1131     // FIXME: this optimization needs a unit test
   1132     DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
   1133     return PreStart;
   1134   }
   1135 
   1136   // 3. Loop precondition.
   1137   ICmpInst::Predicate Pred;
   1138   const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
   1139 
   1140   if (OverflowLimit &&
   1141       SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
   1142     return PreStart;
   1143   }
   1144   return 0;
   1145 }
   1146 
   1147 // Get the normalized sign-extended expression for this AddRec's Start.
   1148 static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
   1149                                             Type *Ty,
   1150                                             ScalarEvolution *SE) {
   1151   const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
   1152   if (!PreStart)
   1153     return SE->getSignExtendExpr(AR->getStart(), Ty);
   1154 
   1155   return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
   1156                         SE->getSignExtendExpr(PreStart, Ty));
   1157 }
   1158 
   1159 const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
   1160                                                Type *Ty) {
   1161   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
   1162          "This is not an extending conversion!");
   1163   assert(isSCEVable(Ty) &&
   1164          "This is not a conversion to a SCEVable type!");
   1165   Ty = getEffectiveSCEVType(Ty);
   1166 
   1167   // Fold if the operand is constant.
   1168   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
   1169     return getConstant(
   1170       cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
   1171 
   1172   // sext(sext(x)) --> sext(x)
   1173   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
   1174     return getSignExtendExpr(SS->getOperand(), Ty);
   1175 
   1176   // sext(zext(x)) --> zext(x)
   1177   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
   1178     return getZeroExtendExpr(SZ->getOperand(), Ty);
   1179 
   1180   // Before doing any expensive analysis, check to see if we've already
   1181   // computed a SCEV for this Op and Ty.
   1182   FoldingSetNodeID ID;
   1183   ID.AddInteger(scSignExtend);
   1184   ID.AddPointer(Op);
   1185   ID.AddPointer(Ty);
   1186   void *IP = 0;
   1187   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   1188 
   1189   // If the input value is provably positive, build a zext instead.
   1190   if (isKnownNonNegative(Op))
   1191     return getZeroExtendExpr(Op, Ty);
   1192 
   1193   // sext(trunc(x)) --> sext(x) or x or trunc(x)
   1194   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
   1195     // It's possible the bits taken off by the truncate were all sign bits. If
   1196     // so, we should be able to simplify this further.
   1197     const SCEV *X = ST->getOperand();
   1198     ConstantRange CR = getSignedRange(X);
   1199     unsigned TruncBits = getTypeSizeInBits(ST->getType());
   1200     unsigned NewBits = getTypeSizeInBits(Ty);
   1201     if (CR.truncate(TruncBits).signExtend(NewBits).contains(
   1202             CR.sextOrTrunc(NewBits)))
   1203       return getTruncateOrSignExtend(X, Ty);
   1204   }
   1205 
   1206   // If the input value is a chrec scev, and we can prove that the value
   1207   // did not overflow the old, smaller, value, we can sign extend all of the
   1208   // operands (often constants).  This allows analysis of something like
   1209   // this:  for (signed char X = 0; X < 100; ++X) { int Y = X; }
   1210   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
   1211     if (AR->isAffine()) {
   1212       const SCEV *Start = AR->getStart();
   1213       const SCEV *Step = AR->getStepRecurrence(*this);
   1214       unsigned BitWidth = getTypeSizeInBits(AR->getType());
   1215       const Loop *L = AR->getLoop();
   1216 
   1217       // If we have special knowledge that this addrec won't overflow,
   1218       // we don't need to do any further analysis.
   1219       if (AR->getNoWrapFlags(SCEV::FlagNSW))
   1220         return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
   1221                              getSignExtendExpr(Step, Ty),
   1222                              L, SCEV::FlagNSW);
   1223 
   1224       // Check whether the backedge-taken count is SCEVCouldNotCompute.
   1225       // Note that this serves two purposes: It filters out loops that are
   1226       // simply not analyzable, and it covers the case where this code is
   1227       // being called from within backedge-taken count analysis, such that
   1228       // attempting to ask for the backedge-taken count would likely result
   1229       // in infinite recursion. In the later case, the analysis code will
   1230       // cope with a conservative value, and it will take care to purge
   1231       // that value once it has finished.
   1232       const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
   1233       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
   1234         // Manually compute the final value for AR, checking for
   1235         // overflow.
   1236 
   1237         // Check whether the backedge-taken count can be losslessly casted to
   1238         // the addrec's type. The count is always unsigned.
   1239         const SCEV *CastedMaxBECount =
   1240           getTruncateOrZeroExtend(MaxBECount, Start->getType());
   1241         const SCEV *RecastedMaxBECount =
   1242           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
   1243         if (MaxBECount == RecastedMaxBECount) {
   1244           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
   1245           // Check whether Start+Step*MaxBECount has no signed overflow.
   1246           const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
   1247           const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
   1248           const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
   1249           const SCEV *WideMaxBECount =
   1250             getZeroExtendExpr(CastedMaxBECount, WideTy);
   1251           const SCEV *OperandExtendedAdd =
   1252             getAddExpr(WideStart,
   1253                        getMulExpr(WideMaxBECount,
   1254                                   getSignExtendExpr(Step, WideTy)));
   1255           if (SAdd == OperandExtendedAdd) {
   1256             // Cache knowledge of AR NSW, which is propagated to this AddRec.
   1257             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
   1258             // Return the expression with the addrec on the outside.
   1259             return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
   1260                                  getSignExtendExpr(Step, Ty),
   1261                                  L, AR->getNoWrapFlags());
   1262           }
   1263           // Similar to above, only this time treat the step value as unsigned.
   1264           // This covers loops that count up with an unsigned step.
   1265           OperandExtendedAdd =
   1266             getAddExpr(WideStart,
   1267                        getMulExpr(WideMaxBECount,
   1268                                   getZeroExtendExpr(Step, WideTy)));
   1269           if (SAdd == OperandExtendedAdd) {
   1270             // Cache knowledge of AR NSW, which is propagated to this AddRec.
   1271             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
   1272             // Return the expression with the addrec on the outside.
   1273             return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
   1274                                  getZeroExtendExpr(Step, Ty),
   1275                                  L, AR->getNoWrapFlags());
   1276           }
   1277         }
   1278 
   1279         // If the backedge is guarded by a comparison with the pre-inc value
   1280         // the addrec is safe. Also, if the entry is guarded by a comparison
   1281         // with the start value and the backedge is guarded by a comparison
   1282         // with the post-inc value, the addrec is safe.
   1283         ICmpInst::Predicate Pred;
   1284         const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
   1285         if (OverflowLimit &&
   1286             (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
   1287              (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
   1288               isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
   1289                                           OverflowLimit)))) {
   1290           // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
   1291           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
   1292           return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
   1293                                getSignExtendExpr(Step, Ty),
   1294                                L, AR->getNoWrapFlags());
   1295         }
   1296       }
   1297     }
   1298 
   1299   // The cast wasn't folded; create an explicit cast node.
   1300   // Recompute the insert position, as it may have been invalidated.
   1301   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   1302   SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
   1303                                                    Op, Ty);
   1304   UniqueSCEVs.InsertNode(S, IP);
   1305   return S;
   1306 }
   1307 
   1308 /// getAnyExtendExpr - Return a SCEV for the given operand extended with
   1309 /// unspecified bits out to the given type.
   1310 ///
   1311 const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
   1312                                               Type *Ty) {
   1313   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
   1314          "This is not an extending conversion!");
   1315   assert(isSCEVable(Ty) &&
   1316          "This is not a conversion to a SCEVable type!");
   1317   Ty = getEffectiveSCEVType(Ty);
   1318 
   1319   // Sign-extend negative constants.
   1320   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
   1321     if (SC->getValue()->getValue().isNegative())
   1322       return getSignExtendExpr(Op, Ty);
   1323 
   1324   // Peel off a truncate cast.
   1325   if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
   1326     const SCEV *NewOp = T->getOperand();
   1327     if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
   1328       return getAnyExtendExpr(NewOp, Ty);
   1329     return getTruncateOrNoop(NewOp, Ty);
   1330   }
   1331 
   1332   // Next try a zext cast. If the cast is folded, use it.
   1333   const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
   1334   if (!isa<SCEVZeroExtendExpr>(ZExt))
   1335     return ZExt;
   1336 
   1337   // Next try a sext cast. If the cast is folded, use it.
   1338   const SCEV *SExt = getSignExtendExpr(Op, Ty);
   1339   if (!isa<SCEVSignExtendExpr>(SExt))
   1340     return SExt;
   1341 
   1342   // Force the cast to be folded into the operands of an addrec.
   1343   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
   1344     SmallVector<const SCEV *, 4> Ops;
   1345     for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
   1346          I != E; ++I)
   1347       Ops.push_back(getAnyExtendExpr(*I, Ty));
   1348     return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
   1349   }
   1350 
   1351   // If the expression is obviously signed, use the sext cast value.
   1352   if (isa<SCEVSMaxExpr>(Op))
   1353     return SExt;
   1354 
   1355   // Absent any other information, use the zext cast value.
   1356   return ZExt;
   1357 }
   1358 
   1359 /// CollectAddOperandsWithScales - Process the given Ops list, which is
   1360 /// a list of operands to be added under the given scale, update the given
   1361 /// map. This is a helper function for getAddRecExpr. As an example of
   1362 /// what it does, given a sequence of operands that would form an add
   1363 /// expression like this:
   1364 ///
   1365 ///    m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
   1366 ///
   1367 /// where A and B are constants, update the map with these values:
   1368 ///
   1369 ///    (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
   1370 ///
   1371 /// and add 13 + A*B*29 to AccumulatedConstant.
   1372 /// This will allow getAddRecExpr to produce this:
   1373 ///
   1374 ///    13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
   1375 ///
   1376 /// This form often exposes folding opportunities that are hidden in
   1377 /// the original operand list.
   1378 ///
   1379 /// Return true iff it appears that any interesting folding opportunities
   1380 /// may be exposed. This helps getAddRecExpr short-circuit extra work in
   1381 /// the common case where no interesting opportunities are present, and
   1382 /// is also used as a check to avoid infinite recursion.
   1383 ///
   1384 static bool
   1385 CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
   1386                              SmallVectorImpl<const SCEV *> &NewOps,
   1387                              APInt &AccumulatedConstant,
   1388                              const SCEV *const *Ops, size_t NumOperands,
   1389                              const APInt &Scale,
   1390                              ScalarEvolution &SE) {
   1391   bool Interesting = false;
   1392 
   1393   // Iterate over the add operands. They are sorted, with constants first.
   1394   unsigned i = 0;
   1395   while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
   1396     ++i;
   1397     // Pull a buried constant out to the outside.
   1398     if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
   1399       Interesting = true;
   1400     AccumulatedConstant += Scale * C->getValue()->getValue();
   1401   }
   1402 
   1403   // Next comes everything else. We're especially interested in multiplies
   1404   // here, but they're in the middle, so just visit the rest with one loop.
   1405   for (; i != NumOperands; ++i) {
   1406     const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
   1407     if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
   1408       APInt NewScale =
   1409         Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
   1410       if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
   1411         // A multiplication of a constant with another add; recurse.
   1412         const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
   1413         Interesting |=
   1414           CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
   1415                                        Add->op_begin(), Add->getNumOperands(),
   1416                                        NewScale, SE);
   1417       } else {
   1418         // A multiplication of a constant with some other value. Update
   1419         // the map.
   1420         SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
   1421         const SCEV *Key = SE.getMulExpr(MulOps);
   1422         std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
   1423           M.insert(std::make_pair(Key, NewScale));
   1424         if (Pair.second) {
   1425           NewOps.push_back(Pair.first->first);
   1426         } else {
   1427           Pair.first->second += NewScale;
   1428           // The map already had an entry for this value, which may indicate
   1429           // a folding opportunity.
   1430           Interesting = true;
   1431         }
   1432       }
   1433     } else {
   1434       // An ordinary operand. Update the map.
   1435       std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
   1436         M.insert(std::make_pair(Ops[i], Scale));
   1437       if (Pair.second) {
   1438         NewOps.push_back(Pair.first->first);
   1439       } else {
   1440         Pair.first->second += Scale;
   1441         // The map already had an entry for this value, which may indicate
   1442         // a folding opportunity.
   1443         Interesting = true;
   1444       }
   1445     }
   1446   }
   1447 
   1448   return Interesting;
   1449 }
   1450 
   1451 namespace {
   1452   struct APIntCompare {
   1453     bool operator()(const APInt &LHS, const APInt &RHS) const {
   1454       return LHS.ult(RHS);
   1455     }
   1456   };
   1457 }
   1458 
   1459 /// getAddExpr - Get a canonical add expression, or something simpler if
   1460 /// possible.
   1461 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   1462                                         SCEV::NoWrapFlags Flags) {
   1463   assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
   1464          "only nuw or nsw allowed");
   1465   assert(!Ops.empty() && "Cannot get empty add!");
   1466   if (Ops.size() == 1) return Ops[0];
   1467 #ifndef NDEBUG
   1468   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   1469   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
   1470     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
   1471            "SCEVAddExpr operand types don't match!");
   1472 #endif
   1473 
   1474   // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
   1475   // And vice-versa.
   1476   int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
   1477   SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
   1478   if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
   1479     bool All = true;
   1480     for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
   1481          E = Ops.end(); I != E; ++I)
   1482       if (!isKnownNonNegative(*I)) {
   1483         All = false;
   1484         break;
   1485       }
   1486     if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
   1487   }
   1488 
   1489   // Sort by complexity, this groups all similar expression types together.
   1490   GroupByComplexity(Ops, LI);
   1491 
   1492   // If there are any constants, fold them together.
   1493   unsigned Idx = 0;
   1494   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
   1495     ++Idx;
   1496     assert(Idx < Ops.size());
   1497     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
   1498       // We found two constants, fold them together!
   1499       Ops[0] = getConstant(LHSC->getValue()->getValue() +
   1500                            RHSC->getValue()->getValue());
   1501       if (Ops.size() == 2) return Ops[0];
   1502       Ops.erase(Ops.begin()+1);  // Erase the folded element
   1503       LHSC = cast<SCEVConstant>(Ops[0]);
   1504     }
   1505 
   1506     // If we are left with a constant zero being added, strip it off.
   1507     if (LHSC->getValue()->isZero()) {
   1508       Ops.erase(Ops.begin());
   1509       --Idx;
   1510     }
   1511 
   1512     if (Ops.size() == 1) return Ops[0];
   1513   }
   1514 
   1515   // Okay, check to see if the same value occurs in the operand list more than
   1516   // once.  If so, merge them together into an multiply expression.  Since we
   1517   // sorted the list, these values are required to be adjacent.
   1518   Type *Ty = Ops[0]->getType();
   1519   bool FoundMatch = false;
   1520   for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
   1521     if (Ops[i] == Ops[i+1]) {      //  X + Y + Y  -->  X + Y*2
   1522       // Scan ahead to count how many equal operands there are.
   1523       unsigned Count = 2;
   1524       while (i+Count != e && Ops[i+Count] == Ops[i])
   1525         ++Count;
   1526       // Merge the values into a multiply.
   1527       const SCEV *Scale = getConstant(Ty, Count);
   1528       const SCEV *Mul = getMulExpr(Scale, Ops[i]);
   1529       if (Ops.size() == Count)
   1530         return Mul;
   1531       Ops[i] = Mul;
   1532       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
   1533       --i; e -= Count - 1;
   1534       FoundMatch = true;
   1535     }
   1536   if (FoundMatch)
   1537     return getAddExpr(Ops, Flags);
   1538 
   1539   // Check for truncates. If all the operands are truncated from the same
   1540   // type, see if factoring out the truncate would permit the result to be
   1541   // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
   1542   // if the contents of the resulting outer trunc fold to something simple.
   1543   for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
   1544     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
   1545     Type *DstType = Trunc->getType();
   1546     Type *SrcType = Trunc->getOperand()->getType();
   1547     SmallVector<const SCEV *, 8> LargeOps;
   1548     bool Ok = true;
   1549     // Check all the operands to see if they can be represented in the
   1550     // source type of the truncate.
   1551     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
   1552       if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
   1553         if (T->getOperand()->getType() != SrcType) {
   1554           Ok = false;
   1555           break;
   1556         }
   1557         LargeOps.push_back(T->getOperand());
   1558       } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
   1559         LargeOps.push_back(getAnyExtendExpr(C, SrcType));
   1560       } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
   1561         SmallVector<const SCEV *, 8> LargeMulOps;
   1562         for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
   1563           if (const SCEVTruncateExpr *T =
   1564                 dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
   1565             if (T->getOperand()->getType() != SrcType) {
   1566               Ok = false;
   1567               break;
   1568             }
   1569             LargeMulOps.push_back(T->getOperand());
   1570           } else if (const SCEVConstant *C =
   1571                        dyn_cast<SCEVConstant>(M->getOperand(j))) {
   1572             LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
   1573           } else {
   1574             Ok = false;
   1575             break;
   1576           }
   1577         }
   1578         if (Ok)
   1579           LargeOps.push_back(getMulExpr(LargeMulOps));
   1580       } else {
   1581         Ok = false;
   1582         break;
   1583       }
   1584     }
   1585     if (Ok) {
   1586       // Evaluate the expression in the larger type.
   1587       const SCEV *Fold = getAddExpr(LargeOps, Flags);
   1588       // If it folds to something simple, use it. Otherwise, don't.
   1589       if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
   1590         return getTruncateExpr(Fold, DstType);
   1591     }
   1592   }
   1593 
   1594   // Skip past any other cast SCEVs.
   1595   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
   1596     ++Idx;
   1597 
   1598   // If there are add operands they would be next.
   1599   if (Idx < Ops.size()) {
   1600     bool DeletedAdd = false;
   1601     while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
   1602       // If we have an add, expand the add operands onto the end of the operands
   1603       // list.
   1604       Ops.erase(Ops.begin()+Idx);
   1605       Ops.append(Add->op_begin(), Add->op_end());
   1606       DeletedAdd = true;
   1607     }
   1608 
   1609     // If we deleted at least one add, we added operands to the end of the list,
   1610     // and they are not necessarily sorted.  Recurse to resort and resimplify
   1611     // any operands we just acquired.
   1612     if (DeletedAdd)
   1613       return getAddExpr(Ops);
   1614   }
   1615 
   1616   // Skip over the add expression until we get to a multiply.
   1617   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
   1618     ++Idx;
   1619 
   1620   // Check to see if there are any folding opportunities present with
   1621   // operands multiplied by constant values.
   1622   if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
   1623     uint64_t BitWidth = getTypeSizeInBits(Ty);
   1624     DenseMap<const SCEV *, APInt> M;
   1625     SmallVector<const SCEV *, 8> NewOps;
   1626     APInt AccumulatedConstant(BitWidth, 0);
   1627     if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
   1628                                      Ops.data(), Ops.size(),
   1629                                      APInt(BitWidth, 1), *this)) {
   1630       // Some interesting folding opportunity is present, so its worthwhile to
   1631       // re-generate the operands list. Group the operands by constant scale,
   1632       // to avoid multiplying by the same constant scale multiple times.
   1633       std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
   1634       for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(),
   1635            E = NewOps.end(); I != E; ++I)
   1636         MulOpLists[M.find(*I)->second].push_back(*I);
   1637       // Re-generate the operands list.
   1638       Ops.clear();
   1639       if (AccumulatedConstant != 0)
   1640         Ops.push_back(getConstant(AccumulatedConstant));
   1641       for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
   1642            I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
   1643         if (I->first != 0)
   1644           Ops.push_back(getMulExpr(getConstant(I->first),
   1645                                    getAddExpr(I->second)));
   1646       if (Ops.empty())
   1647         return getConstant(Ty, 0);
   1648       if (Ops.size() == 1)
   1649         return Ops[0];
   1650       return getAddExpr(Ops);
   1651     }
   1652   }
   1653 
   1654   // If we are adding something to a multiply expression, make sure the
   1655   // something is not already an operand of the multiply.  If so, merge it into
   1656   // the multiply.
   1657   for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
   1658     const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
   1659     for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
   1660       const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
   1661       if (isa<SCEVConstant>(MulOpSCEV))
   1662         continue;
   1663       for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
   1664         if (MulOpSCEV == Ops[AddOp]) {
   1665           // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
   1666           const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
   1667           if (Mul->getNumOperands() != 2) {
   1668             // If the multiply has more than two operands, we must get the
   1669             // Y*Z term.
   1670             SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
   1671                                                 Mul->op_begin()+MulOp);
   1672             MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
   1673             InnerMul = getMulExpr(MulOps);
   1674           }
   1675           const SCEV *One = getConstant(Ty, 1);
   1676           const SCEV *AddOne = getAddExpr(One, InnerMul);
   1677           const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
   1678           if (Ops.size() == 2) return OuterMul;
   1679           if (AddOp < Idx) {
   1680             Ops.erase(Ops.begin()+AddOp);
   1681             Ops.erase(Ops.begin()+Idx-1);
   1682           } else {
   1683             Ops.erase(Ops.begin()+Idx);
   1684             Ops.erase(Ops.begin()+AddOp-1);
   1685           }
   1686           Ops.push_back(OuterMul);
   1687           return getAddExpr(Ops);
   1688         }
   1689 
   1690       // Check this multiply against other multiplies being added together.
   1691       for (unsigned OtherMulIdx = Idx+1;
   1692            OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
   1693            ++OtherMulIdx) {
   1694         const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
   1695         // If MulOp occurs in OtherMul, we can fold the two multiplies
   1696         // together.
   1697         for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
   1698              OMulOp != e; ++OMulOp)
   1699           if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
   1700             // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
   1701             const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
   1702             if (Mul->getNumOperands() != 2) {
   1703               SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
   1704                                                   Mul->op_begin()+MulOp);
   1705               MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
   1706               InnerMul1 = getMulExpr(MulOps);
   1707             }
   1708             const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
   1709             if (OtherMul->getNumOperands() != 2) {
   1710               SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
   1711                                                   OtherMul->op_begin()+OMulOp);
   1712               MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
   1713               InnerMul2 = getMulExpr(MulOps);
   1714             }
   1715             const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
   1716             const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
   1717             if (Ops.size() == 2) return OuterMul;
   1718             Ops.erase(Ops.begin()+Idx);
   1719             Ops.erase(Ops.begin()+OtherMulIdx-1);
   1720             Ops.push_back(OuterMul);
   1721             return getAddExpr(Ops);
   1722           }
   1723       }
   1724     }
   1725   }
   1726 
   1727   // If there are any add recurrences in the operands list, see if any other
   1728   // added values are loop invariant.  If so, we can fold them into the
   1729   // recurrence.
   1730   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
   1731     ++Idx;
   1732 
   1733   // Scan over all recurrences, trying to fold loop invariants into them.
   1734   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
   1735     // Scan all of the other operands to this add and add them to the vector if
   1736     // they are loop invariant w.r.t. the recurrence.
   1737     SmallVector<const SCEV *, 8> LIOps;
   1738     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
   1739     const Loop *AddRecLoop = AddRec->getLoop();
   1740     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   1741       if (isLoopInvariant(Ops[i], AddRecLoop)) {
   1742         LIOps.push_back(Ops[i]);
   1743         Ops.erase(Ops.begin()+i);
   1744         --i; --e;
   1745       }
   1746 
   1747     // If we found some loop invariants, fold them into the recurrence.
   1748     if (!LIOps.empty()) {
   1749       //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
   1750       LIOps.push_back(AddRec->getStart());
   1751 
   1752       SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
   1753                                              AddRec->op_end());
   1754       AddRecOps[0] = getAddExpr(LIOps);
   1755 
   1756       // Build the new addrec. Propagate the NUW and NSW flags if both the
   1757       // outer add and the inner addrec are guaranteed to have no overflow.
   1758       // Always propagate NW.
   1759       Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
   1760       const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
   1761 
   1762       // If all of the other operands were loop invariant, we are done.
   1763       if (Ops.size() == 1) return NewRec;
   1764 
   1765       // Otherwise, add the folded AddRec by the non-invariant parts.
   1766       for (unsigned i = 0;; ++i)
   1767         if (Ops[i] == AddRec) {
   1768           Ops[i] = NewRec;
   1769           break;
   1770         }
   1771       return getAddExpr(Ops);
   1772     }
   1773 
   1774     // Okay, if there weren't any loop invariants to be folded, check to see if
   1775     // there are multiple AddRec's with the same loop induction variable being
   1776     // added together.  If so, we can fold them.
   1777     for (unsigned OtherIdx = Idx+1;
   1778          OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
   1779          ++OtherIdx)
   1780       if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
   1781         // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
   1782         SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
   1783                                                AddRec->op_end());
   1784         for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
   1785              ++OtherIdx)
   1786           if (const SCEVAddRecExpr *OtherAddRec =
   1787                 dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
   1788             if (OtherAddRec->getLoop() == AddRecLoop) {
   1789               for (unsigned i = 0, e = OtherAddRec->getNumOperands();
   1790                    i != e; ++i) {
   1791                 if (i >= AddRecOps.size()) {
   1792                   AddRecOps.append(OtherAddRec->op_begin()+i,
   1793                                    OtherAddRec->op_end());
   1794                   break;
   1795                 }
   1796                 AddRecOps[i] = getAddExpr(AddRecOps[i],
   1797                                           OtherAddRec->getOperand(i));
   1798               }
   1799               Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
   1800             }
   1801         // Step size has changed, so we cannot guarantee no self-wraparound.
   1802         Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
   1803         return getAddExpr(Ops);
   1804       }
   1805 
   1806     // Otherwise couldn't fold anything into this recurrence.  Move onto the
   1807     // next one.
   1808   }
   1809 
   1810   // Okay, it looks like we really DO need an add expr.  Check to see if we
   1811   // already have one, otherwise create a new one.
   1812   FoldingSetNodeID ID;
   1813   ID.AddInteger(scAddExpr);
   1814   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   1815     ID.AddPointer(Ops[i]);
   1816   void *IP = 0;
   1817   SCEVAddExpr *S =
   1818     static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   1819   if (!S) {
   1820     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   1821     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
   1822     S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
   1823                                         O, Ops.size());
   1824     UniqueSCEVs.InsertNode(S, IP);
   1825   }
   1826   S->setNoWrapFlags(Flags);
   1827   return S;
   1828 }
   1829 
   1830 static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
   1831   uint64_t k = i*j;
   1832   if (j > 1 && k / j != i) Overflow = true;
   1833   return k;
   1834 }
   1835 
   1836 /// Compute the result of "n choose k", the binomial coefficient.  If an
   1837 /// intermediate computation overflows, Overflow will be set and the return will
   1838 /// be garbage. Overflow is not cleared on absence of overflow.
   1839 static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
   1840   // We use the multiplicative formula:
   1841   //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
   1842   // At each iteration, we take the n-th term of the numeral and divide by the
   1843   // (k-n)th term of the denominator.  This division will always produce an
   1844   // integral result, and helps reduce the chance of overflow in the
   1845   // intermediate computations. However, we can still overflow even when the
   1846   // final result would fit.
   1847 
   1848   if (n == 0 || n == k) return 1;
   1849   if (k > n) return 0;
   1850 
   1851   if (k > n/2)
   1852     k = n-k;
   1853 
   1854   uint64_t r = 1;
   1855   for (uint64_t i = 1; i <= k; ++i) {
   1856     r = umul_ov(r, n-(i-1), Overflow);
   1857     r /= i;
   1858   }
   1859   return r;
   1860 }
   1861 
   1862 /// getMulExpr - Get a canonical multiply expression, or something simpler if
   1863 /// possible.
   1864 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
   1865                                         SCEV::NoWrapFlags Flags) {
   1866   assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
   1867          "only nuw or nsw allowed");
   1868   assert(!Ops.empty() && "Cannot get empty mul!");
   1869   if (Ops.size() == 1) return Ops[0];
   1870 #ifndef NDEBUG
   1871   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   1872   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
   1873     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
   1874            "SCEVMulExpr operand types don't match!");
   1875 #endif
   1876 
   1877   // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
   1878   // And vice-versa.
   1879   int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
   1880   SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
   1881   if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
   1882     bool All = true;
   1883     for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
   1884          E = Ops.end(); I != E; ++I)
   1885       if (!isKnownNonNegative(*I)) {
   1886         All = false;
   1887         break;
   1888       }
   1889     if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
   1890   }
   1891 
   1892   // Sort by complexity, this groups all similar expression types together.
   1893   GroupByComplexity(Ops, LI);
   1894 
   1895   // If there are any constants, fold them together.
   1896   unsigned Idx = 0;
   1897   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
   1898 
   1899     // C1*(C2+V) -> C1*C2 + C1*V
   1900     if (Ops.size() == 2)
   1901       if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
   1902         if (Add->getNumOperands() == 2 &&
   1903             isa<SCEVConstant>(Add->getOperand(0)))
   1904           return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
   1905                             getMulExpr(LHSC, Add->getOperand(1)));
   1906 
   1907     ++Idx;
   1908     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
   1909       // We found two constants, fold them together!
   1910       ConstantInt *Fold = ConstantInt::get(getContext(),
   1911                                            LHSC->getValue()->getValue() *
   1912                                            RHSC->getValue()->getValue());
   1913       Ops[0] = getConstant(Fold);
   1914       Ops.erase(Ops.begin()+1);  // Erase the folded element
   1915       if (Ops.size() == 1) return Ops[0];
   1916       LHSC = cast<SCEVConstant>(Ops[0]);
   1917     }
   1918 
   1919     // If we are left with a constant one being multiplied, strip it off.
   1920     if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
   1921       Ops.erase(Ops.begin());
   1922       --Idx;
   1923     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
   1924       // If we have a multiply of zero, it will always be zero.
   1925       return Ops[0];
   1926     } else if (Ops[0]->isAllOnesValue()) {
   1927       // If we have a mul by -1 of an add, try distributing the -1 among the
   1928       // add operands.
   1929       if (Ops.size() == 2) {
   1930         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
   1931           SmallVector<const SCEV *, 4> NewOps;
   1932           bool AnyFolded = false;
   1933           for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
   1934                  E = Add->op_end(); I != E; ++I) {
   1935             const SCEV *Mul = getMulExpr(Ops[0], *I);
   1936             if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
   1937             NewOps.push_back(Mul);
   1938           }
   1939           if (AnyFolded)
   1940             return getAddExpr(NewOps);
   1941         }
   1942         else if (const SCEVAddRecExpr *
   1943                  AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
   1944           // Negation preserves a recurrence's no self-wrap property.
   1945           SmallVector<const SCEV *, 4> Operands;
   1946           for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
   1947                  E = AddRec->op_end(); I != E; ++I) {
   1948             Operands.push_back(getMulExpr(Ops[0], *I));
   1949           }
   1950           return getAddRecExpr(Operands, AddRec->getLoop(),
   1951                                AddRec->getNoWrapFlags(SCEV::FlagNW));
   1952         }
   1953       }
   1954     }
   1955 
   1956     if (Ops.size() == 1)
   1957       return Ops[0];
   1958   }
   1959 
   1960   // Skip over the add expression until we get to a multiply.
   1961   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
   1962     ++Idx;
   1963 
   1964   // If there are mul operands inline them all into this expression.
   1965   if (Idx < Ops.size()) {
   1966     bool DeletedMul = false;
   1967     while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
   1968       // If we have an mul, expand the mul operands onto the end of the operands
   1969       // list.
   1970       Ops.erase(Ops.begin()+Idx);
   1971       Ops.append(Mul->op_begin(), Mul->op_end());
   1972       DeletedMul = true;
   1973     }
   1974 
   1975     // If we deleted at least one mul, we added operands to the end of the list,
   1976     // and they are not necessarily sorted.  Recurse to resort and resimplify
   1977     // any operands we just acquired.
   1978     if (DeletedMul)
   1979       return getMulExpr(Ops);
   1980   }
   1981 
   1982   // If there are any add recurrences in the operands list, see if any other
   1983   // added values are loop invariant.  If so, we can fold them into the
   1984   // recurrence.
   1985   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
   1986     ++Idx;
   1987 
   1988   // Scan over all recurrences, trying to fold loop invariants into them.
   1989   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
   1990     // Scan all of the other operands to this mul and add them to the vector if
   1991     // they are loop invariant w.r.t. the recurrence.
   1992     SmallVector<const SCEV *, 8> LIOps;
   1993     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
   1994     const Loop *AddRecLoop = AddRec->getLoop();
   1995     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   1996       if (isLoopInvariant(Ops[i], AddRecLoop)) {
   1997         LIOps.push_back(Ops[i]);
   1998         Ops.erase(Ops.begin()+i);
   1999         --i; --e;
   2000       }
   2001 
   2002     // If we found some loop invariants, fold them into the recurrence.
   2003     if (!LIOps.empty()) {
   2004       //  NLI * LI * {Start,+,Step}  -->  NLI * {LI*Start,+,LI*Step}
   2005       SmallVector<const SCEV *, 4> NewOps;
   2006       NewOps.reserve(AddRec->getNumOperands());
   2007       const SCEV *Scale = getMulExpr(LIOps);
   2008       for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
   2009         NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
   2010 
   2011       // Build the new addrec. Propagate the NUW and NSW flags if both the
   2012       // outer mul and the inner addrec are guaranteed to have no overflow.
   2013       //
   2014       // No self-wrap cannot be guaranteed after changing the step size, but
   2015       // will be inferred if either NUW or NSW is true.
   2016       Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
   2017       const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
   2018 
   2019       // If all of the other operands were loop invariant, we are done.
   2020       if (Ops.size() == 1) return NewRec;
   2021 
   2022       // Otherwise, multiply the folded AddRec by the non-invariant parts.
   2023       for (unsigned i = 0;; ++i)
   2024         if (Ops[i] == AddRec) {
   2025           Ops[i] = NewRec;
   2026           break;
   2027         }
   2028       return getMulExpr(Ops);
   2029     }
   2030 
   2031     // Okay, if there weren't any loop invariants to be folded, check to see if
   2032     // there are multiple AddRec's with the same loop induction variable being
   2033     // multiplied together.  If so, we can fold them.
   2034     for (unsigned OtherIdx = Idx+1;
   2035          OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
   2036          ++OtherIdx) {
   2037       if (AddRecLoop != cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop())
   2038         continue;
   2039 
   2040       // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
   2041       // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
   2042       //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
   2043       //   ]]],+,...up to x=2n}.
   2044       // Note that the arguments to choose() are always integers with values
   2045       // known at compile time, never SCEV objects.
   2046       //
   2047       // The implementation avoids pointless extra computations when the two
   2048       // addrec's are of different length (mathematically, it's equivalent to
   2049       // an infinite stream of zeros on the right).
   2050       bool OpsModified = false;
   2051       for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
   2052            ++OtherIdx) {
   2053         const SCEVAddRecExpr *OtherAddRec =
   2054           dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
   2055         if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
   2056           continue;
   2057 
   2058         bool Overflow = false;
   2059         Type *Ty = AddRec->getType();
   2060         bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
   2061         SmallVector<const SCEV*, 7> AddRecOps;
   2062         for (int x = 0, xe = AddRec->getNumOperands() +
   2063                OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
   2064           const SCEV *Term = getConstant(Ty, 0);
   2065           for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
   2066             uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
   2067             for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
   2068                    ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
   2069                  z < ze && !Overflow; ++z) {
   2070               uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
   2071               uint64_t Coeff;
   2072               if (LargerThan64Bits)
   2073                 Coeff = umul_ov(Coeff1, Coeff2, Overflow);
   2074               else
   2075                 Coeff = Coeff1*Coeff2;
   2076               const SCEV *CoeffTerm = getConstant(Ty, Coeff);
   2077               const SCEV *Term1 = AddRec->getOperand(y-z);
   2078               const SCEV *Term2 = OtherAddRec->getOperand(z);
   2079               Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
   2080             }
   2081           }
   2082           AddRecOps.push_back(Term);
   2083         }
   2084         if (!Overflow) {
   2085           const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
   2086                                                 SCEV::FlagAnyWrap);
   2087           if (Ops.size() == 2) return NewAddRec;
   2088           Ops[Idx] = NewAddRec;
   2089           Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
   2090           OpsModified = true;
   2091           AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
   2092           if (!AddRec)
   2093             break;
   2094         }
   2095       }
   2096       if (OpsModified)
   2097         return getMulExpr(Ops);
   2098     }
   2099 
   2100     // Otherwise couldn't fold anything into this recurrence.  Move onto the
   2101     // next one.
   2102   }
   2103 
   2104   // Okay, it looks like we really DO need an mul expr.  Check to see if we
   2105   // already have one, otherwise create a new one.
   2106   FoldingSetNodeID ID;
   2107   ID.AddInteger(scMulExpr);
   2108   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   2109     ID.AddPointer(Ops[i]);
   2110   void *IP = 0;
   2111   SCEVMulExpr *S =
   2112     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   2113   if (!S) {
   2114     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   2115     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
   2116     S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
   2117                                         O, Ops.size());
   2118     UniqueSCEVs.InsertNode(S, IP);
   2119   }
   2120   S->setNoWrapFlags(Flags);
   2121   return S;
   2122 }
   2123 
   2124 /// getUDivExpr - Get a canonical unsigned division expression, or something
   2125 /// simpler if possible.
   2126 const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
   2127                                          const SCEV *RHS) {
   2128   assert(getEffectiveSCEVType(LHS->getType()) ==
   2129          getEffectiveSCEVType(RHS->getType()) &&
   2130          "SCEVUDivExpr operand types don't match!");
   2131 
   2132   if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
   2133     if (RHSC->getValue()->equalsInt(1))
   2134       return LHS;                               // X udiv 1 --> x
   2135     // If the denominator is zero, the result of the udiv is undefined. Don't
   2136     // try to analyze it, because the resolution chosen here may differ from
   2137     // the resolution chosen in other parts of the compiler.
   2138     if (!RHSC->getValue()->isZero()) {
   2139       // Determine if the division can be folded into the operands of
   2140       // its operands.
   2141       // TODO: Generalize this to non-constants by using known-bits information.
   2142       Type *Ty = LHS->getType();
   2143       unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
   2144       unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
   2145       // For non-power-of-two values, effectively round the value up to the
   2146       // nearest power of two.
   2147       if (!RHSC->getValue()->getValue().isPowerOf2())
   2148         ++MaxShiftAmt;
   2149       IntegerType *ExtTy =
   2150         IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
   2151       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
   2152         if (const SCEVConstant *Step =
   2153             dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
   2154           // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
   2155           const APInt &StepInt = Step->getValue()->getValue();
   2156           const APInt &DivInt = RHSC->getValue()->getValue();
   2157           if (!StepInt.urem(DivInt) &&
   2158               getZeroExtendExpr(AR, ExtTy) ==
   2159               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
   2160                             getZeroExtendExpr(Step, ExtTy),
   2161                             AR->getLoop(), SCEV::FlagAnyWrap)) {
   2162             SmallVector<const SCEV *, 4> Operands;
   2163             for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
   2164               Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
   2165             return getAddRecExpr(Operands, AR->getLoop(),
   2166                                  SCEV::FlagNW);
   2167           }
   2168           /// Get a canonical UDivExpr for a recurrence.
   2169           /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
   2170           // We can currently only fold X%N if X is constant.
   2171           const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
   2172           if (StartC && !DivInt.urem(StepInt) &&
   2173               getZeroExtendExpr(AR, ExtTy) ==
   2174               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
   2175                             getZeroExtendExpr(Step, ExtTy),
   2176                             AR->getLoop(), SCEV::FlagAnyWrap)) {
   2177             const APInt &StartInt = StartC->getValue()->getValue();
   2178             const APInt &StartRem = StartInt.urem(StepInt);
   2179             if (StartRem != 0)
   2180               LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
   2181                                   AR->getLoop(), SCEV::FlagNW);
   2182           }
   2183         }
   2184       // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
   2185       if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
   2186         SmallVector<const SCEV *, 4> Operands;
   2187         for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
   2188           Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
   2189         if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
   2190           // Find an operand that's safely divisible.
   2191           for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
   2192             const SCEV *Op = M->getOperand(i);
   2193             const SCEV *Div = getUDivExpr(Op, RHSC);
   2194             if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
   2195               Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
   2196                                                       M->op_end());
   2197               Operands[i] = Div;
   2198               return getMulExpr(Operands);
   2199             }
   2200           }
   2201       }
   2202       // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
   2203       if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
   2204         SmallVector<const SCEV *, 4> Operands;
   2205         for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
   2206           Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
   2207         if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
   2208           Operands.clear();
   2209           for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
   2210             const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
   2211             if (isa<SCEVUDivExpr>(Op) ||
   2212                 getMulExpr(Op, RHS) != A->getOperand(i))
   2213               break;
   2214             Operands.push_back(Op);
   2215           }
   2216           if (Operands.size() == A->getNumOperands())
   2217             return getAddExpr(Operands);
   2218         }
   2219       }
   2220 
   2221       // Fold if both operands are constant.
   2222       if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
   2223         Constant *LHSCV = LHSC->getValue();
   2224         Constant *RHSCV = RHSC->getValue();
   2225         return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
   2226                                                                    RHSCV)));
   2227       }
   2228     }
   2229   }
   2230 
   2231   FoldingSetNodeID ID;
   2232   ID.AddInteger(scUDivExpr);
   2233   ID.AddPointer(LHS);
   2234   ID.AddPointer(RHS);
   2235   void *IP = 0;
   2236   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   2237   SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
   2238                                              LHS, RHS);
   2239   UniqueSCEVs.InsertNode(S, IP);
   2240   return S;
   2241 }
   2242 
   2243 
   2244 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
   2245 /// Simplify the expression as much as possible.
   2246 const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
   2247                                            const Loop *L,
   2248                                            SCEV::NoWrapFlags Flags) {
   2249   SmallVector<const SCEV *, 4> Operands;
   2250   Operands.push_back(Start);
   2251   if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
   2252     if (StepChrec->getLoop() == L) {
   2253       Operands.append(StepChrec->op_begin(), StepChrec->op_end());
   2254       return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
   2255     }
   2256 
   2257   Operands.push_back(Step);
   2258   return getAddRecExpr(Operands, L, Flags);
   2259 }
   2260 
   2261 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
   2262 /// Simplify the expression as much as possible.
   2263 const SCEV *
   2264 ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
   2265                                const Loop *L, SCEV::NoWrapFlags Flags) {
   2266   if (Operands.size() == 1) return Operands[0];
   2267 #ifndef NDEBUG
   2268   Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
   2269   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
   2270     assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
   2271            "SCEVAddRecExpr operand types don't match!");
   2272   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
   2273     assert(isLoopInvariant(Operands[i], L) &&
   2274            "SCEVAddRecExpr operand is not loop-invariant!");
   2275 #endif
   2276 
   2277   if (Operands.back()->isZero()) {
   2278     Operands.pop_back();
   2279     return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0}  -->  X
   2280   }
   2281 
   2282   // It's tempting to want to call getMaxBackedgeTakenCount count here and
   2283   // use that information to infer NUW and NSW flags. However, computing a
   2284   // BE count requires calling getAddRecExpr, so we may not yet have a
   2285   // meaningful BE count at this point (and if we don't, we'd be stuck
   2286   // with a SCEVCouldNotCompute as the cached BE count).
   2287 
   2288   // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
   2289   // And vice-versa.
   2290   int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
   2291   SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
   2292   if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
   2293     bool All = true;
   2294     for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
   2295          E = Operands.end(); I != E; ++I)
   2296       if (!isKnownNonNegative(*I)) {
   2297         All = false;
   2298         break;
   2299       }
   2300     if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
   2301   }
   2302 
   2303   // Canonicalize nested AddRecs in by nesting them in order of loop depth.
   2304   if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
   2305     const Loop *NestedLoop = NestedAR->getLoop();
   2306     if (L->contains(NestedLoop) ?
   2307         (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
   2308         (!NestedLoop->contains(L) &&
   2309          DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
   2310       SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
   2311                                                   NestedAR->op_end());
   2312       Operands[0] = NestedAR->getStart();
   2313       // AddRecs require their operands be loop-invariant with respect to their
   2314       // loops. Don't perform this transformation if it would break this
   2315       // requirement.
   2316       bool AllInvariant = true;
   2317       for (unsigned i = 0, e = Operands.size(); i != e; ++i)
   2318         if (!isLoopInvariant(Operands[i], L)) {
   2319           AllInvariant = false;
   2320           break;
   2321         }
   2322       if (AllInvariant) {
   2323         // Create a recurrence for the outer loop with the same step size.
   2324         //
   2325         // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
   2326         // inner recurrence has the same property.
   2327         SCEV::NoWrapFlags OuterFlags =
   2328           maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
   2329 
   2330         NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
   2331         AllInvariant = true;
   2332         for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
   2333           if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
   2334             AllInvariant = false;
   2335             break;
   2336           }
   2337         if (AllInvariant) {
   2338           // Ok, both add recurrences are valid after the transformation.
   2339           //
   2340           // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
   2341           // the outer recurrence has the same property.
   2342           SCEV::NoWrapFlags InnerFlags =
   2343             maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
   2344           return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
   2345         }
   2346       }
   2347       // Reset Operands to its original state.
   2348       Operands[0] = NestedAR;
   2349     }
   2350   }
   2351 
   2352   // Okay, it looks like we really DO need an addrec expr.  Check to see if we
   2353   // already have one, otherwise create a new one.
   2354   FoldingSetNodeID ID;
   2355   ID.AddInteger(scAddRecExpr);
   2356   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
   2357     ID.AddPointer(Operands[i]);
   2358   ID.AddPointer(L);
   2359   void *IP = 0;
   2360   SCEVAddRecExpr *S =
   2361     static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   2362   if (!S) {
   2363     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
   2364     std::uninitialized_copy(Operands.begin(), Operands.end(), O);
   2365     S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
   2366                                            O, Operands.size(), L);
   2367     UniqueSCEVs.InsertNode(S, IP);
   2368   }
   2369   S->setNoWrapFlags(Flags);
   2370   return S;
   2371 }
   2372 
   2373 const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
   2374                                          const SCEV *RHS) {
   2375   SmallVector<const SCEV *, 2> Ops;
   2376   Ops.push_back(LHS);
   2377   Ops.push_back(RHS);
   2378   return getSMaxExpr(Ops);
   2379 }
   2380 
   2381 const SCEV *
   2382 ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
   2383   assert(!Ops.empty() && "Cannot get empty smax!");
   2384   if (Ops.size() == 1) return Ops[0];
   2385 #ifndef NDEBUG
   2386   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   2387   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
   2388     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
   2389            "SCEVSMaxExpr operand types don't match!");
   2390 #endif
   2391 
   2392   // Sort by complexity, this groups all similar expression types together.
   2393   GroupByComplexity(Ops, LI);
   2394 
   2395   // If there are any constants, fold them together.
   2396   unsigned Idx = 0;
   2397   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
   2398     ++Idx;
   2399     assert(Idx < Ops.size());
   2400     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
   2401       // We found two constants, fold them together!
   2402       ConstantInt *Fold = ConstantInt::get(getContext(),
   2403                               APIntOps::smax(LHSC->getValue()->getValue(),
   2404                                              RHSC->getValue()->getValue()));
   2405       Ops[0] = getConstant(Fold);
   2406       Ops.erase(Ops.begin()+1);  // Erase the folded element
   2407       if (Ops.size() == 1) return Ops[0];
   2408       LHSC = cast<SCEVConstant>(Ops[0]);
   2409     }
   2410 
   2411     // If we are left with a constant minimum-int, strip it off.
   2412     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
   2413       Ops.erase(Ops.begin());
   2414       --Idx;
   2415     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
   2416       // If we have an smax with a constant maximum-int, it will always be
   2417       // maximum-int.
   2418       return Ops[0];
   2419     }
   2420 
   2421     if (Ops.size() == 1) return Ops[0];
   2422   }
   2423 
   2424   // Find the first SMax
   2425   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
   2426     ++Idx;
   2427 
   2428   // Check to see if one of the operands is an SMax. If so, expand its operands
   2429   // onto our operand list, and recurse to simplify.
   2430   if (Idx < Ops.size()) {
   2431     bool DeletedSMax = false;
   2432     while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
   2433       Ops.erase(Ops.begin()+Idx);
   2434       Ops.append(SMax->op_begin(), SMax->op_end());
   2435       DeletedSMax = true;
   2436     }
   2437 
   2438     if (DeletedSMax)
   2439       return getSMaxExpr(Ops);
   2440   }
   2441 
   2442   // Okay, check to see if the same value occurs in the operand list twice.  If
   2443   // so, delete one.  Since we sorted the list, these values are required to
   2444   // be adjacent.
   2445   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
   2446     //  X smax Y smax Y  -->  X smax Y
   2447     //  X smax Y         -->  X, if X is always greater than Y
   2448     if (Ops[i] == Ops[i+1] ||
   2449         isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
   2450       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
   2451       --i; --e;
   2452     } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
   2453       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
   2454       --i; --e;
   2455     }
   2456 
   2457   if (Ops.size() == 1) return Ops[0];
   2458 
   2459   assert(!Ops.empty() && "Reduced smax down to nothing!");
   2460 
   2461   // Okay, it looks like we really DO need an smax expr.  Check to see if we
   2462   // already have one, otherwise create a new one.
   2463   FoldingSetNodeID ID;
   2464   ID.AddInteger(scSMaxExpr);
   2465   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   2466     ID.AddPointer(Ops[i]);
   2467   void *IP = 0;
   2468   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   2469   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   2470   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
   2471   SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
   2472                                              O, Ops.size());
   2473   UniqueSCEVs.InsertNode(S, IP);
   2474   return S;
   2475 }
   2476 
   2477 const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
   2478                                          const SCEV *RHS) {
   2479   SmallVector<const SCEV *, 2> Ops;
   2480   Ops.push_back(LHS);
   2481   Ops.push_back(RHS);
   2482   return getUMaxExpr(Ops);
   2483 }
   2484 
   2485 const SCEV *
   2486 ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
   2487   assert(!Ops.empty() && "Cannot get empty umax!");
   2488   if (Ops.size() == 1) return Ops[0];
   2489 #ifndef NDEBUG
   2490   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   2491   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
   2492     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
   2493            "SCEVUMaxExpr operand types don't match!");
   2494 #endif
   2495 
   2496   // Sort by complexity, this groups all similar expression types together.
   2497   GroupByComplexity(Ops, LI);
   2498 
   2499   // If there are any constants, fold them together.
   2500   unsigned Idx = 0;
   2501   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
   2502     ++Idx;
   2503     assert(Idx < Ops.size());
   2504     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
   2505       // We found two constants, fold them together!
   2506       ConstantInt *Fold = ConstantInt::get(getContext(),
   2507                               APIntOps::umax(LHSC->getValue()->getValue(),
   2508                                              RHSC->getValue()->getValue()));
   2509       Ops[0] = getConstant(Fold);
   2510       Ops.erase(Ops.begin()+1);  // Erase the folded element
   2511       if (Ops.size() == 1) return Ops[0];
   2512       LHSC = cast<SCEVConstant>(Ops[0]);
   2513     }
   2514 
   2515     // If we are left with a constant minimum-int, strip it off.
   2516     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
   2517       Ops.erase(Ops.begin());
   2518       --Idx;
   2519     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
   2520       // If we have an umax with a constant maximum-int, it will always be
   2521       // maximum-int.
   2522       return Ops[0];
   2523     }
   2524 
   2525     if (Ops.size() == 1) return Ops[0];
   2526   }
   2527 
   2528   // Find the first UMax
   2529   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
   2530     ++Idx;
   2531 
   2532   // Check to see if one of the operands is a UMax. If so, expand its operands
   2533   // onto our operand list, and recurse to simplify.
   2534   if (Idx < Ops.size()) {
   2535     bool DeletedUMax = false;
   2536     while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
   2537       Ops.erase(Ops.begin()+Idx);
   2538       Ops.append(UMax->op_begin(), UMax->op_end());
   2539       DeletedUMax = true;
   2540     }
   2541 
   2542     if (DeletedUMax)
   2543       return getUMaxExpr(Ops);
   2544   }
   2545 
   2546   // Okay, check to see if the same value occurs in the operand list twice.  If
   2547   // so, delete one.  Since we sorted the list, these values are required to
   2548   // be adjacent.
   2549   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
   2550     //  X umax Y umax Y  -->  X umax Y
   2551     //  X umax Y         -->  X, if X is always greater than Y
   2552     if (Ops[i] == Ops[i+1] ||
   2553         isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
   2554       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
   2555       --i; --e;
   2556     } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
   2557       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
   2558       --i; --e;
   2559     }
   2560 
   2561   if (Ops.size() == 1) return Ops[0];
   2562 
   2563   assert(!Ops.empty() && "Reduced umax down to nothing!");
   2564 
   2565   // Okay, it looks like we really DO need a umax expr.  Check to see if we
   2566   // already have one, otherwise create a new one.
   2567   FoldingSetNodeID ID;
   2568   ID.AddInteger(scUMaxExpr);
   2569   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   2570     ID.AddPointer(Ops[i]);
   2571   void *IP = 0;
   2572   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   2573   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   2574   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
   2575   SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
   2576                                              O, Ops.size());
   2577   UniqueSCEVs.InsertNode(S, IP);
   2578   return S;
   2579 }
   2580 
   2581 const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
   2582                                          const SCEV *RHS) {
   2583   // ~smax(~x, ~y) == smin(x, y).
   2584   return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
   2585 }
   2586 
   2587 const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
   2588                                          const SCEV *RHS) {
   2589   // ~umax(~x, ~y) == umin(x, y)
   2590   return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
   2591 }
   2592 
   2593 const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
   2594   // If we have DataLayout, we can bypass creating a target-independent
   2595   // constant expression and then folding it back into a ConstantInt.
   2596   // This is just a compile-time optimization.
   2597   if (TD)
   2598     return getConstant(TD->getIntPtrType(getContext()),
   2599                        TD->getTypeAllocSize(AllocTy));
   2600 
   2601   Constant *C = ConstantExpr::getSizeOf(AllocTy);
   2602   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
   2603     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
   2604       C = Folded;
   2605   Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
   2606   return getTruncateOrZeroExtend(getSCEV(C), Ty);
   2607 }
   2608 
   2609 const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
   2610   Constant *C = ConstantExpr::getAlignOf(AllocTy);
   2611   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
   2612     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
   2613       C = Folded;
   2614   Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
   2615   return getTruncateOrZeroExtend(getSCEV(C), Ty);
   2616 }
   2617 
   2618 const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
   2619                                              unsigned FieldNo) {
   2620   // If we have DataLayout, we can bypass creating a target-independent
   2621   // constant expression and then folding it back into a ConstantInt.
   2622   // This is just a compile-time optimization.
   2623   if (TD)
   2624     return getConstant(TD->getIntPtrType(getContext()),
   2625                        TD->getStructLayout(STy)->getElementOffset(FieldNo));
   2626 
   2627   Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
   2628   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
   2629     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
   2630       C = Folded;
   2631   Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
   2632   return getTruncateOrZeroExtend(getSCEV(C), Ty);
   2633 }
   2634 
   2635 const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
   2636                                              Constant *FieldNo) {
   2637   Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
   2638   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
   2639     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
   2640       C = Folded;
   2641   Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
   2642   return getTruncateOrZeroExtend(getSCEV(C), Ty);
   2643 }
   2644 
   2645 const SCEV *ScalarEvolution::getUnknown(Value *V) {
   2646   // Don't attempt to do anything other than create a SCEVUnknown object
   2647   // here.  createSCEV only calls getUnknown after checking for all other
   2648   // interesting possibilities, and any other code that calls getUnknown
   2649   // is doing so in order to hide a value from SCEV canonicalization.
   2650 
   2651   FoldingSetNodeID ID;
   2652   ID.AddInteger(scUnknown);
   2653   ID.AddPointer(V);
   2654   void *IP = 0;
   2655   if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
   2656     assert(cast<SCEVUnknown>(S)->getValue() == V &&
   2657            "Stale SCEVUnknown in uniquing map!");
   2658     return S;
   2659   }
   2660   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
   2661                                             FirstUnknown);
   2662   FirstUnknown = cast<SCEVUnknown>(S);
   2663   UniqueSCEVs.InsertNode(S, IP);
   2664   return S;
   2665 }
   2666 
   2667 //===----------------------------------------------------------------------===//
   2668 //            Basic SCEV Analysis and PHI Idiom Recognition Code
   2669 //
   2670 
   2671 /// isSCEVable - Test if values of the given type are analyzable within
   2672 /// the SCEV framework. This primarily includes integer types, and it
   2673 /// can optionally include pointer types if the ScalarEvolution class
   2674 /// has access to target-specific information.
   2675 bool ScalarEvolution::isSCEVable(Type *Ty) const {
   2676   // Integers and pointers are always SCEVable.
   2677   return Ty->isIntegerTy() || Ty->isPointerTy();
   2678 }
   2679 
   2680 /// getTypeSizeInBits - Return the size in bits of the specified type,
   2681 /// for which isSCEVable must return true.
   2682 uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
   2683   assert(isSCEVable(Ty) && "Type is not SCEVable!");
   2684 
   2685   // If we have a DataLayout, use it!
   2686   if (TD)
   2687     return TD->getTypeSizeInBits(Ty);
   2688 
   2689   // Integer types have fixed sizes.
   2690   if (Ty->isIntegerTy())
   2691     return Ty->getPrimitiveSizeInBits();
   2692 
   2693   // The only other support type is pointer. Without DataLayout, conservatively
   2694   // assume pointers are 64-bit.
   2695   assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
   2696   return 64;
   2697 }
   2698 
   2699 /// getEffectiveSCEVType - Return a type with the same bitwidth as
   2700 /// the given type and which represents how SCEV will treat the given
   2701 /// type, for which isSCEVable must return true. For pointer types,
   2702 /// this is the pointer-sized integer type.
   2703 Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
   2704   assert(isSCEVable(Ty) && "Type is not SCEVable!");
   2705 
   2706   if (Ty->isIntegerTy())
   2707     return Ty;
   2708 
   2709   // The only other support type is pointer.
   2710   assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
   2711   if (TD) return TD->getIntPtrType(getContext());
   2712 
   2713   // Without DataLayout, conservatively assume pointers are 64-bit.
   2714   return Type::getInt64Ty(getContext());
   2715 }
   2716 
   2717 const SCEV *ScalarEvolution::getCouldNotCompute() {
   2718   return &CouldNotCompute;
   2719 }
   2720 
   2721 namespace {
   2722   // Helper class working with SCEVTraversal to figure out if a SCEV contains
   2723   // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
   2724   // is set iff if find such SCEVUnknown.
   2725   //
   2726   struct FindInvalidSCEVUnknown {
   2727     bool FindOne;
   2728     FindInvalidSCEVUnknown() { FindOne = false; }
   2729     bool follow(const SCEV *S) {
   2730       switch (S->getSCEVType()) {
   2731       case scConstant:
   2732         return false;
   2733       case scUnknown:
   2734         if (!cast<SCEVUnknown>(S)->getValue())
   2735           FindOne = true;
   2736         return false;
   2737       default:
   2738         return true;
   2739       }
   2740     }
   2741     bool isDone() const { return FindOne; }
   2742   };
   2743 }
   2744 
   2745 bool ScalarEvolution::checkValidity(const SCEV *S) const {
   2746   FindInvalidSCEVUnknown F;
   2747   SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
   2748   ST.visitAll(S);
   2749 
   2750   return !F.FindOne;
   2751 }
   2752 
   2753 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
   2754 /// expression and create a new one.
   2755 const SCEV *ScalarEvolution::getSCEV(Value *V) {
   2756   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
   2757 
   2758   ValueExprMapType::iterator I = ValueExprMap.find_as(V);
   2759   if (I != ValueExprMap.end()) {
   2760     const SCEV *S = I->second;
   2761     if (checkValidity(S))
   2762       return S;
   2763     else
   2764       ValueExprMap.erase(I);
   2765   }
   2766   const SCEV *S = createSCEV(V);
   2767 
   2768   // The process of creating a SCEV for V may have caused other SCEVs
   2769   // to have been created, so it's necessary to insert the new entry
   2770   // from scratch, rather than trying to remember the insert position
   2771   // above.
   2772   ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
   2773   return S;
   2774 }
   2775 
   2776 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
   2777 ///
   2778 const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
   2779   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
   2780     return getConstant(
   2781                cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
   2782 
   2783   Type *Ty = V->getType();
   2784   Ty = getEffectiveSCEVType(Ty);
   2785   return getMulExpr(V,
   2786                   getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
   2787 }
   2788 
   2789 /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
   2790 const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
   2791   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
   2792     return getConstant(
   2793                 cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
   2794 
   2795   Type *Ty = V->getType();
   2796   Ty = getEffectiveSCEVType(Ty);
   2797   const SCEV *AllOnes =
   2798                    getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
   2799   return getMinusSCEV(AllOnes, V);
   2800 }
   2801 
   2802 /// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1.
   2803 const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
   2804                                           SCEV::NoWrapFlags Flags) {
   2805   assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
   2806 
   2807   // Fast path: X - X --> 0.
   2808   if (LHS == RHS)
   2809     return getConstant(LHS->getType(), 0);
   2810 
   2811   // X - Y --> X + -Y
   2812   return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
   2813 }
   2814 
   2815 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
   2816 /// input value to the specified type.  If the type must be extended, it is zero
   2817 /// extended.
   2818 const SCEV *
   2819 ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
   2820   Type *SrcTy = V->getType();
   2821   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2822          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
   2823          "Cannot truncate or zero extend with non-integer arguments!");
   2824   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2825     return V;  // No conversion
   2826   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
   2827     return getTruncateExpr(V, Ty);
   2828   return getZeroExtendExpr(V, Ty);
   2829 }
   2830 
   2831 /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
   2832 /// input value to the specified type.  If the type must be extended, it is sign
   2833 /// extended.
   2834 const SCEV *
   2835 ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
   2836                                          Type *Ty) {
   2837   Type *SrcTy = V->getType();
   2838   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2839          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
   2840          "Cannot truncate or zero extend with non-integer arguments!");
   2841   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2842     return V;  // No conversion
   2843   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
   2844     return getTruncateExpr(V, Ty);
   2845   return getSignExtendExpr(V, Ty);
   2846 }
   2847 
   2848 /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
   2849 /// input value to the specified type.  If the type must be extended, it is zero
   2850 /// extended.  The conversion must not be narrowing.
   2851 const SCEV *
   2852 ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
   2853   Type *SrcTy = V->getType();
   2854   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2855          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
   2856          "Cannot noop or zero extend with non-integer arguments!");
   2857   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
   2858          "getNoopOrZeroExtend cannot truncate!");
   2859   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2860     return V;  // No conversion
   2861   return getZeroExtendExpr(V, Ty);
   2862 }
   2863 
   2864 /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
   2865 /// input value to the specified type.  If the type must be extended, it is sign
   2866 /// extended.  The conversion must not be narrowing.
   2867 const SCEV *
   2868 ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
   2869   Type *SrcTy = V->getType();
   2870   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2871          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
   2872          "Cannot noop or sign extend with non-integer arguments!");
   2873   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
   2874          "getNoopOrSignExtend cannot truncate!");
   2875   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2876     return V;  // No conversion
   2877   return getSignExtendExpr(V, Ty);
   2878 }
   2879 
   2880 /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
   2881 /// the input value to the specified type. If the type must be extended,
   2882 /// it is extended with unspecified bits. The conversion must not be
   2883 /// narrowing.
   2884 const SCEV *
   2885 ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
   2886   Type *SrcTy = V->getType();
   2887   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2888          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
   2889          "Cannot noop or any extend with non-integer arguments!");
   2890   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
   2891          "getNoopOrAnyExtend cannot truncate!");
   2892   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2893     return V;  // No conversion
   2894   return getAnyExtendExpr(V, Ty);
   2895 }
   2896 
   2897 /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
   2898 /// input value to the specified type.  The conversion must not be widening.
   2899 const SCEV *
   2900 ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
   2901   Type *SrcTy = V->getType();
   2902   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2903          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
   2904          "Cannot truncate or noop with non-integer arguments!");
   2905   assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
   2906          "getTruncateOrNoop cannot extend!");
   2907   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2908     return V;  // No conversion
   2909   return getTruncateExpr(V, Ty);
   2910 }
   2911 
   2912 /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
   2913 /// the types using zero-extension, and then perform a umax operation
   2914 /// with them.
   2915 const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
   2916                                                         const SCEV *RHS) {
   2917   const SCEV *PromotedLHS = LHS;
   2918   const SCEV *PromotedRHS = RHS;
   2919 
   2920   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
   2921     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
   2922   else
   2923     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
   2924 
   2925   return getUMaxExpr(PromotedLHS, PromotedRHS);
   2926 }
   2927 
   2928 /// getUMinFromMismatchedTypes - Promote the operands to the wider of
   2929 /// the types using zero-extension, and then perform a umin operation
   2930 /// with them.
   2931 const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
   2932                                                         const SCEV *RHS) {
   2933   const SCEV *PromotedLHS = LHS;
   2934   const SCEV *PromotedRHS = RHS;
   2935 
   2936   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
   2937     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
   2938   else
   2939     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
   2940 
   2941   return getUMinExpr(PromotedLHS, PromotedRHS);
   2942 }
   2943 
   2944 /// getPointerBase - Transitively follow the chain of pointer-type operands
   2945 /// until reaching a SCEV that does not have a single pointer operand. This
   2946 /// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
   2947 /// but corner cases do exist.
   2948 const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
   2949   // A pointer operand may evaluate to a nonpointer expression, such as null.
   2950   if (!V->getType()->isPointerTy())
   2951     return V;
   2952 
   2953   if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
   2954     return getPointerBase(Cast->getOperand());
   2955   }
   2956   else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
   2957     const SCEV *PtrOp = 0;
   2958     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
   2959          I != E; ++I) {
   2960       if ((*I)->getType()->isPointerTy()) {
   2961         // Cannot find the base of an expression with multiple pointer operands.
   2962         if (PtrOp)
   2963           return V;
   2964         PtrOp = *I;
   2965       }
   2966     }
   2967     if (!PtrOp)
   2968       return V;
   2969     return getPointerBase(PtrOp);
   2970   }
   2971   return V;
   2972 }
   2973 
   2974 /// PushDefUseChildren - Push users of the given Instruction
   2975 /// onto the given Worklist.
   2976 static void
   2977 PushDefUseChildren(Instruction *I,
   2978                    SmallVectorImpl<Instruction *> &Worklist) {
   2979   // Push the def-use children onto the Worklist stack.
   2980   for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
   2981        UI != UE; ++UI)
   2982     Worklist.push_back(cast<Instruction>(*UI));
   2983 }
   2984 
   2985 /// ForgetSymbolicValue - This looks up computed SCEV values for all
   2986 /// instructions that depend on the given instruction and removes them from
   2987 /// the ValueExprMapType map if they reference SymName. This is used during PHI
   2988 /// resolution.
   2989 void
   2990 ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
   2991   SmallVector<Instruction *, 16> Worklist;
   2992   PushDefUseChildren(PN, Worklist);
   2993 
   2994   SmallPtrSet<Instruction *, 8> Visited;
   2995   Visited.insert(PN);
   2996   while (!Worklist.empty()) {
   2997     Instruction *I = Worklist.pop_back_val();
   2998     if (!Visited.insert(I)) continue;
   2999 
   3000     ValueExprMapType::iterator It =
   3001       ValueExprMap.find_as(static_cast<Value *>(I));
   3002     if (It != ValueExprMap.end()) {
   3003       const SCEV *Old = It->second;
   3004 
   3005       // Short-circuit the def-use traversal if the symbolic name
   3006       // ceases to appear in expressions.
   3007       if (Old != SymName && !hasOperand(Old, SymName))
   3008         continue;
   3009 
   3010       // SCEVUnknown for a PHI either means that it has an unrecognized
   3011       // structure, it's a PHI that's in the progress of being computed
   3012       // by createNodeForPHI, or it's a single-value PHI. In the first case,
   3013       // additional loop trip count information isn't going to change anything.
   3014       // In the second case, createNodeForPHI will perform the necessary
   3015       // updates on its own when it gets to that point. In the third, we do
   3016       // want to forget the SCEVUnknown.
   3017       if (!isa<PHINode>(I) ||
   3018           !isa<SCEVUnknown>(Old) ||
   3019           (I != PN && Old == SymName)) {
   3020         forgetMemoizedResults(Old);
   3021         ValueExprMap.erase(It);
   3022       }
   3023     }
   3024 
   3025     PushDefUseChildren(I, Worklist);
   3026   }
   3027 }
   3028 
   3029 /// createNodeForPHI - PHI nodes have two cases.  Either the PHI node exists in
   3030 /// a loop header, making it a potential recurrence, or it doesn't.
   3031 ///
   3032 const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
   3033   if (const Loop *L = LI->getLoopFor(PN->getParent()))
   3034     if (L->getHeader() == PN->getParent()) {
   3035       // The loop may have multiple entrances or multiple exits; we can analyze
   3036       // this phi as an addrec if it has a unique entry value and a unique
   3037       // backedge value.
   3038       Value *BEValueV = 0, *StartValueV = 0;
   3039       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
   3040         Value *V = PN->getIncomingValue(i);
   3041         if (L->contains(PN->getIncomingBlock(i))) {
   3042           if (!BEValueV) {
   3043             BEValueV = V;
   3044           } else if (BEValueV != V) {
   3045             BEValueV = 0;
   3046             break;
   3047           }
   3048         } else if (!StartValueV) {
   3049           StartValueV = V;
   3050         } else if (StartValueV != V) {
   3051           StartValueV = 0;
   3052           break;
   3053         }
   3054       }
   3055       if (BEValueV && StartValueV) {
   3056         // While we are analyzing this PHI node, handle its value symbolically.
   3057         const SCEV *SymbolicName = getUnknown(PN);
   3058         assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
   3059                "PHI node already processed?");
   3060         ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
   3061 
   3062         // Using this symbolic name for the PHI, analyze the value coming around
   3063         // the back-edge.
   3064         const SCEV *BEValue = getSCEV(BEValueV);
   3065 
   3066         // NOTE: If BEValue is loop invariant, we know that the PHI node just
   3067         // has a special value for the first iteration of the loop.
   3068 
   3069         // If the value coming around the backedge is an add with the symbolic
   3070         // value we just inserted, then we found a simple induction variable!
   3071         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
   3072           // If there is a single occurrence of the symbolic value, replace it
   3073           // with a recurrence.
   3074           unsigned FoundIndex = Add->getNumOperands();
   3075           for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
   3076             if (Add->getOperand(i) == SymbolicName)
   3077               if (FoundIndex == e) {
   3078                 FoundIndex = i;
   3079                 break;
   3080               }
   3081 
   3082           if (FoundIndex != Add->getNumOperands()) {
   3083             // Create an add with everything but the specified operand.
   3084             SmallVector<const SCEV *, 8> Ops;
   3085             for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
   3086               if (i != FoundIndex)
   3087                 Ops.push_back(Add->getOperand(i));
   3088             const SCEV *Accum = getAddExpr(Ops);
   3089 
   3090             // This is not a valid addrec if the step amount is varying each
   3091             // loop iteration, but is not itself an addrec in this loop.
   3092             if (isLoopInvariant(Accum, L) ||
   3093                 (isa<SCEVAddRecExpr>(Accum) &&
   3094                  cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
   3095               SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
   3096 
   3097               // If the increment doesn't overflow, then neither the addrec nor
   3098               // the post-increment will overflow.
   3099               if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
   3100                 if (OBO->hasNoUnsignedWrap())
   3101                   Flags = setFlags(Flags, SCEV::FlagNUW);
   3102                 if (OBO->hasNoSignedWrap())
   3103                   Flags = setFlags(Flags, SCEV::FlagNSW);
   3104               } else if (const GEPOperator *GEP =
   3105                          dyn_cast<GEPOperator>(BEValueV)) {
   3106                 // If the increment is an inbounds GEP, then we know the address
   3107                 // space cannot be wrapped around. We cannot make any guarantee
   3108                 // about signed or unsigned overflow because pointers are
   3109                 // unsigned but we may have a negative index from the base
   3110                 // pointer.
   3111                 if (GEP->isInBounds())
   3112                   Flags = setFlags(Flags, SCEV::FlagNW);
   3113               }
   3114 
   3115               const SCEV *StartVal = getSCEV(StartValueV);
   3116               const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
   3117 
   3118               // Since the no-wrap flags are on the increment, they apply to the
   3119               // post-incremented value as well.
   3120               if (isLoopInvariant(Accum, L))
   3121                 (void)getAddRecExpr(getAddExpr(StartVal, Accum),
   3122                                     Accum, L, Flags);
   3123 
   3124               // Okay, for the entire analysis of this edge we assumed the PHI
   3125               // to be symbolic.  We now need to go back and purge all of the
   3126               // entries for the scalars that use the symbolic expression.
   3127               ForgetSymbolicName(PN, SymbolicName);
   3128               ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
   3129               return PHISCEV;
   3130             }
   3131           }
   3132         } else if (const SCEVAddRecExpr *AddRec =
   3133                      dyn_cast<SCEVAddRecExpr>(BEValue)) {
   3134           // Otherwise, this could be a loop like this:
   3135           //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
   3136           // In this case, j = {1,+,1}  and BEValue is j.
   3137           // Because the other in-value of i (0) fits the evolution of BEValue
   3138           // i really is an addrec evolution.
   3139           if (AddRec->getLoop() == L && AddRec->isAffine()) {
   3140             const SCEV *StartVal = getSCEV(StartValueV);
   3141 
   3142             // If StartVal = j.start - j.stride, we can use StartVal as the
   3143             // initial step of the addrec evolution.
   3144             if (StartVal == getMinusSCEV(AddRec->getOperand(0),
   3145                                          AddRec->getOperand(1))) {
   3146               // FIXME: For constant StartVal, we should be able to infer
   3147               // no-wrap flags.
   3148               const SCEV *PHISCEV =
   3149                 getAddRecExpr(StartVal, AddRec->getOperand(1), L,
   3150                               SCEV::FlagAnyWrap);
   3151 
   3152               // Okay, for the entire analysis of this edge we assumed the PHI
   3153               // to be symbolic.  We now need to go back and purge all of the
   3154               // entries for the scalars that use the symbolic expression.
   3155               ForgetSymbolicName(PN, SymbolicName);
   3156               ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
   3157               return PHISCEV;
   3158             }
   3159           }
   3160         }
   3161       }
   3162     }
   3163 
   3164   // If the PHI has a single incoming value, follow that value, unless the
   3165   // PHI's incoming blocks are in a different loop, in which case doing so
   3166   // risks breaking LCSSA form. Instcombine would normally zap these, but
   3167   // it doesn't have DominatorTree information, so it may miss cases.
   3168   if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
   3169     if (LI->replacementPreservesLCSSAForm(PN, V))
   3170       return getSCEV(V);
   3171 
   3172   // If it's not a loop phi, we can't handle it yet.
   3173   return getUnknown(PN);
   3174 }
   3175 
   3176 /// createNodeForGEP - Expand GEP instructions into add and multiply
   3177 /// operations. This allows them to be analyzed by regular SCEV code.
   3178 ///
   3179 const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
   3180 
   3181   // Don't blindly transfer the inbounds flag from the GEP instruction to the
   3182   // Add expression, because the Instruction may be guarded by control flow
   3183   // and the no-overflow bits may not be valid for the expression in any
   3184   // context.
   3185   bool isInBounds = GEP->isInBounds();
   3186 
   3187   Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
   3188   Value *Base = GEP->getOperand(0);
   3189   // Don't attempt to analyze GEPs over unsized objects.
   3190   if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
   3191     return getUnknown(GEP);
   3192   const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
   3193   gep_type_iterator GTI = gep_type_begin(GEP);
   3194   for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
   3195                                       E = GEP->op_end();
   3196        I != E; ++I) {
   3197     Value *Index = *I;
   3198     // Compute the (potentially symbolic) offset in bytes for this index.
   3199     if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
   3200       // For a struct, add the member offset.
   3201       unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
   3202       const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
   3203 
   3204       // Add the field offset to the running total offset.
   3205       TotalOffset = getAddExpr(TotalOffset, FieldOffset);
   3206     } else {
   3207       // For an array, add the element offset, explicitly scaled.
   3208       const SCEV *ElementSize = getSizeOfExpr(*GTI);
   3209       const SCEV *IndexS = getSCEV(Index);
   3210       // Getelementptr indices are signed.
   3211       IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
   3212 
   3213       // Multiply the index by the element size to compute the element offset.
   3214       const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
   3215                                            isInBounds ? SCEV::FlagNSW :
   3216                                            SCEV::FlagAnyWrap);
   3217 
   3218       // Add the element offset to the running total offset.
   3219       TotalOffset = getAddExpr(TotalOffset, LocalOffset);
   3220     }
   3221   }
   3222 
   3223   // Get the SCEV for the GEP base.
   3224   const SCEV *BaseS = getSCEV(Base);
   3225 
   3226   // Add the total offset from all the GEP indices to the base.
   3227   return getAddExpr(BaseS, TotalOffset,
   3228                     isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
   3229 }
   3230 
   3231 /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
   3232 /// guaranteed to end in (at every loop iteration).  It is, at the same time,
   3233 /// the minimum number of times S is divisible by 2.  For example, given {4,+,8}
   3234 /// it returns 2.  If S is guaranteed to be 0, it returns the bitwidth of S.
   3235 uint32_t
   3236 ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
   3237   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
   3238     return C->getValue()->getValue().countTrailingZeros();
   3239 
   3240   if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
   3241     return std::min(GetMinTrailingZeros(T->getOperand()),
   3242                     (uint32_t)getTypeSizeInBits(T->getType()));
   3243 
   3244   if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
   3245     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
   3246     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
   3247              getTypeSizeInBits(E->getType()) : OpRes;
   3248   }
   3249 
   3250   if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
   3251     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
   3252     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
   3253              getTypeSizeInBits(E->getType()) : OpRes;
   3254   }
   3255 
   3256   if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
   3257     // The result is the min of all operands results.
   3258     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
   3259     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
   3260       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
   3261     return MinOpRes;
   3262   }
   3263 
   3264   if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
   3265     // The result is the sum of all operands results.
   3266     uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
   3267     uint32_t BitWidth = getTypeSizeInBits(M->getType());
   3268     for (unsigned i = 1, e = M->getNumOperands();
   3269          SumOpRes != BitWidth && i != e; ++i)
   3270       SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
   3271                           BitWidth);
   3272     return SumOpRes;
   3273   }
   3274 
   3275   if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
   3276     // The result is the min of all operands results.
   3277     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
   3278     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
   3279       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
   3280     return MinOpRes;
   3281   }
   3282 
   3283   if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
   3284     // The result is the min of all operands results.
   3285     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
   3286     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
   3287       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
   3288     return MinOpRes;
   3289   }
   3290 
   3291   if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
   3292     // The result is the min of all operands results.
   3293     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
   3294     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
   3295       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
   3296     return MinOpRes;
   3297   }
   3298 
   3299   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
   3300     // For a SCEVUnknown, ask ValueTracking.
   3301     unsigned BitWidth = getTypeSizeInBits(U->getType());
   3302     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
   3303     ComputeMaskedBits(U->getValue(), Zeros, Ones);
   3304     return Zeros.countTrailingOnes();
   3305   }
   3306 
   3307   // SCEVUDivExpr
   3308   return 0;
   3309 }
   3310 
   3311 /// getUnsignedRange - Determine the unsigned range for a particular SCEV.
   3312 ///
   3313 ConstantRange
   3314 ScalarEvolution::getUnsignedRange(const SCEV *S) {
   3315   // See if we've computed this range already.
   3316   DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
   3317   if (I != UnsignedRanges.end())
   3318     return I->second;
   3319 
   3320   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
   3321     return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
   3322 
   3323   unsigned BitWidth = getTypeSizeInBits(S->getType());
   3324   ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
   3325 
   3326   // If the value has known zeros, the maximum unsigned value will have those
   3327   // known zeros as well.
   3328   uint32_t TZ = GetMinTrailingZeros(S);
   3329   if (TZ != 0)
   3330     ConservativeResult =
   3331       ConstantRange(APInt::getMinValue(BitWidth),
   3332                     APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
   3333 
   3334   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
   3335     ConstantRange X = getUnsignedRange(Add->getOperand(0));
   3336     for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
   3337       X = X.add(getUnsignedRange(Add->getOperand(i)));
   3338     return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
   3339   }
   3340 
   3341   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
   3342     ConstantRange X = getUnsignedRange(Mul->getOperand(0));
   3343     for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
   3344       X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
   3345     return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
   3346   }
   3347 
   3348   if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
   3349     ConstantRange X = getUnsignedRange(SMax->getOperand(0));
   3350     for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
   3351       X = X.smax(getUnsignedRange(SMax->getOperand(i)));
   3352     return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
   3353   }
   3354 
   3355   if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
   3356     ConstantRange X = getUnsignedRange(UMax->getOperand(0));
   3357     for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
   3358       X = X.umax(getUnsignedRange(UMax->getOperand(i)));
   3359     return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
   3360   }
   3361 
   3362   if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
   3363     ConstantRange X = getUnsignedRange(UDiv->getLHS());
   3364     ConstantRange Y = getUnsignedRange(UDiv->getRHS());
   3365     return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
   3366   }
   3367 
   3368   if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
   3369     ConstantRange X = getUnsignedRange(ZExt->getOperand());
   3370     return setUnsignedRange(ZExt,
   3371       ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
   3372   }
   3373 
   3374   if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
   3375     ConstantRange X = getUnsignedRange(SExt->getOperand());
   3376     return setUnsignedRange(SExt,
   3377       ConservativeResult.intersectWith(X.signExtend(BitWidth)));
   3378   }
   3379 
   3380   if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
   3381     ConstantRange X = getUnsignedRange(Trunc->getOperand());
   3382     return setUnsignedRange(Trunc,
   3383       ConservativeResult.intersectWith(X.truncate(BitWidth)));
   3384   }
   3385 
   3386   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
   3387     // If there's no unsigned wrap, the value will never be less than its
   3388     // initial value.
   3389     if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
   3390       if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
   3391         if (!C->getValue()->isZero())
   3392           ConservativeResult =
   3393             ConservativeResult.intersectWith(
   3394               ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
   3395 
   3396     // TODO: non-affine addrec
   3397     if (AddRec->isAffine()) {
   3398       Type *Ty = AddRec->getType();
   3399       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
   3400       if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
   3401           getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
   3402         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
   3403 
   3404         const SCEV *Start = AddRec->getStart();
   3405         const SCEV *Step = AddRec->getStepRecurrence(*this);
   3406 
   3407         ConstantRange StartRange = getUnsignedRange(Start);
   3408         ConstantRange StepRange = getSignedRange(Step);
   3409         ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
   3410         ConstantRange EndRange =
   3411           StartRange.add(MaxBECountRange.multiply(StepRange));
   3412 
   3413         // Check for overflow. This must be done with ConstantRange arithmetic
   3414         // because we could be called from within the ScalarEvolution overflow
   3415         // checking code.
   3416         ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
   3417         ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
   3418         ConstantRange ExtMaxBECountRange =
   3419           MaxBECountRange.zextOrTrunc(BitWidth*2+1);
   3420         ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
   3421         if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
   3422             ExtEndRange)
   3423           return setUnsignedRange(AddRec, ConservativeResult);
   3424 
   3425         APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
   3426                                    EndRange.getUnsignedMin());
   3427         APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
   3428                                    EndRange.getUnsignedMax());
   3429         if (Min.isMinValue() && Max.isMaxValue())
   3430           return setUnsignedRange(AddRec, ConservativeResult);
   3431         return setUnsignedRange(AddRec,
   3432           ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
   3433       }
   3434     }
   3435 
   3436     return setUnsignedRange(AddRec, ConservativeResult);
   3437   }
   3438 
   3439   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
   3440     // For a SCEVUnknown, ask ValueTracking.
   3441     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
   3442     ComputeMaskedBits(U->getValue(), Zeros, Ones, TD);
   3443     if (Ones == ~Zeros + 1)
   3444       return setUnsignedRange(U, ConservativeResult);
   3445     return setUnsignedRange(U,
   3446       ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
   3447   }
   3448 
   3449   return setUnsignedRange(S, ConservativeResult);
   3450 }
   3451 
   3452 /// getSignedRange - Determine the signed range for a particular SCEV.
   3453 ///
   3454 ConstantRange
   3455 ScalarEvolution::getSignedRange(const SCEV *S) {
   3456   // See if we've computed this range already.
   3457   DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
   3458   if (I != SignedRanges.end())
   3459     return I->second;
   3460 
   3461   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
   3462     return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
   3463 
   3464   unsigned BitWidth = getTypeSizeInBits(S->getType());
   3465   ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
   3466 
   3467   // If the value has known zeros, the maximum signed value will have those
   3468   // known zeros as well.
   3469   uint32_t TZ = GetMinTrailingZeros(S);
   3470   if (TZ != 0)
   3471     ConservativeResult =
   3472       ConstantRange(APInt::getSignedMinValue(BitWidth),
   3473                     APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
   3474 
   3475   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
   3476     ConstantRange X = getSignedRange(Add->getOperand(0));
   3477     for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
   3478       X = X.add(getSignedRange(Add->getOperand(i)));
   3479     return setSignedRange(Add, ConservativeResult.intersectWith(X));
   3480   }
   3481 
   3482   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
   3483     ConstantRange X = getSignedRange(Mul->getOperand(0));
   3484     for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
   3485       X = X.multiply(getSignedRange(Mul->getOperand(i)));
   3486     return setSignedRange(Mul, ConservativeResult.intersectWith(X));
   3487   }
   3488 
   3489   if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
   3490     ConstantRange X = getSignedRange(SMax->getOperand(0));
   3491     for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
   3492       X = X.smax(getSignedRange(SMax->getOperand(i)));
   3493     return setSignedRange(SMax, ConservativeResult.intersectWith(X));
   3494   }
   3495 
   3496   if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
   3497     ConstantRange X = getSignedRange(UMax->getOperand(0));
   3498     for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
   3499       X = X.umax(getSignedRange(UMax->getOperand(i)));
   3500     return setSignedRange(UMax, ConservativeResult.intersectWith(X));
   3501   }
   3502 
   3503   if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
   3504     ConstantRange X = getSignedRange(UDiv->getLHS());
   3505     ConstantRange Y = getSignedRange(UDiv->getRHS());
   3506     return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
   3507   }
   3508 
   3509   if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
   3510     ConstantRange X = getSignedRange(ZExt->getOperand());
   3511     return setSignedRange(ZExt,
   3512       ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
   3513   }
   3514 
   3515   if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
   3516     ConstantRange X = getSignedRange(SExt->getOperand());
   3517     return setSignedRange(SExt,
   3518       ConservativeResult.intersectWith(X.signExtend(BitWidth)));
   3519   }
   3520 
   3521   if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
   3522     ConstantRange X = getSignedRange(Trunc->getOperand());
   3523     return setSignedRange(Trunc,
   3524       ConservativeResult.intersectWith(X.truncate(BitWidth)));
   3525   }
   3526 
   3527   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
   3528     // If there's no signed wrap, and all the operands have the same sign or
   3529     // zero, the value won't ever change sign.
   3530     if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
   3531       bool AllNonNeg = true;
   3532       bool AllNonPos = true;
   3533       for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
   3534         if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
   3535         if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
   3536       }
   3537       if (AllNonNeg)
   3538         ConservativeResult = ConservativeResult.intersectWith(
   3539           ConstantRange(APInt(BitWidth, 0),
   3540                         APInt::getSignedMinValue(BitWidth)));
   3541       else if (AllNonPos)
   3542         ConservativeResult = ConservativeResult.intersectWith(
   3543           ConstantRange(APInt::getSignedMinValue(BitWidth),
   3544                         APInt(BitWidth, 1)));
   3545     }
   3546 
   3547     // TODO: non-affine addrec
   3548     if (AddRec->isAffine()) {
   3549       Type *Ty = AddRec->getType();
   3550       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
   3551       if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
   3552           getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
   3553         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
   3554 
   3555         const SCEV *Start = AddRec->getStart();
   3556         const SCEV *Step = AddRec->getStepRecurrence(*this);
   3557 
   3558         ConstantRange StartRange = getSignedRange(Start);
   3559         ConstantRange StepRange = getSignedRange(Step);
   3560         ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
   3561         ConstantRange EndRange =
   3562           StartRange.add(MaxBECountRange.multiply(StepRange));
   3563 
   3564         // Check for overflow. This must be done with ConstantRange arithmetic
   3565         // because we could be called from within the ScalarEvolution overflow
   3566         // checking code.
   3567         ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
   3568         ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
   3569         ConstantRange ExtMaxBECountRange =
   3570           MaxBECountRange.zextOrTrunc(BitWidth*2+1);
   3571         ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
   3572         if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
   3573             ExtEndRange)
   3574           return setSignedRange(AddRec, ConservativeResult);
   3575 
   3576         APInt Min = APIntOps::smin(StartRange.getSignedMin(),
   3577                                    EndRange.getSignedMin());
   3578         APInt Max = APIntOps::smax(StartRange.getSignedMax(),
   3579                                    EndRange.getSignedMax());
   3580         if (Min.isMinSignedValue() && Max.isMaxSignedValue())
   3581           return setSignedRange(AddRec, ConservativeResult);
   3582         return setSignedRange(AddRec,
   3583           ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
   3584       }
   3585     }
   3586 
   3587     return setSignedRange(AddRec, ConservativeResult);
   3588   }
   3589 
   3590   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
   3591     // For a SCEVUnknown, ask ValueTracking.
   3592     if (!U->getValue()->getType()->isIntegerTy() && !TD)
   3593       return setSignedRange(U, ConservativeResult);
   3594     unsigned NS = ComputeNumSignBits(U->getValue(), TD);
   3595     if (NS <= 1)
   3596       return setSignedRange(U, ConservativeResult);
   3597     return setSignedRange(U, ConservativeResult.intersectWith(
   3598       ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
   3599                     APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
   3600   }
   3601 
   3602   return setSignedRange(S, ConservativeResult);
   3603 }
   3604 
   3605 /// createSCEV - We know that there is no SCEV for the specified value.
   3606 /// Analyze the expression.
   3607 ///
   3608 const SCEV *ScalarEvolution::createSCEV(Value *V) {
   3609   if (!isSCEVable(V->getType()))
   3610     return getUnknown(V);
   3611 
   3612   unsigned Opcode = Instruction::UserOp1;
   3613   if (Instruction *I = dyn_cast<Instruction>(V)) {
   3614     Opcode = I->getOpcode();
   3615 
   3616     // Don't attempt to analyze instructions in blocks that aren't
   3617     // reachable. Such instructions don't matter, and they aren't required
   3618     // to obey basic rules for definitions dominating uses which this
   3619     // analysis depends on.
   3620     if (!DT->isReachableFromEntry(I->getParent()))
   3621       return getUnknown(V);
   3622   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
   3623     Opcode = CE->getOpcode();
   3624   else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
   3625     return getConstant(CI);
   3626   else if (isa<ConstantPointerNull>(V))
   3627     return getConstant(V->getType(), 0);
   3628   else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
   3629     return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
   3630   else
   3631     return getUnknown(V);
   3632 
   3633   Operator *U = cast<Operator>(V);
   3634   switch (Opcode) {
   3635   case Instruction::Add: {
   3636     // The simple thing to do would be to just call getSCEV on both operands
   3637     // and call getAddExpr with the result. However if we're looking at a
   3638     // bunch of things all added together, this can be quite inefficient,
   3639     // because it leads to N-1 getAddExpr calls for N ultimate operands.
   3640     // Instead, gather up all the operands and make a single getAddExpr call.
   3641     // LLVM IR canonical form means we need only traverse the left operands.
   3642     //
   3643     // Don't apply this instruction's NSW or NUW flags to the new
   3644     // expression. The instruction may be guarded by control flow that the
   3645     // no-wrap behavior depends on. Non-control-equivalent instructions can be
   3646     // mapped to the same SCEV expression, and it would be incorrect to transfer
   3647     // NSW/NUW semantics to those operations.
   3648     SmallVector<const SCEV *, 4> AddOps;
   3649     AddOps.push_back(getSCEV(U->getOperand(1)));
   3650     for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
   3651       unsigned Opcode = Op->getValueID() - Value::InstructionVal;
   3652       if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
   3653         break;
   3654       U = cast<Operator>(Op);
   3655       const SCEV *Op1 = getSCEV(U->getOperand(1));
   3656       if (Opcode == Instruction::Sub)
   3657         AddOps.push_back(getNegativeSCEV(Op1));
   3658       else
   3659         AddOps.push_back(Op1);
   3660     }
   3661     AddOps.push_back(getSCEV(U->getOperand(0)));
   3662     return getAddExpr(AddOps);
   3663   }
   3664   case Instruction::Mul: {
   3665     // Don't transfer NSW/NUW for the same reason as AddExpr.
   3666     SmallVector<const SCEV *, 4> MulOps;
   3667     MulOps.push_back(getSCEV(U->getOperand(1)));
   3668     for (Value *Op = U->getOperand(0);
   3669          Op->getValueID() == Instruction::Mul + Value::InstructionVal;
   3670          Op = U->getOperand(0)) {
   3671       U = cast<Operator>(Op);
   3672       MulOps.push_back(getSCEV(U->getOperand(1)));
   3673     }
   3674     MulOps.push_back(getSCEV(U->getOperand(0)));
   3675     return getMulExpr(MulOps);
   3676   }
   3677   case Instruction::UDiv:
   3678     return getUDivExpr(getSCEV(U->getOperand(0)),
   3679                        getSCEV(U->getOperand(1)));
   3680   case Instruction::Sub:
   3681     return getMinusSCEV(getSCEV(U->getOperand(0)),
   3682                         getSCEV(U->getOperand(1)));
   3683   case Instruction::And:
   3684     // For an expression like x&255 that merely masks off the high bits,
   3685     // use zext(trunc(x)) as the SCEV expression.
   3686     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
   3687       if (CI->isNullValue())
   3688         return getSCEV(U->getOperand(1));
   3689       if (CI->isAllOnesValue())
   3690         return getSCEV(U->getOperand(0));
   3691       const APInt &A = CI->getValue();
   3692 
   3693       // Instcombine's ShrinkDemandedConstant may strip bits out of
   3694       // constants, obscuring what would otherwise be a low-bits mask.
   3695       // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
   3696       // knew about to reconstruct a low-bits mask value.
   3697       unsigned LZ = A.countLeadingZeros();
   3698       unsigned BitWidth = A.getBitWidth();
   3699       APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
   3700       ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD);
   3701 
   3702       APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
   3703 
   3704       if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
   3705         return
   3706           getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
   3707                                 IntegerType::get(getContext(), BitWidth - LZ)),
   3708                             U->getType());
   3709     }
   3710     break;
   3711 
   3712   case Instruction::Or:
   3713     // If the RHS of the Or is a constant, we may have something like:
   3714     // X*4+1 which got turned into X*4|1.  Handle this as an Add so loop
   3715     // optimizations will transparently handle this case.
   3716     //
   3717     // In order for this transformation to be safe, the LHS must be of the
   3718     // form X*(2^n) and the Or constant must be less than 2^n.
   3719     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
   3720       const SCEV *LHS = getSCEV(U->getOperand(0));
   3721       const APInt &CIVal = CI->getValue();
   3722       if (GetMinTrailingZeros(LHS) >=
   3723           (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
   3724         // Build a plain add SCEV.
   3725         const SCEV *S = getAddExpr(LHS, getSCEV(CI));
   3726         // If the LHS of the add was an addrec and it has no-wrap flags,
   3727         // transfer the no-wrap flags, since an or won't introduce a wrap.
   3728         if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
   3729           const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
   3730           const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
   3731             OldAR->getNoWrapFlags());
   3732         }
   3733         return S;
   3734       }
   3735     }
   3736     break;
   3737   case Instruction::Xor:
   3738     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
   3739       // If the RHS of the xor is a signbit, then this is just an add.
   3740       // Instcombine turns add of signbit into xor as a strength reduction step.
   3741       if (CI->getValue().isSignBit())
   3742         return getAddExpr(getSCEV(U->getOperand(0)),
   3743                           getSCEV(U->getOperand(1)));
   3744 
   3745       // If the RHS of xor is -1, then this is a not operation.
   3746       if (CI->isAllOnesValue())
   3747         return getNotSCEV(getSCEV(U->getOperand(0)));
   3748 
   3749       // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
   3750       // This is a variant of the check for xor with -1, and it handles
   3751       // the case where instcombine has trimmed non-demanded bits out
   3752       // of an xor with -1.
   3753       if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
   3754         if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
   3755           if (BO->getOpcode() == Instruction::And &&
   3756               LCI->getValue() == CI->getValue())
   3757             if (const SCEVZeroExtendExpr *Z =
   3758                   dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
   3759               Type *UTy = U->getType();
   3760               const SCEV *Z0 = Z->getOperand();
   3761               Type *Z0Ty = Z0->getType();
   3762               unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
   3763 
   3764               // If C is a low-bits mask, the zero extend is serving to
   3765               // mask off the high bits. Complement the operand and
   3766               // re-apply the zext.
   3767               if (APIntOps::isMask(Z0TySize, CI->getValue()))
   3768                 return getZeroExtendExpr(getNotSCEV(Z0), UTy);
   3769 
   3770               // If C is a single bit, it may be in the sign-bit position
   3771               // before the zero-extend. In this case, represent the xor
   3772               // using an add, which is equivalent, and re-apply the zext.
   3773               APInt Trunc = CI->getValue().trunc(Z0TySize);
   3774               if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
   3775                   Trunc.isSignBit())
   3776                 return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
   3777                                          UTy);
   3778             }
   3779     }
   3780     break;
   3781 
   3782   case Instruction::Shl:
   3783     // Turn shift left of a constant amount into a multiply.
   3784     if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
   3785       uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
   3786 
   3787       // If the shift count is not less than the bitwidth, the result of
   3788       // the shift is undefined. Don't try to analyze it, because the
   3789       // resolution chosen here may differ from the resolution chosen in
   3790       // other parts of the compiler.
   3791       if (SA->getValue().uge(BitWidth))
   3792         break;
   3793 
   3794       Constant *X = ConstantInt::get(getContext(),
   3795         APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
   3796       return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
   3797     }
   3798     break;
   3799 
   3800   case Instruction::LShr:
   3801     // Turn logical shift right of a constant into a unsigned divide.
   3802     if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
   3803       uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
   3804 
   3805       // If the shift count is not less than the bitwidth, the result of
   3806       // the shift is undefined. Don't try to analyze it, because the
   3807       // resolution chosen here may differ from the resolution chosen in
   3808       // other parts of the compiler.
   3809       if (SA->getValue().uge(BitWidth))
   3810         break;
   3811 
   3812       Constant *X = ConstantInt::get(getContext(),
   3813         APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
   3814       return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
   3815     }
   3816     break;
   3817 
   3818   case Instruction::AShr:
   3819     // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
   3820     if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
   3821       if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
   3822         if (L->getOpcode() == Instruction::Shl &&
   3823             L->getOperand(1) == U->getOperand(1)) {
   3824           uint64_t BitWidth = getTypeSizeInBits(U->getType());
   3825 
   3826           // If the shift count is not less than the bitwidth, the result of
   3827           // the shift is undefined. Don't try to analyze it, because the
   3828           // resolution chosen here may differ from the resolution chosen in
   3829           // other parts of the compiler.
   3830           if (CI->getValue().uge(BitWidth))
   3831             break;
   3832 
   3833           uint64_t Amt = BitWidth - CI->getZExtValue();
   3834           if (Amt == BitWidth)
   3835             return getSCEV(L->getOperand(0));       // shift by zero --> noop
   3836           return
   3837             getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
   3838                                               IntegerType::get(getContext(),
   3839                                                                Amt)),
   3840                               U->getType());
   3841         }
   3842     break;
   3843 
   3844   case Instruction::Trunc:
   3845     return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
   3846 
   3847   case Instruction::ZExt:
   3848     return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
   3849 
   3850   case Instruction::SExt:
   3851     return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
   3852 
   3853   case Instruction::BitCast:
   3854     // BitCasts are no-op casts so we just eliminate the cast.
   3855     if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
   3856       return getSCEV(U->getOperand(0));
   3857     break;
   3858 
   3859   // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
   3860   // lead to pointer expressions which cannot safely be expanded to GEPs,
   3861   // because ScalarEvolution doesn't respect the GEP aliasing rules when
   3862   // simplifying integer expressions.
   3863 
   3864   case Instruction::GetElementPtr:
   3865     return createNodeForGEP(cast<GEPOperator>(U));
   3866 
   3867   case Instruction::PHI:
   3868     return createNodeForPHI(cast<PHINode>(U));
   3869 
   3870   case Instruction::Select:
   3871     // This could be a smax or umax that was lowered earlier.
   3872     // Try to recover it.
   3873     if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
   3874       Value *LHS = ICI->getOperand(0);
   3875       Value *RHS = ICI->getOperand(1);
   3876       switch (ICI->getPredicate()) {
   3877       case ICmpInst::ICMP_SLT:
   3878       case ICmpInst::ICMP_SLE:
   3879         std::swap(LHS, RHS);
   3880         // fall through
   3881       case ICmpInst::ICMP_SGT:
   3882       case ICmpInst::ICMP_SGE:
   3883         // a >s b ? a+x : b+x  ->  smax(a, b)+x
   3884         // a >s b ? b+x : a+x  ->  smin(a, b)+x
   3885         if (LHS->getType() == U->getType()) {
   3886           const SCEV *LS = getSCEV(LHS);
   3887           const SCEV *RS = getSCEV(RHS);
   3888           const SCEV *LA = getSCEV(U->getOperand(1));
   3889           const SCEV *RA = getSCEV(U->getOperand(2));
   3890           const SCEV *LDiff = getMinusSCEV(LA, LS);
   3891           const SCEV *RDiff = getMinusSCEV(RA, RS);
   3892           if (LDiff == RDiff)
   3893             return getAddExpr(getSMaxExpr(LS, RS), LDiff);
   3894           LDiff = getMinusSCEV(LA, RS);
   3895           RDiff = getMinusSCEV(RA, LS);
   3896           if (LDiff == RDiff)
   3897             return getAddExpr(getSMinExpr(LS, RS), LDiff);
   3898         }
   3899         break;
   3900       case ICmpInst::ICMP_ULT:
   3901       case ICmpInst::ICMP_ULE:
   3902         std::swap(LHS, RHS);
   3903         // fall through
   3904       case ICmpInst::ICMP_UGT:
   3905       case ICmpInst::ICMP_UGE:
   3906         // a >u b ? a+x : b+x  ->  umax(a, b)+x
   3907         // a >u b ? b+x : a+x  ->  umin(a, b)+x
   3908         if (LHS->getType() == U->getType()) {
   3909           const SCEV *LS = getSCEV(LHS);
   3910           const SCEV *RS = getSCEV(RHS);
   3911           const SCEV *LA = getSCEV(U->getOperand(1));
   3912           const SCEV *RA = getSCEV(U->getOperand(2));
   3913           const SCEV *LDiff = getMinusSCEV(LA, LS);
   3914           const SCEV *RDiff = getMinusSCEV(RA, RS);
   3915           if (LDiff == RDiff)
   3916             return getAddExpr(getUMaxExpr(LS, RS), LDiff);
   3917           LDiff = getMinusSCEV(LA, RS);
   3918           RDiff = getMinusSCEV(RA, LS);
   3919           if (LDiff == RDiff)
   3920             return getAddExpr(getUMinExpr(LS, RS), LDiff);
   3921         }
   3922         break;
   3923       case ICmpInst::ICMP_NE:
   3924         // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
   3925         if (LHS->getType() == U->getType() &&
   3926             isa<ConstantInt>(RHS) &&
   3927             cast<ConstantInt>(RHS)->isZero()) {
   3928           const SCEV *One = getConstant(LHS->getType(), 1);
   3929           const SCEV *LS = getSCEV(LHS);
   3930           const SCEV *LA = getSCEV(U->getOperand(1));
   3931           const SCEV *RA = getSCEV(U->getOperand(2));
   3932           const SCEV *LDiff = getMinusSCEV(LA, LS);
   3933           const SCEV *RDiff = getMinusSCEV(RA, One);
   3934           if (LDiff == RDiff)
   3935             return getAddExpr(getUMaxExpr(One, LS), LDiff);
   3936         }
   3937         break;
   3938       case ICmpInst::ICMP_EQ:
   3939         // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x
   3940         if (LHS->getType() == U->getType() &&
   3941             isa<ConstantInt>(RHS) &&
   3942             cast<ConstantInt>(RHS)->isZero()) {
   3943           const SCEV *One = getConstant(LHS->getType(), 1);
   3944           const SCEV *LS = getSCEV(LHS);
   3945           const SCEV *LA = getSCEV(U->getOperand(1));
   3946           const SCEV *RA = getSCEV(U->getOperand(2));
   3947           const SCEV *LDiff = getMinusSCEV(LA, One);
   3948           const SCEV *RDiff = getMinusSCEV(RA, LS);
   3949           if (LDiff == RDiff)
   3950             return getAddExpr(getUMaxExpr(One, LS), LDiff);
   3951         }
   3952         break;
   3953       default:
   3954         break;
   3955       }
   3956     }
   3957 
   3958   default: // We cannot analyze this expression.
   3959     break;
   3960   }
   3961 
   3962   return getUnknown(V);
   3963 }
   3964 
   3965 
   3966 
   3967 //===----------------------------------------------------------------------===//
   3968 //                   Iteration Count Computation Code
   3969 //
   3970 
   3971 /// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
   3972 /// normal unsigned value. Returns 0 if the trip count is unknown or not
   3973 /// constant. Will also return 0 if the maximum trip count is very large (>=
   3974 /// 2^32).
   3975 ///
   3976 /// This "trip count" assumes that control exits via ExitingBlock. More
   3977 /// precisely, it is the number of times that control may reach ExitingBlock
   3978 /// before taking the branch. For loops with multiple exits, it may not be the
   3979 /// number times that the loop header executes because the loop may exit
   3980 /// prematurely via another branch.
   3981 ///
   3982 /// FIXME: We conservatively call getBackedgeTakenCount(L) instead of
   3983 /// getExitCount(L, ExitingBlock) to compute a safe trip count considering all
   3984 /// loop exits. getExitCount() may return an exact count for this branch
   3985 /// assuming no-signed-wrap. The number of well-defined iterations may actually
   3986 /// be higher than this trip count if this exit test is skipped and the loop
   3987 /// exits via a different branch. Ideally, getExitCount() would know whether it
   3988 /// depends on a NSW assumption, and we would only fall back to a conservative
   3989 /// trip count in that case.
   3990 unsigned ScalarEvolution::
   3991 getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) {
   3992   const SCEVConstant *ExitCount =
   3993     dyn_cast<SCEVConstant>(getBackedgeTakenCount(L));
   3994   if (!ExitCount)
   3995     return 0;
   3996 
   3997   ConstantInt *ExitConst = ExitCount->getValue();
   3998 
   3999   // Guard against huge trip counts.
   4000   if (ExitConst->getValue().getActiveBits() > 32)
   4001     return 0;
   4002 
   4003   // In case of integer overflow, this returns 0, which is correct.
   4004   return ((unsigned)ExitConst->getZExtValue()) + 1;
   4005 }
   4006 
   4007 /// getSmallConstantTripMultiple - Returns the largest constant divisor of the
   4008 /// trip count of this loop as a normal unsigned value, if possible. This
   4009 /// means that the actual trip count is always a multiple of the returned
   4010 /// value (don't forget the trip count could very well be zero as well!).
   4011 ///
   4012 /// Returns 1 if the trip count is unknown or not guaranteed to be the
   4013 /// multiple of a constant (which is also the case if the trip count is simply
   4014 /// constant, use getSmallConstantTripCount for that case), Will also return 1
   4015 /// if the trip count is very large (>= 2^32).
   4016 ///
   4017 /// As explained in the comments for getSmallConstantTripCount, this assumes
   4018 /// that control exits the loop via ExitingBlock.
   4019 unsigned ScalarEvolution::
   4020 getSmallConstantTripMultiple(Loop *L, BasicBlock * /*ExitingBlock*/) {
   4021   const SCEV *ExitCount = getBackedgeTakenCount(L);
   4022   if (ExitCount == getCouldNotCompute())
   4023     return 1;
   4024 
   4025   // Get the trip count from the BE count by adding 1.
   4026   const SCEV *TCMul = getAddExpr(ExitCount,
   4027                                  getConstant(ExitCount->getType(), 1));
   4028   // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
   4029   // to factor simple cases.
   4030   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
   4031     TCMul = Mul->getOperand(0);
   4032 
   4033   const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
   4034   if (!MulC)
   4035     return 1;
   4036 
   4037   ConstantInt *Result = MulC->getValue();
   4038 
   4039   // Guard against huge trip counts (this requires checking
   4040   // for zero to handle the case where the trip count == -1 and the
   4041   // addition wraps).
   4042   if (!Result || Result->getValue().getActiveBits() > 32 ||
   4043       Result->getValue().getActiveBits() == 0)
   4044     return 1;
   4045 
   4046   return (unsigned)Result->getZExtValue();
   4047 }
   4048 
   4049 // getExitCount - Get the expression for the number of loop iterations for which
   4050 // this loop is guaranteed not to exit via ExitingBlock. Otherwise return
   4051 // SCEVCouldNotCompute.
   4052 const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
   4053   return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
   4054 }
   4055 
   4056 /// getBackedgeTakenCount - If the specified loop has a predictable
   4057 /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
   4058 /// object. The backedge-taken count is the number of times the loop header
   4059 /// will be branched to from within the loop. This is one less than the
   4060 /// trip count of the loop, since it doesn't count the first iteration,
   4061 /// when the header is branched to from outside the loop.
   4062 ///
   4063 /// Note that it is not valid to call this method on a loop without a
   4064 /// loop-invariant backedge-taken count (see
   4065 /// hasLoopInvariantBackedgeTakenCount).
   4066 ///
   4067 const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
   4068   return getBackedgeTakenInfo(L).getExact(this);
   4069 }
   4070 
   4071 /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
   4072 /// return the least SCEV value that is known never to be less than the
   4073 /// actual backedge taken count.
   4074 const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
   4075   return getBackedgeTakenInfo(L).getMax(this);
   4076 }
   4077 
   4078 /// PushLoopPHIs - Push PHI nodes in the header of the given loop
   4079 /// onto the given Worklist.
   4080 static void
   4081 PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
   4082   BasicBlock *Header = L->getHeader();
   4083 
   4084   // Push all Loop-header PHIs onto the Worklist stack.
   4085   for (BasicBlock::iterator I = Header->begin();
   4086        PHINode *PN = dyn_cast<PHINode>(I); ++I)
   4087     Worklist.push_back(PN);
   4088 }
   4089 
   4090 const ScalarEvolution::BackedgeTakenInfo &
   4091 ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
   4092   // Initially insert an invalid entry for this loop. If the insertion
   4093   // succeeds, proceed to actually compute a backedge-taken count and
   4094   // update the value. The temporary CouldNotCompute value tells SCEV
   4095   // code elsewhere that it shouldn't attempt to request a new
   4096   // backedge-taken count, which could result in infinite recursion.
   4097   std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
   4098     BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
   4099   if (!Pair.second)
   4100     return Pair.first->second;
   4101 
   4102   // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
   4103   // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
   4104   // must be cleared in this scope.
   4105   BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
   4106 
   4107   if (Result.getExact(this) != getCouldNotCompute()) {
   4108     assert(isLoopInvariant(Result.getExact(this), L) &&
   4109            isLoopInvariant(Result.getMax(this), L) &&
   4110            "Computed backedge-taken count isn't loop invariant for loop!");
   4111     ++NumTripCountsComputed;
   4112   }
   4113   else if (Result.getMax(this) == getCouldNotCompute() &&
   4114            isa<PHINode>(L->getHeader()->begin())) {
   4115     // Only count loops that have phi nodes as not being computable.
   4116     ++NumTripCountsNotComputed;
   4117   }
   4118 
   4119   // Now that we know more about the trip count for this loop, forget any
   4120   // existing SCEV values for PHI nodes in this loop since they are only
   4121   // conservative estimates made without the benefit of trip count
   4122   // information. This is similar to the code in forgetLoop, except that
   4123   // it handles SCEVUnknown PHI nodes specially.
   4124   if (Result.hasAnyInfo()) {
   4125     SmallVector<Instruction *, 16> Worklist;
   4126     PushLoopPHIs(L, Worklist);
   4127 
   4128     SmallPtrSet<Instruction *, 8> Visited;
   4129     while (!Worklist.empty()) {
   4130       Instruction *I = Worklist.pop_back_val();
   4131       if (!Visited.insert(I)) continue;
   4132 
   4133       ValueExprMapType::iterator It =
   4134         ValueExprMap.find_as(static_cast<Value *>(I));
   4135       if (It != ValueExprMap.end()) {
   4136         const SCEV *Old = It->second;
   4137 
   4138         // SCEVUnknown for a PHI either means that it has an unrecognized
   4139         // structure, or it's a PHI that's in the progress of being computed
   4140         // by createNodeForPHI.  In the former case, additional loop trip
   4141         // count information isn't going to change anything. In the later
   4142         // case, createNodeForPHI will perform the necessary updates on its
   4143         // own when it gets to that point.
   4144         if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
   4145           forgetMemoizedResults(Old);
   4146           ValueExprMap.erase(It);
   4147         }
   4148         if (PHINode *PN = dyn_cast<PHINode>(I))
   4149           ConstantEvolutionLoopExitValue.erase(PN);
   4150       }
   4151 
   4152       PushDefUseChildren(I, Worklist);
   4153     }
   4154   }
   4155 
   4156   // Re-lookup the insert position, since the call to
   4157   // ComputeBackedgeTakenCount above could result in a
   4158   // recusive call to getBackedgeTakenInfo (on a different
   4159   // loop), which would invalidate the iterator computed
   4160   // earlier.
   4161   return BackedgeTakenCounts.find(L)->second = Result;
   4162 }
   4163 
   4164 /// forgetLoop - This method should be called by the client when it has
   4165 /// changed a loop in a way that may effect ScalarEvolution's ability to
   4166 /// compute a trip count, or if the loop is deleted.
   4167 void ScalarEvolution::forgetLoop(const Loop *L) {
   4168   // Drop any stored trip count value.
   4169   DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
   4170     BackedgeTakenCounts.find(L);
   4171   if (BTCPos != BackedgeTakenCounts.end()) {
   4172     BTCPos->second.clear();
   4173     BackedgeTakenCounts.erase(BTCPos);
   4174   }
   4175 
   4176   // Drop information about expressions based on loop-header PHIs.
   4177   SmallVector<Instruction *, 16> Worklist;
   4178   PushLoopPHIs(L, Worklist);
   4179 
   4180   SmallPtrSet<Instruction *, 8> Visited;
   4181   while (!Worklist.empty()) {
   4182     Instruction *I = Worklist.pop_back_val();
   4183     if (!Visited.insert(I)) continue;
   4184 
   4185     ValueExprMapType::iterator It =
   4186       ValueExprMap.find_as(static_cast<Value *>(I));
   4187     if (It != ValueExprMap.end()) {
   4188       forgetMemoizedResults(It->second);
   4189       ValueExprMap.erase(It);
   4190       if (PHINode *PN = dyn_cast<PHINode>(I))
   4191         ConstantEvolutionLoopExitValue.erase(PN);
   4192     }
   4193 
   4194     PushDefUseChildren(I, Worklist);
   4195   }
   4196 
   4197   // Forget all contained loops too, to avoid dangling entries in the
   4198   // ValuesAtScopes map.
   4199   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
   4200     forgetLoop(*I);
   4201 }
   4202 
   4203 /// forgetValue - This method should be called by the client when it has
   4204 /// changed a value in a way that may effect its value, or which may
   4205 /// disconnect it from a def-use chain linking it to a loop.
   4206 void ScalarEvolution::forgetValue(Value *V) {
   4207   Instruction *I = dyn_cast<Instruction>(V);
   4208   if (!I) return;
   4209 
   4210   // Drop information about expressions based on loop-header PHIs.
   4211   SmallVector<Instruction *, 16> Worklist;
   4212   Worklist.push_back(I);
   4213 
   4214   SmallPtrSet<Instruction *, 8> Visited;
   4215   while (!Worklist.empty()) {
   4216     I = Worklist.pop_back_val();
   4217     if (!Visited.insert(I)) continue;
   4218 
   4219     ValueExprMapType::iterator It =
   4220       ValueExprMap.find_as(static_cast<Value *>(I));
   4221     if (It != ValueExprMap.end()) {
   4222       forgetMemoizedResults(It->second);
   4223       ValueExprMap.erase(It);
   4224       if (PHINode *PN = dyn_cast<PHINode>(I))
   4225         ConstantEvolutionLoopExitValue.erase(PN);
   4226     }
   4227 
   4228     PushDefUseChildren(I, Worklist);
   4229   }
   4230 }
   4231 
   4232 /// getExact - Get the exact loop backedge taken count considering all loop
   4233 /// exits. A computable result can only be return for loops with a single exit.
   4234 /// Returning the minimum taken count among all exits is incorrect because one
   4235 /// of the loop's exit limit's may have been skipped. HowFarToZero assumes that
   4236 /// the limit of each loop test is never skipped. This is a valid assumption as
   4237 /// long as the loop exits via that test. For precise results, it is the
   4238 /// caller's responsibility to specify the relevant loop exit using
   4239 /// getExact(ExitingBlock, SE).
   4240 const SCEV *
   4241 ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
   4242   // If any exits were not computable, the loop is not computable.
   4243   if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
   4244 
   4245   // We need exactly one computable exit.
   4246   if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
   4247   assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
   4248 
   4249   const SCEV *BECount = 0;
   4250   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
   4251        ENT != 0; ENT = ENT->getNextExit()) {
   4252 
   4253     assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
   4254 
   4255     if (!BECount)
   4256       BECount = ENT->ExactNotTaken;
   4257     else if (BECount != ENT->ExactNotTaken)
   4258       return SE->getCouldNotCompute();
   4259   }
   4260   assert(BECount && "Invalid not taken count for loop exit");
   4261   return BECount;
   4262 }
   4263 
   4264 /// getExact - Get the exact not taken count for this loop exit.
   4265 const SCEV *
   4266 ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
   4267                                              ScalarEvolution *SE) const {
   4268   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
   4269        ENT != 0; ENT = ENT->getNextExit()) {
   4270 
   4271     if (ENT->ExitingBlock == ExitingBlock)
   4272       return ENT->ExactNotTaken;
   4273   }
   4274   return SE->getCouldNotCompute();
   4275 }
   4276 
   4277 /// getMax - Get the max backedge taken count for the loop.
   4278 const SCEV *
   4279 ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
   4280   return Max ? Max : SE->getCouldNotCompute();
   4281 }
   4282 
   4283 bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
   4284                                                     ScalarEvolution *SE) const {
   4285   if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
   4286     return true;
   4287 
   4288   if (!ExitNotTaken.ExitingBlock)
   4289     return false;
   4290 
   4291   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
   4292        ENT != 0; ENT = ENT->getNextExit()) {
   4293 
   4294     if (ENT->ExactNotTaken != SE->getCouldNotCompute()
   4295         && SE->hasOperand(ENT->ExactNotTaken, S)) {
   4296       return true;
   4297     }
   4298   }
   4299   return false;
   4300 }
   4301 
   4302 /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
   4303 /// computable exit into a persistent ExitNotTakenInfo array.
   4304 ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
   4305   SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
   4306   bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
   4307 
   4308   if (!Complete)
   4309     ExitNotTaken.setIncomplete();
   4310 
   4311   unsigned NumExits = ExitCounts.size();
   4312   if (NumExits == 0) return;
   4313 
   4314   ExitNotTaken.ExitingBlock = ExitCounts[0].first;
   4315   ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
   4316   if (NumExits == 1) return;
   4317 
   4318   // Handle the rare case of multiple computable exits.
   4319   ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
   4320 
   4321   ExitNotTakenInfo *PrevENT = &ExitNotTaken;
   4322   for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
   4323     PrevENT->setNextExit(ENT);
   4324     ENT->ExitingBlock = ExitCounts[i].first;
   4325     ENT->ExactNotTaken = ExitCounts[i].second;
   4326   }
   4327 }
   4328 
   4329 /// clear - Invalidate this result and free the ExitNotTakenInfo array.
   4330 void ScalarEvolution::BackedgeTakenInfo::clear() {
   4331   ExitNotTaken.ExitingBlock = 0;
   4332   ExitNotTaken.ExactNotTaken = 0;
   4333   delete[] ExitNotTaken.getNextExit();
   4334 }
   4335 
   4336 /// ComputeBackedgeTakenCount - Compute the number of times the backedge
   4337 /// of the specified loop will execute.
   4338 ScalarEvolution::BackedgeTakenInfo
   4339 ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
   4340   SmallVector<BasicBlock *, 8> ExitingBlocks;
   4341   L->getExitingBlocks(ExitingBlocks);
   4342 
   4343   // Examine all exits and pick the most conservative values.
   4344   const SCEV *MaxBECount = getCouldNotCompute();
   4345   bool CouldComputeBECount = true;
   4346   SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
   4347   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
   4348     ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
   4349     if (EL.Exact == getCouldNotCompute())
   4350       // We couldn't compute an exact value for this exit, so
   4351       // we won't be able to compute an exact value for the loop.
   4352       CouldComputeBECount = false;
   4353     else
   4354       ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact));
   4355 
   4356     if (MaxBECount == getCouldNotCompute())
   4357       MaxBECount = EL.Max;
   4358     else if (EL.Max != getCouldNotCompute()) {
   4359       // We cannot take the "min" MaxBECount, because non-unit stride loops may
   4360       // skip some loop tests. Taking the max over the exits is sufficiently
   4361       // conservative.  TODO: We could do better taking into consideration
   4362       // that (1) the loop has unit stride (2) the last loop test is
   4363       // less-than/greater-than (3) any loop test is less-than/greater-than AND
   4364       // falls-through some constant times less then the other tests.
   4365       MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
   4366     }
   4367   }
   4368 
   4369   return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
   4370 }
   4371 
   4372 /// ComputeExitLimit - Compute the number of times the backedge of the specified
   4373 /// loop will execute if it exits via the specified block.
   4374 ScalarEvolution::ExitLimit
   4375 ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
   4376 
   4377   // Okay, we've chosen an exiting block.  See what condition causes us to
   4378   // exit at this block.
   4379   //
   4380   // FIXME: we should be able to handle switch instructions (with a single exit)
   4381   BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
   4382   if (ExitBr == 0) return getCouldNotCompute();
   4383   assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
   4384 
   4385   // At this point, we know we have a conditional branch that determines whether
   4386   // the loop is exited.  However, we don't know if the branch is executed each
   4387   // time through the loop.  If not, then the execution count of the branch will
   4388   // not be equal to the trip count of the loop.
   4389   //
   4390   // Currently we check for this by checking to see if the Exit branch goes to
   4391   // the loop header.  If so, we know it will always execute the same number of
   4392   // times as the loop.  We also handle the case where the exit block *is* the
   4393   // loop header.  This is common for un-rotated loops.
   4394   //
   4395   // If both of those tests fail, walk up the unique predecessor chain to the
   4396   // header, stopping if there is an edge that doesn't exit the loop. If the
   4397   // header is reached, the execution count of the branch will be equal to the
   4398   // trip count of the loop.
   4399   //
   4400   //  More extensive analysis could be done to handle more cases here.
   4401   //
   4402   if (ExitBr->getSuccessor(0) != L->getHeader() &&
   4403       ExitBr->getSuccessor(1) != L->getHeader() &&
   4404       ExitBr->getParent() != L->getHeader()) {
   4405     // The simple checks failed, try climbing the unique predecessor chain
   4406     // up to the header.
   4407     bool Ok = false;
   4408     for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
   4409       BasicBlock *Pred = BB->getUniquePredecessor();
   4410       if (!Pred)
   4411         return getCouldNotCompute();
   4412       TerminatorInst *PredTerm = Pred->getTerminator();
   4413       for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
   4414         BasicBlock *PredSucc = PredTerm->getSuccessor(i);
   4415         if (PredSucc == BB)
   4416           continue;
   4417         // If the predecessor has a successor that isn't BB and isn't
   4418         // outside the loop, assume the worst.
   4419         if (L->contains(PredSucc))
   4420           return getCouldNotCompute();
   4421       }
   4422       if (Pred == L->getHeader()) {
   4423         Ok = true;
   4424         break;
   4425       }
   4426       BB = Pred;
   4427     }
   4428     if (!Ok)
   4429       return getCouldNotCompute();
   4430   }
   4431 
   4432   // Proceed to the next level to examine the exit condition expression.
   4433   return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
   4434                                   ExitBr->getSuccessor(0),
   4435                                   ExitBr->getSuccessor(1),
   4436                                   /*IsSubExpr=*/false);
   4437 }
   4438 
   4439 /// ComputeExitLimitFromCond - Compute the number of times the
   4440 /// backedge of the specified loop will execute if its exit condition
   4441 /// were a conditional branch of ExitCond, TBB, and FBB.
   4442 ///
   4443 /// @param IsSubExpr is true if ExitCond does not directly control the exit
   4444 /// branch. In this case, we cannot assume that the loop only exits when the
   4445 /// condition is true and cannot infer that failing to meet the condition prior
   4446 /// to integer wraparound results in undefined behavior.
   4447 ScalarEvolution::ExitLimit
   4448 ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
   4449                                           Value *ExitCond,
   4450                                           BasicBlock *TBB,
   4451                                           BasicBlock *FBB,
   4452                                           bool IsSubExpr) {
   4453   // Check if the controlling expression for this loop is an And or Or.
   4454   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
   4455     if (BO->getOpcode() == Instruction::And) {
   4456       // Recurse on the operands of the and.
   4457       bool EitherMayExit = L->contains(TBB);
   4458       ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
   4459                                                IsSubExpr || EitherMayExit);
   4460       ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
   4461                                                IsSubExpr || EitherMayExit);
   4462       const SCEV *BECount = getCouldNotCompute();
   4463       const SCEV *MaxBECount = getCouldNotCompute();
   4464       if (EitherMayExit) {
   4465         // Both conditions must be true for the loop to continue executing.
   4466         // Choose the less conservative count.
   4467         if (EL0.Exact == getCouldNotCompute() ||
   4468             EL1.Exact == getCouldNotCompute())
   4469           BECount = getCouldNotCompute();
   4470         else
   4471           BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
   4472         if (EL0.Max == getCouldNotCompute())
   4473           MaxBECount = EL1.Max;
   4474         else if (EL1.Max == getCouldNotCompute())
   4475           MaxBECount = EL0.Max;
   4476         else
   4477           MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
   4478       } else {
   4479         // Both conditions must be true at the same time for the loop to exit.
   4480         // For now, be conservative.
   4481         assert(L->contains(FBB) && "Loop block has no successor in loop!");
   4482         if (EL0.Max == EL1.Max)
   4483           MaxBECount = EL0.Max;
   4484         if (EL0.Exact == EL1.Exact)
   4485           BECount = EL0.Exact;
   4486       }
   4487 
   4488       return ExitLimit(BECount, MaxBECount);
   4489     }
   4490     if (BO->getOpcode() == Instruction::Or) {
   4491       // Recurse on the operands of the or.
   4492       bool EitherMayExit = L->contains(FBB);
   4493       ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
   4494                                                IsSubExpr || EitherMayExit);
   4495       ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
   4496                                                IsSubExpr || EitherMayExit);
   4497       const SCEV *BECount = getCouldNotCompute();
   4498       const SCEV *MaxBECount = getCouldNotCompute();
   4499       if (EitherMayExit) {
   4500         // Both conditions must be false for the loop to continue executing.
   4501         // Choose the less conservative count.
   4502         if (EL0.Exact == getCouldNotCompute() ||
   4503             EL1.Exact == getCouldNotCompute())
   4504           BECount = getCouldNotCompute();
   4505         else
   4506           BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
   4507         if (EL0.Max == getCouldNotCompute())
   4508           MaxBECount = EL1.Max;
   4509         else if (EL1.Max == getCouldNotCompute())
   4510           MaxBECount = EL0.Max;
   4511         else
   4512           MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
   4513       } else {
   4514         // Both conditions must be false at the same time for the loop to exit.
   4515         // For now, be conservative.
   4516         assert(L->contains(TBB) && "Loop block has no successor in loop!");
   4517         if (EL0.Max == EL1.Max)
   4518           MaxBECount = EL0.Max;
   4519         if (EL0.Exact == EL1.Exact)
   4520           BECount = EL0.Exact;
   4521       }
   4522 
   4523       return ExitLimit(BECount, MaxBECount);
   4524     }
   4525   }
   4526 
   4527   // With an icmp, it may be feasible to compute an exact backedge-taken count.
   4528   // Proceed to the next level to examine the icmp.
   4529   if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
   4530     return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr);
   4531 
   4532   // Check for a constant condition. These are normally stripped out by
   4533   // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
   4534   // preserve the CFG and is temporarily leaving constant conditions
   4535   // in place.
   4536   if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
   4537     if (L->contains(FBB) == !CI->getZExtValue())
   4538       // The backedge is always taken.
   4539       return getCouldNotCompute();
   4540     else
   4541       // The backedge is never taken.
   4542       return getConstant(CI->getType(), 0);
   4543   }
   4544 
   4545   // If it's not an integer or pointer comparison then compute it the hard way.
   4546   return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
   4547 }
   4548 
   4549 /// ComputeExitLimitFromICmp - Compute the number of times the
   4550 /// backedge of the specified loop will execute if its exit condition
   4551 /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
   4552 ScalarEvolution::ExitLimit
   4553 ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
   4554                                           ICmpInst *ExitCond,
   4555                                           BasicBlock *TBB,
   4556                                           BasicBlock *FBB,
   4557                                           bool IsSubExpr) {
   4558 
   4559   // If the condition was exit on true, convert the condition to exit on false
   4560   ICmpInst::Predicate Cond;
   4561   if (!L->contains(FBB))
   4562     Cond = ExitCond->getPredicate();
   4563   else
   4564     Cond = ExitCond->getInversePredicate();
   4565 
   4566   // Handle common loops like: for (X = "string"; *X; ++X)
   4567   if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
   4568     if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
   4569       ExitLimit ItCnt =
   4570         ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
   4571       if (ItCnt.hasAnyInfo())
   4572         return ItCnt;
   4573     }
   4574 
   4575   const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
   4576   const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
   4577 
   4578   // Try to evaluate any dependencies out of the loop.
   4579   LHS = getSCEVAtScope(LHS, L);
   4580   RHS = getSCEVAtScope(RHS, L);
   4581 
   4582   // At this point, we would like to compute how many iterations of the
   4583   // loop the predicate will return true for these inputs.
   4584   if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
   4585     // If there is a loop-invariant, force it into the RHS.
   4586     std::swap(LHS, RHS);
   4587     Cond = ICmpInst::getSwappedPredicate(Cond);
   4588   }
   4589 
   4590   // Simplify the operands before analyzing them.
   4591   (void)SimplifyICmpOperands(Cond, LHS, RHS);
   4592 
   4593   // If we have a comparison of a chrec against a constant, try to use value
   4594   // ranges to answer this query.
   4595   if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
   4596     if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
   4597       if (AddRec->getLoop() == L) {
   4598         // Form the constant range.
   4599         ConstantRange CompRange(
   4600             ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
   4601 
   4602         const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
   4603         if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
   4604       }
   4605 
   4606   switch (Cond) {
   4607   case ICmpInst::ICMP_NE: {                     // while (X != Y)
   4608     // Convert to: while (X-Y != 0)
   4609     ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
   4610     if (EL.hasAnyInfo()) return EL;
   4611     break;
   4612   }
   4613   case ICmpInst::ICMP_EQ: {                     // while (X == Y)
   4614     // Convert to: while (X-Y == 0)
   4615     ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
   4616     if (EL.hasAnyInfo()) return EL;
   4617     break;
   4618   }
   4619   case ICmpInst::ICMP_SLT: {
   4620     ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr);
   4621     if (EL.hasAnyInfo()) return EL;
   4622     break;
   4623   }
   4624   case ICmpInst::ICMP_SGT: {
   4625     ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
   4626                                     getNotSCEV(RHS), L, true, IsSubExpr);
   4627     if (EL.hasAnyInfo()) return EL;
   4628     break;
   4629   }
   4630   case ICmpInst::ICMP_ULT: {
   4631     ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr);
   4632     if (EL.hasAnyInfo()) return EL;
   4633     break;
   4634   }
   4635   case ICmpInst::ICMP_UGT: {
   4636     ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
   4637                                     getNotSCEV(RHS), L, false, IsSubExpr);
   4638     if (EL.hasAnyInfo()) return EL;
   4639     break;
   4640   }
   4641   default:
   4642 #if 0
   4643     dbgs() << "ComputeBackedgeTakenCount ";
   4644     if (ExitCond->getOperand(0)->getType()->isUnsigned())
   4645       dbgs() << "[unsigned] ";
   4646     dbgs() << *LHS << "   "
   4647          << Instruction::getOpcodeName(Instruction::ICmp)
   4648          << "   " << *RHS << "\n";
   4649 #endif
   4650     break;
   4651   }
   4652   return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
   4653 }
   4654 
   4655 static ConstantInt *
   4656 EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
   4657                                 ScalarEvolution &SE) {
   4658   const SCEV *InVal = SE.getConstant(C);
   4659   const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
   4660   assert(isa<SCEVConstant>(Val) &&
   4661          "Evaluation of SCEV at constant didn't fold correctly?");
   4662   return cast<SCEVConstant>(Val)->getValue();
   4663 }
   4664 
   4665 /// ComputeLoadConstantCompareExitLimit - Given an exit condition of
   4666 /// 'icmp op load X, cst', try to see if we can compute the backedge
   4667 /// execution count.
   4668 ScalarEvolution::ExitLimit
   4669 ScalarEvolution::ComputeLoadConstantCompareExitLimit(
   4670   LoadInst *LI,
   4671   Constant *RHS,
   4672   const Loop *L,
   4673   ICmpInst::Predicate predicate) {
   4674 
   4675   if (LI->isVolatile()) return getCouldNotCompute();
   4676 
   4677   // Check to see if the loaded pointer is a getelementptr of a global.
   4678   // TODO: Use SCEV instead of manually grubbing with GEPs.
   4679   GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
   4680   if (!GEP) return getCouldNotCompute();
   4681 
   4682   // Make sure that it is really a constant global we are gepping, with an
   4683   // initializer, and make sure the first IDX is really 0.
   4684   GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
   4685   if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
   4686       GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
   4687       !cast<Constant>(GEP->getOperand(1))->isNullValue())
   4688     return getCouldNotCompute();
   4689 
   4690   // Okay, we allow one non-constant index into the GEP instruction.
   4691   Value *VarIdx = 0;
   4692   std::vector<Constant*> Indexes;
   4693   unsigned VarIdxNum = 0;
   4694   for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
   4695     if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
   4696       Indexes.push_back(CI);
   4697     } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
   4698       if (VarIdx) return getCouldNotCompute();  // Multiple non-constant idx's.
   4699       VarIdx = GEP->getOperand(i);
   4700       VarIdxNum = i-2;
   4701       Indexes.push_back(0);
   4702     }
   4703 
   4704   // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
   4705   if (!VarIdx)
   4706     return getCouldNotCompute();
   4707 
   4708   // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
   4709   // Check to see if X is a loop variant variable value now.
   4710   const SCEV *Idx = getSCEV(VarIdx);
   4711   Idx = getSCEVAtScope(Idx, L);
   4712 
   4713   // We can only recognize very limited forms of loop index expressions, in
   4714   // particular, only affine AddRec's like {C1,+,C2}.
   4715   const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
   4716   if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
   4717       !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
   4718       !isa<SCEVConstant>(IdxExpr->getOperand(1)))
   4719     return getCouldNotCompute();
   4720 
   4721   unsigned MaxSteps = MaxBruteForceIterations;
   4722   for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
   4723     ConstantInt *ItCst = ConstantInt::get(
   4724                            cast<IntegerType>(IdxExpr->getType()), IterationNum);
   4725     ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
   4726 
   4727     // Form the GEP offset.
   4728     Indexes[VarIdxNum] = Val;
   4729 
   4730     Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
   4731                                                          Indexes);
   4732     if (Result == 0) break;  // Cannot compute!
   4733 
   4734     // Evaluate the condition for this iteration.
   4735     Result = ConstantExpr::getICmp(predicate, Result, RHS);
   4736     if (!isa<ConstantInt>(Result)) break;  // Couldn't decide for sure
   4737     if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
   4738 #if 0
   4739       dbgs() << "\n***\n*** Computed loop count " << *ItCst
   4740              << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
   4741              << "***\n";
   4742 #endif
   4743       ++NumArrayLenItCounts;
   4744       return getConstant(ItCst);   // Found terminating iteration!
   4745     }
   4746   }
   4747   return getCouldNotCompute();
   4748 }
   4749 
   4750 
   4751 /// CanConstantFold - Return true if we can constant fold an instruction of the
   4752 /// specified type, assuming that all operands were constants.
   4753 static bool CanConstantFold(const Instruction *I) {
   4754   if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
   4755       isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
   4756       isa<LoadInst>(I))
   4757     return true;
   4758 
   4759   if (const CallInst *CI = dyn_cast<CallInst>(I))
   4760     if (const Function *F = CI->getCalledFunction())
   4761       return canConstantFoldCallTo(F);
   4762   return false;
   4763 }
   4764 
   4765 /// Determine whether this instruction can constant evolve within this loop
   4766 /// assuming its operands can all constant evolve.
   4767 static bool canConstantEvolve(Instruction *I, const Loop *L) {
   4768   // An instruction outside of the loop can't be derived from a loop PHI.
   4769   if (!L->contains(I)) return false;
   4770 
   4771   if (isa<PHINode>(I)) {
   4772     if (L->getHeader() == I->getParent())
   4773       return true;
   4774     else
   4775       // We don't currently keep track of the control flow needed to evaluate
   4776       // PHIs, so we cannot handle PHIs inside of loops.
   4777       return false;
   4778   }
   4779 
   4780   // If we won't be able to constant fold this expression even if the operands
   4781   // are constants, bail early.
   4782   return CanConstantFold(I);
   4783 }
   4784 
   4785 /// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
   4786 /// recursing through each instruction operand until reaching a loop header phi.
   4787 static PHINode *
   4788 getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
   4789                                DenseMap<Instruction *, PHINode *> &PHIMap) {
   4790 
   4791   // Otherwise, we can evaluate this instruction if all of its operands are
   4792   // constant or derived from a PHI node themselves.
   4793   PHINode *PHI = 0;
   4794   for (Instruction::op_iterator OpI = UseInst->op_begin(),
   4795          OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
   4796 
   4797     if (isa<Constant>(*OpI)) continue;
   4798 
   4799     Instruction *OpInst = dyn_cast<Instruction>(*OpI);
   4800     if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
   4801 
   4802     PHINode *P = dyn_cast<PHINode>(OpInst);
   4803     if (!P)
   4804       // If this operand is already visited, reuse the prior result.
   4805       // We may have P != PHI if this is the deepest point at which the
   4806       // inconsistent paths meet.
   4807       P = PHIMap.lookup(OpInst);
   4808     if (!P) {
   4809       // Recurse and memoize the results, whether a phi is found or not.
   4810       // This recursive call invalidates pointers into PHIMap.
   4811       P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
   4812       PHIMap[OpInst] = P;
   4813     }
   4814     if (P == 0) return 0;        // Not evolving from PHI
   4815     if (PHI && PHI != P) return 0;  // Evolving from multiple different PHIs.
   4816     PHI = P;
   4817   }
   4818   // This is a expression evolving from a constant PHI!
   4819   return PHI;
   4820 }
   4821 
   4822 /// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
   4823 /// in the loop that V is derived from.  We allow arbitrary operations along the
   4824 /// way, but the operands of an operation must either be constants or a value
   4825 /// derived from a constant PHI.  If this expression does not fit with these
   4826 /// constraints, return null.
   4827 static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
   4828   Instruction *I = dyn_cast<Instruction>(V);
   4829   if (I == 0 || !canConstantEvolve(I, L)) return 0;
   4830 
   4831   if (PHINode *PN = dyn_cast<PHINode>(I)) {
   4832     return PN;
   4833   }
   4834 
   4835   // Record non-constant instructions contained by the loop.
   4836   DenseMap<Instruction *, PHINode *> PHIMap;
   4837   return getConstantEvolvingPHIOperands(I, L, PHIMap);
   4838 }
   4839 
   4840 /// EvaluateExpression - Given an expression that passes the
   4841 /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
   4842 /// in the loop has the value PHIVal.  If we can't fold this expression for some
   4843 /// reason, return null.
   4844 static Constant *EvaluateExpression(Value *V, const Loop *L,
   4845                                     DenseMap<Instruction *, Constant *> &Vals,
   4846                                     const DataLayout *TD,
   4847                                     const TargetLibraryInfo *TLI) {
   4848   // Convenient constant check, but redundant for recursive calls.
   4849   if (Constant *C = dyn_cast<Constant>(V)) return C;
   4850   Instruction *I = dyn_cast<Instruction>(V);
   4851   if (!I) return 0;
   4852 
   4853   if (Constant *C = Vals.lookup(I)) return C;
   4854 
   4855   // An instruction inside the loop depends on a value outside the loop that we
   4856   // weren't given a mapping for, or a value such as a call inside the loop.
   4857   if (!canConstantEvolve(I, L)) return 0;
   4858 
   4859   // An unmapped PHI can be due to a branch or another loop inside this loop,
   4860   // or due to this not being the initial iteration through a loop where we
   4861   // couldn't compute the evolution of this particular PHI last time.
   4862   if (isa<PHINode>(I)) return 0;
   4863 
   4864   std::vector<Constant*> Operands(I->getNumOperands());
   4865 
   4866   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
   4867     Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
   4868     if (!Operand) {
   4869       Operands[i] = dyn_cast<Constant>(I->getOperand(i));
   4870       if (!Operands[i]) return 0;
   4871       continue;
   4872     }
   4873     Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
   4874     Vals[Operand] = C;
   4875     if (!C) return 0;
   4876     Operands[i] = C;
   4877   }
   4878 
   4879   if (CmpInst *CI = dyn_cast<CmpInst>(I))
   4880     return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
   4881                                            Operands[1], TD, TLI);
   4882   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
   4883     if (!LI->isVolatile())
   4884       return ConstantFoldLoadFromConstPtr(Operands[0], TD);
   4885   }
   4886   return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
   4887                                   TLI);
   4888 }
   4889 
   4890 /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
   4891 /// in the header of its containing loop, we know the loop executes a
   4892 /// constant number of times, and the PHI node is just a recurrence
   4893 /// involving constants, fold it.
   4894 Constant *
   4895 ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
   4896                                                    const APInt &BEs,
   4897                                                    const Loop *L) {
   4898   DenseMap<PHINode*, Constant*>::const_iterator I =
   4899     ConstantEvolutionLoopExitValue.find(PN);
   4900   if (I != ConstantEvolutionLoopExitValue.end())
   4901     return I->second;
   4902 
   4903   if (BEs.ugt(MaxBruteForceIterations))
   4904     return ConstantEvolutionLoopExitValue[PN] = 0;  // Not going to evaluate it.
   4905 
   4906   Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
   4907 
   4908   DenseMap<Instruction *, Constant *> CurrentIterVals;
   4909   BasicBlock *Header = L->getHeader();
   4910   assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
   4911 
   4912   // Since the loop is canonicalized, the PHI node must have two entries.  One
   4913   // entry must be a constant (coming in from outside of the loop), and the
   4914   // second must be derived from the same PHI.
   4915   bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
   4916   PHINode *PHI = 0;
   4917   for (BasicBlock::iterator I = Header->begin();
   4918        (PHI = dyn_cast<PHINode>(I)); ++I) {
   4919     Constant *StartCST =
   4920       dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
   4921     if (StartCST == 0) continue;
   4922     CurrentIterVals[PHI] = StartCST;
   4923   }
   4924   if (!CurrentIterVals.count(PN))
   4925     return RetVal = 0;
   4926 
   4927   Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
   4928 
   4929   // Execute the loop symbolically to determine the exit value.
   4930   if (BEs.getActiveBits() >= 32)
   4931     return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
   4932 
   4933   unsigned NumIterations = BEs.getZExtValue(); // must be in range
   4934   unsigned IterationNum = 0;
   4935   for (; ; ++IterationNum) {
   4936     if (IterationNum == NumIterations)
   4937       return RetVal = CurrentIterVals[PN];  // Got exit value!
   4938 
   4939     // Compute the value of the PHIs for the next iteration.
   4940     // EvaluateExpression adds non-phi values to the CurrentIterVals map.
   4941     DenseMap<Instruction *, Constant *> NextIterVals;
   4942     Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
   4943                                            TLI);
   4944     if (NextPHI == 0)
   4945       return 0;        // Couldn't evaluate!
   4946     NextIterVals[PN] = NextPHI;
   4947 
   4948     bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
   4949 
   4950     // Also evaluate the other PHI nodes.  However, we don't get to stop if we
   4951     // cease to be able to evaluate one of them or if they stop evolving,
   4952     // because that doesn't necessarily prevent us from computing PN.
   4953     SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
   4954     for (DenseMap<Instruction *, Constant *>::const_iterator
   4955            I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
   4956       PHINode *PHI = dyn_cast<PHINode>(I->first);
   4957       if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
   4958       PHIsToCompute.push_back(std::make_pair(PHI, I->second));
   4959     }
   4960     // We use two distinct loops because EvaluateExpression may invalidate any
   4961     // iterators into CurrentIterVals.
   4962     for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
   4963              I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
   4964       PHINode *PHI = I->first;
   4965       Constant *&NextPHI = NextIterVals[PHI];
   4966       if (!NextPHI) {   // Not already computed.
   4967         Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
   4968         NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
   4969       }
   4970       if (NextPHI != I->second)
   4971         StoppedEvolving = false;
   4972     }
   4973 
   4974     // If all entries in CurrentIterVals == NextIterVals then we can stop
   4975     // iterating, the loop can't continue to change.
   4976     if (StoppedEvolving)
   4977       return RetVal = CurrentIterVals[PN];
   4978 
   4979     CurrentIterVals.swap(NextIterVals);
   4980   }
   4981 }
   4982 
   4983 /// ComputeExitCountExhaustively - If the loop is known to execute a
   4984 /// constant number of times (the condition evolves only from constants),
   4985 /// try to evaluate a few iterations of the loop until we get the exit
   4986 /// condition gets a value of ExitWhen (true or false).  If we cannot
   4987 /// evaluate the trip count of the loop, return getCouldNotCompute().
   4988 const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
   4989                                                           Value *Cond,
   4990                                                           bool ExitWhen) {
   4991   PHINode *PN = getConstantEvolvingPHI(Cond, L);
   4992   if (PN == 0) return getCouldNotCompute();
   4993 
   4994   // If the loop is canonicalized, the PHI will have exactly two entries.
   4995   // That's the only form we support here.
   4996   if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
   4997 
   4998   DenseMap<Instruction *, Constant *> CurrentIterVals;
   4999   BasicBlock *Header = L->getHeader();
   5000   assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
   5001 
   5002   // One entry must be a constant (coming in from outside of the loop), and the
   5003   // second must be derived from the same PHI.
   5004   bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
   5005   PHINode *PHI = 0;
   5006   for (BasicBlock::iterator I = Header->begin();
   5007        (PHI = dyn_cast<PHINode>(I)); ++I) {
   5008     Constant *StartCST =
   5009       dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
   5010     if (StartCST == 0) continue;
   5011     CurrentIterVals[PHI] = StartCST;
   5012   }
   5013   if (!CurrentIterVals.count(PN))
   5014     return getCouldNotCompute();
   5015 
   5016   // Okay, we find a PHI node that defines the trip count of this loop.  Execute
   5017   // the loop symbolically to determine when the condition gets a value of
   5018   // "ExitWhen".
   5019 
   5020   unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
   5021   for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
   5022     ConstantInt *CondVal =
   5023       dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
   5024                                                        TD, TLI));
   5025 
   5026     // Couldn't symbolically evaluate.
   5027     if (!CondVal) return getCouldNotCompute();
   5028 
   5029     if (CondVal->getValue() == uint64_t(ExitWhen)) {
   5030       ++NumBruteForceTripCountsComputed;
   5031       return getConstant(Type::getInt32Ty(getContext()), IterationNum);
   5032     }
   5033 
   5034     // Update all the PHI nodes for the next iteration.
   5035     DenseMap<Instruction *, Constant *> NextIterVals;
   5036 
   5037     // Create a list of which PHIs we need to compute. We want to do this before
   5038     // calling EvaluateExpression on them because that may invalidate iterators
   5039     // into CurrentIterVals.
   5040     SmallVector<PHINode *, 8> PHIsToCompute;
   5041     for (DenseMap<Instruction *, Constant *>::const_iterator
   5042            I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
   5043       PHINode *PHI = dyn_cast<PHINode>(I->first);
   5044       if (!PHI || PHI->getParent() != Header) continue;
   5045       PHIsToCompute.push_back(PHI);
   5046     }
   5047     for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
   5048              E = PHIsToCompute.end(); I != E; ++I) {
   5049       PHINode *PHI = *I;
   5050       Constant *&NextPHI = NextIterVals[PHI];
   5051       if (NextPHI) continue;    // Already computed!
   5052 
   5053       Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
   5054       NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
   5055     }
   5056     CurrentIterVals.swap(NextIterVals);
   5057   }
   5058 
   5059   // Too many iterations were needed to evaluate.
   5060   return getCouldNotCompute();
   5061 }
   5062 
   5063 /// getSCEVAtScope - Return a SCEV expression for the specified value
   5064 /// at the specified scope in the program.  The L value specifies a loop
   5065 /// nest to evaluate the expression at, where null is the top-level or a
   5066 /// specified loop is immediately inside of the loop.
   5067 ///
   5068 /// This method can be used to compute the exit value for a variable defined
   5069 /// in a loop by querying what the value will hold in the parent loop.
   5070 ///
   5071 /// In the case that a relevant loop exit value cannot be computed, the
   5072 /// original value V is returned.
   5073 const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
   5074   // Check to see if we've folded this expression at this loop before.
   5075   std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
   5076   std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
   5077     Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
   5078   if (!Pair.second)
   5079     return Pair.first->second ? Pair.first->second : V;
   5080 
   5081   // Otherwise compute it.
   5082   const SCEV *C = computeSCEVAtScope(V, L);
   5083   ValuesAtScopes[V][L] = C;
   5084   return C;
   5085 }
   5086 
   5087 /// This builds up a Constant using the ConstantExpr interface.  That way, we
   5088 /// will return Constants for objects which aren't represented by a
   5089 /// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
   5090 /// Returns NULL if the SCEV isn't representable as a Constant.
   5091 static Constant *BuildConstantFromSCEV(const SCEV *V) {
   5092   switch (V->getSCEVType()) {
   5093     default:  // TODO: smax, umax.
   5094     case scCouldNotCompute:
   5095     case scAddRecExpr:
   5096       break;
   5097     case scConstant:
   5098       return cast<SCEVConstant>(V)->getValue();
   5099     case scUnknown:
   5100       return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
   5101     case scSignExtend: {
   5102       const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
   5103       if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
   5104         return ConstantExpr::getSExt(CastOp, SS->getType());
   5105       break;
   5106     }
   5107     case scZeroExtend: {
   5108       const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
   5109       if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
   5110         return ConstantExpr::getZExt(CastOp, SZ->getType());
   5111       break;
   5112     }
   5113     case scTruncate: {
   5114       const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
   5115       if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
   5116         return ConstantExpr::getTrunc(CastOp, ST->getType());
   5117       break;
   5118     }
   5119     case scAddExpr: {
   5120       const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
   5121       if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
   5122         if (C->getType()->isPointerTy())
   5123           C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
   5124         for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
   5125           Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
   5126           if (!C2) return 0;
   5127 
   5128           // First pointer!
   5129           if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
   5130             std::swap(C, C2);
   5131             // The offsets have been converted to bytes.  We can add bytes to an
   5132             // i8* by GEP with the byte count in the first index.
   5133             C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext()));
   5134           }
   5135 
   5136           // Don't bother trying to sum two pointers. We probably can't
   5137           // statically compute a load that results from it anyway.
   5138           if (C2->getType()->isPointerTy())
   5139             return 0;
   5140 
   5141           if (C->getType()->isPointerTy()) {
   5142             if (cast<PointerType>(C->getType())->getElementType()->isStructTy())
   5143               C2 = ConstantExpr::getIntegerCast(
   5144                   C2, Type::getInt32Ty(C->getContext()), true);
   5145             C = ConstantExpr::getGetElementPtr(C, C2);
   5146           } else
   5147             C = ConstantExpr::getAdd(C, C2);
   5148         }
   5149         return C;
   5150       }
   5151       break;
   5152     }
   5153     case scMulExpr: {
   5154       const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
   5155       if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
   5156         // Don't bother with pointers at all.
   5157         if (C->getType()->isPointerTy()) return 0;
   5158         for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
   5159           Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
   5160           if (!C2 || C2->getType()->isPointerTy()) return 0;
   5161           C = ConstantExpr::getMul(C, C2);
   5162         }
   5163         return C;
   5164       }
   5165       break;
   5166     }
   5167     case scUDivExpr: {
   5168       const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
   5169       if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
   5170         if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
   5171           if (LHS->getType() == RHS->getType())
   5172             return ConstantExpr::getUDiv(LHS, RHS);
   5173       break;
   5174     }
   5175   }
   5176   return 0;
   5177 }
   5178 
   5179 const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
   5180   if (isa<SCEVConstant>(V)) return V;
   5181 
   5182   // If this instruction is evolved from a constant-evolving PHI, compute the
   5183   // exit value from the loop without using SCEVs.
   5184   if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
   5185     if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
   5186       const Loop *LI = (*this->LI)[I->getParent()];
   5187       if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.
   5188         if (PHINode *PN = dyn_cast<PHINode>(I))
   5189           if (PN->getParent() == LI->getHeader()) {
   5190             // Okay, there is no closed form solution for the PHI node.  Check
   5191             // to see if the loop that contains it has a known backedge-taken
   5192             // count.  If so, we may be able to force computation of the exit
   5193             // value.
   5194             const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
   5195             if (const SCEVConstant *BTCC =
   5196                   dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
   5197               // Okay, we know how many times the containing loop executes.  If
   5198               // this is a constant evolving PHI node, get the final value at
   5199               // the specified iteration number.
   5200               Constant *RV = getConstantEvolutionLoopExitValue(PN,
   5201                                                    BTCC->getValue()->getValue(),
   5202                                                                LI);
   5203               if (RV) return getSCEV(RV);
   5204             }
   5205           }
   5206 
   5207       // Okay, this is an expression that we cannot symbolically evaluate
   5208       // into a SCEV.  Check to see if it's possible to symbolically evaluate
   5209       // the arguments into constants, and if so, try to constant propagate the
   5210       // result.  This is particularly useful for computing loop exit values.
   5211       if (CanConstantFold(I)) {
   5212         SmallVector<Constant *, 4> Operands;
   5213         bool MadeImprovement = false;
   5214         for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
   5215           Value *Op = I->getOperand(i);
   5216           if (Constant *C = dyn_cast<Constant>(Op)) {
   5217             Operands.push_back(C);
   5218             continue;
   5219           }
   5220 
   5221           // If any of the operands is non-constant and if they are
   5222           // non-integer and non-pointer, don't even try to analyze them
   5223           // with scev techniques.
   5224           if (!isSCEVable(Op->getType()))
   5225             return V;
   5226 
   5227           const SCEV *OrigV = getSCEV(Op);
   5228           const SCEV *OpV = getSCEVAtScope(OrigV, L);
   5229           MadeImprovement |= OrigV != OpV;
   5230 
   5231           Constant *C = BuildConstantFromSCEV(OpV);
   5232           if (!C) return V;
   5233           if (C->getType() != Op->getType())
   5234             C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
   5235                                                               Op->getType(),
   5236                                                               false),
   5237                                       C, Op->getType());
   5238           Operands.push_back(C);
   5239         }
   5240 
   5241         // Check to see if getSCEVAtScope actually made an improvement.
   5242         if (MadeImprovement) {
   5243           Constant *C = 0;
   5244           if (const CmpInst *CI = dyn_cast<CmpInst>(I))
   5245             C = ConstantFoldCompareInstOperands(CI->getPredicate(),
   5246                                                 Operands[0], Operands[1], TD,
   5247                                                 TLI);
   5248           else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
   5249             if (!LI->isVolatile())
   5250               C = ConstantFoldLoadFromConstPtr(Operands[0], TD);
   5251           } else
   5252             C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
   5253                                          Operands, TD, TLI);
   5254           if (!C) return V;
   5255           return getSCEV(C);
   5256         }
   5257       }
   5258     }
   5259 
   5260     // This is some other type of SCEVUnknown, just return it.
   5261     return V;
   5262   }
   5263 
   5264   if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
   5265     // Avoid performing the look-up in the common case where the specified
   5266     // expression has no loop-variant portions.
   5267     for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
   5268       const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
   5269       if (OpAtScope != Comm->getOperand(i)) {
   5270         // Okay, at least one of these operands is loop variant but might be
   5271         // foldable.  Build a new instance of the folded commutative expression.
   5272         SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
   5273                                             Comm->op_begin()+i);
   5274         NewOps.push_back(OpAtScope);
   5275 
   5276         for (++i; i != e; ++i) {
   5277           OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
   5278           NewOps.push_back(OpAtScope);
   5279         }
   5280         if (isa<SCEVAddExpr>(Comm))
   5281           return getAddExpr(NewOps);
   5282         if (isa<SCEVMulExpr>(Comm))
   5283           return getMulExpr(NewOps);
   5284         if (isa<SCEVSMaxExpr>(Comm))
   5285           return getSMaxExpr(NewOps);
   5286         if (isa<SCEVUMaxExpr>(Comm))
   5287           return getUMaxExpr(NewOps);
   5288         llvm_unreachable("Unknown commutative SCEV type!");
   5289       }
   5290     }
   5291     // If we got here, all operands are loop invariant.
   5292     return Comm;
   5293   }
   5294 
   5295   if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
   5296     const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
   5297     const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
   5298     if (LHS == Div->getLHS() && RHS == Div->getRHS())
   5299       return Div;   // must be loop invariant
   5300     return getUDivExpr(LHS, RHS);
   5301   }
   5302 
   5303   // If this is a loop recurrence for a loop that does not contain L, then we
   5304   // are dealing with the final value computed by the loop.
   5305   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
   5306     // First, attempt to evaluate each operand.
   5307     // Avoid performing the look-up in the common case where the specified
   5308     // expression has no loop-variant portions.
   5309     for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
   5310       const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
   5311       if (OpAtScope == AddRec->getOperand(i))
   5312         continue;
   5313 
   5314       // Okay, at least one of these operands is loop variant but might be
   5315       // foldable.  Build a new instance of the folded commutative expression.
   5316       SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
   5317                                           AddRec->op_begin()+i);
   5318       NewOps.push_back(OpAtScope);
   5319       for (++i; i != e; ++i)
   5320         NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
   5321 
   5322       const SCEV *FoldedRec =
   5323         getAddRecExpr(NewOps, AddRec->getLoop(),
   5324                       AddRec->getNoWrapFlags(SCEV::FlagNW));
   5325       AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
   5326       // The addrec may be folded to a nonrecurrence, for example, if the
   5327       // induction variable is multiplied by zero after constant folding. Go
   5328       // ahead and return the folded value.
   5329       if (!AddRec)
   5330         return FoldedRec;
   5331       break;
   5332     }
   5333 
   5334     // If the scope is outside the addrec's loop, evaluate it by using the
   5335     // loop exit value of the addrec.
   5336     if (!AddRec->getLoop()->contains(L)) {
   5337       // To evaluate this recurrence, we need to know how many times the AddRec
   5338       // loop iterates.  Compute this now.
   5339       const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
   5340       if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;
   5341 
   5342       // Then, evaluate the AddRec.
   5343       return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
   5344     }
   5345 
   5346     return AddRec;
   5347   }
   5348 
   5349   if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
   5350     const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
   5351     if (Op == Cast->getOperand())
   5352       return Cast;  // must be loop invariant
   5353     return getZeroExtendExpr(Op, Cast->getType());
   5354   }
   5355 
   5356   if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
   5357     const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
   5358     if (Op == Cast->getOperand())
   5359       return Cast;  // must be loop invariant
   5360     return getSignExtendExpr(Op, Cast->getType());
   5361   }
   5362 
   5363   if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
   5364     const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
   5365     if (Op == Cast->getOperand())
   5366       return Cast;  // must be loop invariant
   5367     return getTruncateExpr(Op, Cast->getType());
   5368   }
   5369 
   5370   llvm_unreachable("Unknown SCEV type!");
   5371 }
   5372 
   5373 /// getSCEVAtScope - This is a convenience function which does
   5374 /// getSCEVAtScope(getSCEV(V), L).
   5375 const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
   5376   return getSCEVAtScope(getSCEV(V), L);
   5377 }
   5378 
   5379 /// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the
   5380 /// following equation:
   5381 ///
   5382 ///     A * X = B (mod N)
   5383 ///
   5384 /// where N = 2^BW and BW is the common bit width of A and B. The signedness of
   5385 /// A and B isn't important.
   5386 ///
   5387 /// If the equation does not have a solution, SCEVCouldNotCompute is returned.
   5388 static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
   5389                                                ScalarEvolution &SE) {
   5390   uint32_t BW = A.getBitWidth();
   5391   assert(BW == B.getBitWidth() && "Bit widths must be the same.");
   5392   assert(A != 0 && "A must be non-zero.");
   5393 
   5394   // 1. D = gcd(A, N)
   5395   //
   5396   // The gcd of A and N may have only one prime factor: 2. The number of
   5397   // trailing zeros in A is its multiplicity
   5398   uint32_t Mult2 = A.countTrailingZeros();
   5399   // D = 2^Mult2
   5400 
   5401   // 2. Check if B is divisible by D.
   5402   //
   5403   // B is divisible by D if and only if the multiplicity of prime factor 2 for B
   5404   // is not less than multiplicity of this prime factor for D.
   5405   if (B.countTrailingZeros() < Mult2)
   5406     return SE.getCouldNotCompute();
   5407 
   5408   // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
   5409   // modulo (N / D).
   5410   //
   5411   // (N / D) may need BW+1 bits in its representation.  Hence, we'll use this
   5412   // bit width during computations.
   5413   APInt AD = A.lshr(Mult2).zext(BW + 1);  // AD = A / D
   5414   APInt Mod(BW + 1, 0);
   5415   Mod.setBit(BW - Mult2);  // Mod = N / D
   5416   APInt I = AD.multiplicativeInverse(Mod);
   5417 
   5418   // 4. Compute the minimum unsigned root of the equation:
   5419   // I * (B / D) mod (N / D)
   5420   APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod);
   5421 
   5422   // The result is guaranteed to be less than 2^BW so we may truncate it to BW
   5423   // bits.
   5424   return SE.getConstant(Result.trunc(BW));
   5425 }
   5426 
   5427 /// SolveQuadraticEquation - Find the roots of the quadratic equation for the
   5428 /// given quadratic chrec {L,+,M,+,N}.  This returns either the two roots (which
   5429 /// might be the same) or two SCEVCouldNotCompute objects.
   5430 ///
   5431 static std::pair<const SCEV *,const SCEV *>
   5432 SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
   5433   assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
   5434   const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
   5435   const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
   5436   const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
   5437 
   5438   // We currently can only solve this if the coefficients are constants.
   5439   if (!LC || !MC || !NC) {
   5440     const SCEV *CNC = SE.getCouldNotCompute();
   5441     return std::make_pair(CNC, CNC);
   5442   }
   5443 
   5444   uint32_t BitWidth = LC->getValue()->getValue().getBitWidth();
   5445   const APInt &L = LC->getValue()->getValue();
   5446   const APInt &M = MC->getValue()->getValue();
   5447   const APInt &N = NC->getValue()->getValue();
   5448   APInt Two(BitWidth, 2);
   5449   APInt Four(BitWidth, 4);
   5450 
   5451   {
   5452     using namespace APIntOps;
   5453     const APInt& C = L;
   5454     // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
   5455     // The B coefficient is M-N/2
   5456     APInt B(M);
   5457     B -= sdiv(N,Two);
   5458 
   5459     // The A coefficient is N/2
   5460     APInt A(N.sdiv(Two));
   5461 
   5462     // Compute the B^2-4ac term.
   5463     APInt SqrtTerm(B);
   5464     SqrtTerm *= B;
   5465     SqrtTerm -= Four * (A * C);
   5466 
   5467     if (SqrtTerm.isNegative()) {
   5468       // The loop is provably infinite.
   5469       const SCEV *CNC = SE.getCouldNotCompute();
   5470       return std::make_pair(CNC, CNC);
   5471     }
   5472 
   5473     // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
   5474     // integer value or else APInt::sqrt() will assert.
   5475     APInt SqrtVal(SqrtTerm.sqrt());
   5476 
   5477     // Compute the two solutions for the quadratic formula.
   5478     // The divisions must be performed as signed divisions.
   5479     APInt NegB(-B);
   5480     APInt TwoA(A << 1);
   5481     if (TwoA.isMinValue()) {
   5482       const SCEV *CNC = SE.getCouldNotCompute();
   5483       return std::make_pair(CNC, CNC);
   5484     }
   5485 
   5486     LLVMContext &Context = SE.getContext();
   5487 
   5488     ConstantInt *Solution1 =
   5489       ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
   5490     ConstantInt *Solution2 =
   5491       ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
   5492 
   5493     return std::make_pair(SE.getConstant(Solution1),
   5494                           SE.getConstant(Solution2));
   5495   } // end APIntOps namespace
   5496 }
   5497 
   5498 /// HowFarToZero - Return the number of times a backedge comparing the specified
   5499 /// value to zero will execute.  If not computable, return CouldNotCompute.
   5500 ///
   5501 /// This is only used for loops with a "x != y" exit test. The exit condition is
   5502 /// now expressed as a single expression, V = x-y. So the exit test is
   5503 /// effectively V != 0.  We know and take advantage of the fact that this
   5504 /// expression only being used in a comparison by zero context.
   5505 ScalarEvolution::ExitLimit
   5506 ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
   5507   // If the value is a constant
   5508   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
   5509     // If the value is already zero, the branch will execute zero times.
   5510     if (C->getValue()->isZero()) return C;
   5511     return getCouldNotCompute();  // Otherwise it will loop infinitely.
   5512   }
   5513 
   5514   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
   5515   if (!AddRec || AddRec->getLoop() != L)
   5516     return getCouldNotCompute();
   5517 
   5518   // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
   5519   // the quadratic equation to solve it.
   5520   if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
   5521     std::pair<const SCEV *,const SCEV *> Roots =
   5522       SolveQuadraticEquation(AddRec, *this);
   5523     const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
   5524     const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
   5525     if (R1 && R2) {
   5526 #if 0
   5527       dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
   5528              << "  sol#2: " << *R2 << "\n";
   5529 #endif
   5530       // Pick the smallest positive root value.
   5531       if (ConstantInt *CB =
   5532           dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
   5533                                                       R1->getValue(),
   5534                                                       R2->getValue()))) {
   5535         if (CB->getZExtValue() == false)
   5536           std::swap(R1, R2);   // R1 is the minimum root now.
   5537 
   5538         // We can only use this value if the chrec ends up with an exact zero
   5539         // value at this index.  When solving for "X*X != 5", for example, we
   5540         // should not accept a root of 2.
   5541         const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
   5542         if (Val->isZero())
   5543           return R1;  // We found a quadratic root!
   5544       }
   5545     }
   5546     return getCouldNotCompute();
   5547   }
   5548 
   5549   // Otherwise we can only handle this if it is affine.
   5550   if (!AddRec->isAffine())
   5551     return getCouldNotCompute();
   5552 
   5553   // If this is an affine expression, the execution count of this branch is
   5554   // the minimum unsigned root of the following equation:
   5555   //
   5556   //     Start + Step*N = 0 (mod 2^BW)
   5557   //
   5558   // equivalent to:
   5559   //
   5560   //             Step*N = -Start (mod 2^BW)
   5561   //
   5562   // where BW is the common bit width of Start and Step.
   5563 
   5564   // Get the initial value for the loop.
   5565   const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
   5566   const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
   5567 
   5568   // For now we handle only constant steps.
   5569   //
   5570   // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
   5571   // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
   5572   // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
   5573   // We have not yet seen any such cases.
   5574   const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
   5575   if (StepC == 0 || StepC->getValue()->equalsInt(0))
   5576     return getCouldNotCompute();
   5577 
   5578   // For positive steps (counting up until unsigned overflow):
   5579   //   N = -Start/Step (as unsigned)
   5580   // For negative steps (counting down to zero):
   5581   //   N = Start/-Step
   5582   // First compute the unsigned distance from zero in the direction of Step.
   5583   bool CountDown = StepC->getValue()->getValue().isNegative();
   5584   const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
   5585 
   5586   // Handle unitary steps, which cannot wraparound.
   5587   // 1*N = -Start; -1*N = Start (mod 2^BW), so:
   5588   //   N = Distance (as unsigned)
   5589   if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
   5590     ConstantRange CR = getUnsignedRange(Start);
   5591     const SCEV *MaxBECount;
   5592     if (!CountDown && CR.getUnsignedMin().isMinValue())
   5593       // When counting up, the worst starting value is 1, not 0.
   5594       MaxBECount = CR.getUnsignedMax().isMinValue()
   5595         ? getConstant(APInt::getMinValue(CR.getBitWidth()))
   5596         : getConstant(APInt::getMaxValue(CR.getBitWidth()));
   5597     else
   5598       MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
   5599                                          : -CR.getUnsignedMin());
   5600     return ExitLimit(Distance, MaxBECount);
   5601   }
   5602 
   5603   // If the recurrence is known not to wraparound, unsigned divide computes the
   5604   // back edge count. (Ideally we would have an "isexact" bit for udiv). We know
   5605   // that the value will either become zero (and thus the loop terminates), that
   5606   // the loop will terminate through some other exit condition first, or that
   5607   // the loop has undefined behavior.  This means we can't "miss" the exit
   5608   // value, even with nonunit stride.
   5609   //
   5610   // This is only valid for expressions that directly compute the loop exit. It
   5611   // is invalid for subexpressions in which the loop may exit through this
   5612   // branch even if this subexpression is false. In that case, the trip count
   5613   // computed by this udiv could be smaller than the number of well-defined
   5614   // iterations.
   5615   if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW))
   5616     return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
   5617 
   5618   // Then, try to solve the above equation provided that Start is constant.
   5619   if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
   5620     return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
   5621                                         -StartC->getValue()->getValue(),
   5622                                         *this);
   5623   return getCouldNotCompute();
   5624 }
   5625 
   5626 /// HowFarToNonZero - Return the number of times a backedge checking the
   5627 /// specified value for nonzero will execute.  If not computable, return
   5628 /// CouldNotCompute
   5629 ScalarEvolution::ExitLimit
   5630 ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
   5631   // Loops that look like: while (X == 0) are very strange indeed.  We don't
   5632   // handle them yet except for the trivial case.  This could be expanded in the
   5633   // future as needed.
   5634 
   5635   // If the value is a constant, check to see if it is known to be non-zero
   5636   // already.  If so, the backedge will execute zero times.
   5637   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
   5638     if (!C->getValue()->isNullValue())
   5639       return getConstant(C->getType(), 0);
   5640     return getCouldNotCompute();  // Otherwise it will loop infinitely.
   5641   }
   5642 
   5643   // We could implement others, but I really doubt anyone writes loops like
   5644   // this, and if they did, they would already be constant folded.
   5645   return getCouldNotCompute();
   5646 }
   5647 
   5648 /// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
   5649 /// (which may not be an immediate predecessor) which has exactly one
   5650 /// successor from which BB is reachable, or null if no such block is
   5651 /// found.
   5652 ///
   5653 std::pair<BasicBlock *, BasicBlock *>
   5654 ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
   5655   // If the block has a unique predecessor, then there is no path from the
   5656   // predecessor to the block that does not go through the direct edge
   5657   // from the predecessor to the block.
   5658   if (BasicBlock *Pred = BB->getSinglePredecessor())
   5659     return std::make_pair(Pred, BB);
   5660 
   5661   // A loop's header is defined to be a block that dominates the loop.
   5662   // If the header has a unique predecessor outside the loop, it must be
   5663   // a block that has exactly one successor that can reach the loop.
   5664   if (Loop *L = LI->getLoopFor(BB))
   5665     return std::make_pair(L->getLoopPredecessor(), L->getHeader());
   5666 
   5667   return std::pair<BasicBlock *, BasicBlock *>();
   5668 }
   5669 
   5670 /// HasSameValue - SCEV structural equivalence is usually sufficient for
   5671 /// testing whether two expressions are equal, however for the purposes of
   5672 /// looking for a condition guarding a loop, it can be useful to be a little
   5673 /// more general, since a front-end may have replicated the controlling
   5674 /// expression.
   5675 ///
   5676 static bool HasSameValue(const SCEV *A, const SCEV *B) {
   5677   // Quick check to see if they are the same SCEV.
   5678   if (A == B) return true;
   5679 
   5680   // Otherwise, if they're both SCEVUnknown, it's possible that they hold
   5681   // two different instructions with the same value. Check for this case.
   5682   if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
   5683     if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
   5684       if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
   5685         if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
   5686           if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
   5687             return true;
   5688 
   5689   // Otherwise assume they may have a different value.
   5690   return false;
   5691 }
   5692 
   5693 /// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
   5694 /// predicate Pred. Return true iff any changes were made.
   5695 ///
   5696 bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
   5697                                            const SCEV *&LHS, const SCEV *&RHS,
   5698                                            unsigned Depth) {
   5699   bool Changed = false;
   5700 
   5701   // If we hit the max recursion limit bail out.
   5702   if (Depth >= 3)
   5703     return false;
   5704 
   5705   // Canonicalize a constant to the right side.
   5706   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
   5707     // Check for both operands constant.
   5708     if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
   5709       if (ConstantExpr::getICmp(Pred,
   5710                                 LHSC->getValue(),
   5711                                 RHSC->getValue())->isNullValue())
   5712         goto trivially_false;
   5713       else
   5714         goto trivially_true;
   5715     }
   5716     // Otherwise swap the operands to put the constant on the right.
   5717     std::swap(LHS, RHS);
   5718     Pred = ICmpInst::getSwappedPredicate(Pred);
   5719     Changed = true;
   5720   }
   5721 
   5722   // If we're comparing an addrec with a value which is loop-invariant in the
   5723   // addrec's loop, put the addrec on the left. Also make a dominance check,
   5724   // as both operands could be addrecs loop-invariant in each other's loop.
   5725   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
   5726     const Loop *L = AR->getLoop();
   5727     if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
   5728       std::swap(LHS, RHS);
   5729       Pred = ICmpInst::getSwappedPredicate(Pred);
   5730       Changed = true;
   5731     }
   5732   }
   5733 
   5734   // If there's a constant operand, canonicalize comparisons with boundary
   5735   // cases, and canonicalize *-or-equal comparisons to regular comparisons.
   5736   if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
   5737     const APInt &RA = RC->getValue()->getValue();
   5738     switch (Pred) {
   5739     default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
   5740     case ICmpInst::ICMP_EQ:
   5741     case ICmpInst::ICMP_NE:
   5742       // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
   5743       if (!RA)
   5744         if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
   5745           if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
   5746             if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
   5747                 ME->getOperand(0)->isAllOnesValue()) {
   5748               RHS = AE->getOperand(1);
   5749               LHS = ME->getOperand(1);
   5750               Changed = true;
   5751             }
   5752       break;
   5753     case ICmpInst::ICMP_UGE:
   5754       if ((RA - 1).isMinValue()) {
   5755         Pred = ICmpInst::ICMP_NE;
   5756         RHS = getConstant(RA - 1);
   5757         Changed = true;
   5758         break;
   5759       }
   5760       if (RA.isMaxValue()) {
   5761         Pred = ICmpInst::ICMP_EQ;
   5762         Changed = true;
   5763         break;
   5764       }
   5765       if (RA.isMinValue()) goto trivially_true;
   5766 
   5767       Pred = ICmpInst::ICMP_UGT;
   5768       RHS = getConstant(RA - 1);
   5769       Changed = true;
   5770       break;
   5771     case ICmpInst::ICMP_ULE:
   5772       if ((RA + 1).isMaxValue()) {
   5773         Pred = ICmpInst::ICMP_NE;
   5774         RHS = getConstant(RA + 1);
   5775         Changed = true;
   5776         break;
   5777       }
   5778       if (RA.isMinValue()) {
   5779         Pred = ICmpInst::ICMP_EQ;
   5780         Changed = true;
   5781         break;
   5782       }
   5783       if (RA.isMaxValue()) goto trivially_true;
   5784 
   5785       Pred = ICmpInst::ICMP_ULT;
   5786       RHS = getConstant(RA + 1);
   5787       Changed = true;
   5788       break;
   5789     case ICmpInst::ICMP_SGE:
   5790       if ((RA - 1).isMinSignedValue()) {
   5791         Pred = ICmpInst::ICMP_NE;
   5792         RHS = getConstant(RA - 1);
   5793         Changed = true;
   5794         break;
   5795       }
   5796       if (RA.isMaxSignedValue()) {
   5797         Pred = ICmpInst::ICMP_EQ;
   5798         Changed = true;
   5799         break;
   5800       }
   5801       if (RA.isMinSignedValue()) goto trivially_true;
   5802 
   5803       Pred = ICmpInst::ICMP_SGT;
   5804       RHS = getConstant(RA - 1);
   5805       Changed = true;
   5806       break;
   5807     case ICmpInst::ICMP_SLE:
   5808       if ((RA + 1).isMaxSignedValue()) {
   5809         Pred = ICmpInst::ICMP_NE;
   5810         RHS = getConstant(RA + 1);
   5811         Changed = true;
   5812         break;
   5813       }
   5814       if (RA.isMinSignedValue()) {
   5815         Pred = ICmpInst::ICMP_EQ;
   5816         Changed = true;
   5817         break;
   5818       }
   5819       if (RA.isMaxSignedValue()) goto trivially_true;
   5820 
   5821       Pred = ICmpInst::ICMP_SLT;
   5822       RHS = getConstant(RA + 1);
   5823       Changed = true;
   5824       break;
   5825     case ICmpInst::ICMP_UGT:
   5826       if (RA.isMinValue()) {
   5827         Pred = ICmpInst::ICMP_NE;
   5828         Changed = true;
   5829         break;
   5830       }
   5831       if ((RA + 1).isMaxValue()) {
   5832         Pred = ICmpInst::ICMP_EQ;
   5833         RHS = getConstant(RA + 1);
   5834         Changed = true;
   5835         break;
   5836       }
   5837       if (RA.isMaxValue()) goto trivially_false;
   5838       break;
   5839     case ICmpInst::ICMP_ULT:
   5840       if (RA.isMaxValue()) {
   5841         Pred = ICmpInst::ICMP_NE;
   5842         Changed = true;
   5843         break;
   5844       }
   5845       if ((RA - 1).isMinValue()) {
   5846         Pred = ICmpInst::ICMP_EQ;
   5847         RHS = getConstant(RA - 1);
   5848         Changed = true;
   5849         break;
   5850       }
   5851       if (RA.isMinValue()) goto trivially_false;
   5852       break;
   5853     case ICmpInst::ICMP_SGT:
   5854       if (RA.isMinSignedValue()) {
   5855         Pred = ICmpInst::ICMP_NE;
   5856         Changed = true;
   5857         break;
   5858       }
   5859       if ((RA + 1).isMaxSignedValue()) {
   5860         Pred = ICmpInst::ICMP_EQ;
   5861         RHS = getConstant(RA + 1);
   5862         Changed = true;
   5863         break;
   5864       }
   5865       if (RA.isMaxSignedValue()) goto trivially_false;
   5866       break;
   5867     case ICmpInst::ICMP_SLT:
   5868       if (RA.isMaxSignedValue()) {
   5869         Pred = ICmpInst::ICMP_NE;
   5870         Changed = true;
   5871         break;
   5872       }
   5873       if ((RA - 1).isMinSignedValue()) {
   5874        Pred = ICmpInst::ICMP_EQ;
   5875        RHS = getConstant(RA - 1);
   5876         Changed = true;
   5877        break;
   5878       }
   5879       if (RA.isMinSignedValue()) goto trivially_false;
   5880       break;
   5881     }
   5882   }
   5883 
   5884   // Check for obvious equality.
   5885   if (HasSameValue(LHS, RHS)) {
   5886     if (ICmpInst::isTrueWhenEqual(Pred))
   5887       goto trivially_true;
   5888     if (ICmpInst::isFalseWhenEqual(Pred))
   5889       goto trivially_false;
   5890   }
   5891 
   5892   // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
   5893   // adding or subtracting 1 from one of the operands.
   5894   switch (Pred) {
   5895   case ICmpInst::ICMP_SLE:
   5896     if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) {
   5897       RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
   5898                        SCEV::FlagNSW);
   5899       Pred = ICmpInst::ICMP_SLT;
   5900       Changed = true;
   5901     } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) {
   5902       LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
   5903                        SCEV::FlagNSW);
   5904       Pred = ICmpInst::ICMP_SLT;
   5905       Changed = true;
   5906     }
   5907     break;
   5908   case ICmpInst::ICMP_SGE:
   5909     if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) {
   5910       RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
   5911                        SCEV::FlagNSW);
   5912       Pred = ICmpInst::ICMP_SGT;
   5913       Changed = true;
   5914     } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) {
   5915       LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
   5916                        SCEV::FlagNSW);
   5917       Pred = ICmpInst::ICMP_SGT;
   5918       Changed = true;
   5919     }
   5920     break;
   5921   case ICmpInst::ICMP_ULE:
   5922     if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) {
   5923       RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
   5924                        SCEV::FlagNUW);
   5925       Pred = ICmpInst::ICMP_ULT;
   5926       Changed = true;
   5927     } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) {
   5928       LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS,
   5929                        SCEV::FlagNUW);
   5930       Pred = ICmpInst::ICMP_ULT;
   5931       Changed = true;
   5932     }
   5933     break;
   5934   case ICmpInst::ICMP_UGE:
   5935     if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) {
   5936       RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
   5937                        SCEV::FlagNUW);
   5938       Pred = ICmpInst::ICMP_UGT;
   5939       Changed = true;
   5940     } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) {
   5941       LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS,
   5942                        SCEV::FlagNUW);
   5943       Pred = ICmpInst::ICMP_UGT;
   5944       Changed = true;
   5945     }
   5946     break;
   5947   default:
   5948     break;
   5949   }
   5950 
   5951   // TODO: More simplifications are possible here.
   5952 
   5953   // Recursively simplify until we either hit a recursion limit or nothing
   5954   // changes.
   5955   if (Changed)
   5956     return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
   5957 
   5958   return Changed;
   5959 
   5960 trivially_true:
   5961   // Return 0 == 0.
   5962   LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
   5963   Pred = ICmpInst::ICMP_EQ;
   5964   return true;
   5965 
   5966 trivially_false:
   5967   // Return 0 != 0.
   5968   LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
   5969   Pred = ICmpInst::ICMP_NE;
   5970   return true;
   5971 }
   5972 
   5973 bool ScalarEvolution::isKnownNegative(const SCEV *S) {
   5974   return getSignedRange(S).getSignedMax().isNegative();
   5975 }
   5976 
   5977 bool ScalarEvolution::isKnownPositive(const SCEV *S) {
   5978   return getSignedRange(S).getSignedMin().isStrictlyPositive();
   5979 }
   5980 
   5981 bool ScalarEvolution::isKnownNonNegative(const SCEV *S) {
   5982   return !getSignedRange(S).getSignedMin().isNegative();
   5983 }
   5984 
   5985 bool ScalarEvolution::isKnownNonPositive(const SCEV *S) {
   5986   return !getSignedRange(S).getSignedMax().isStrictlyPositive();
   5987 }
   5988 
   5989 bool ScalarEvolution::isKnownNonZero(const SCEV *S) {
   5990   return isKnownNegative(S) || isKnownPositive(S);
   5991 }
   5992 
   5993 bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
   5994                                        const SCEV *LHS, const SCEV *RHS) {
   5995   // Canonicalize the inputs first.
   5996   (void)SimplifyICmpOperands(Pred, LHS, RHS);
   5997 
   5998   // If LHS or RHS is an addrec, check to see if the condition is true in
   5999   // every iteration of the loop.
   6000   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
   6001     if (isLoopEntryGuardedByCond(
   6002           AR->getLoop(), Pred, AR->getStart(), RHS) &&
   6003         isLoopBackedgeGuardedByCond(
   6004           AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
   6005       return true;
   6006   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
   6007     if (isLoopEntryGuardedByCond(
   6008           AR->getLoop(), Pred, LHS, AR->getStart()) &&
   6009         isLoopBackedgeGuardedByCond(
   6010           AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
   6011       return true;
   6012 
   6013   // Otherwise see what can be done with known constant ranges.
   6014   return isKnownPredicateWithRanges(Pred, LHS, RHS);
   6015 }
   6016 
   6017 bool
   6018 ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
   6019                                             const SCEV *LHS, const SCEV *RHS) {
   6020   if (HasSameValue(LHS, RHS))
   6021     return ICmpInst::isTrueWhenEqual(Pred);
   6022 
   6023   // This code is split out from isKnownPredicate because it is called from
   6024   // within isLoopEntryGuardedByCond.
   6025   switch (Pred) {
   6026   default:
   6027     llvm_unreachable("Unexpected ICmpInst::Predicate value!");
   6028   case ICmpInst::ICMP_SGT:
   6029     Pred = ICmpInst::ICMP_SLT;
   6030     std::swap(LHS, RHS);
   6031   case ICmpInst::ICMP_SLT: {
   6032     ConstantRange LHSRange = getSignedRange(LHS);
   6033     ConstantRange RHSRange = getSignedRange(RHS);
   6034     if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
   6035       return true;
   6036     if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
   6037       return false;
   6038     break;
   6039   }
   6040   case ICmpInst::ICMP_SGE:
   6041     Pred = ICmpInst::ICMP_SLE;
   6042     std::swap(LHS, RHS);
   6043   case ICmpInst::ICMP_SLE: {
   6044     ConstantRange LHSRange = getSignedRange(LHS);
   6045     ConstantRange RHSRange = getSignedRange(RHS);
   6046     if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
   6047       return true;
   6048     if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
   6049       return false;
   6050     break;
   6051   }
   6052   case ICmpInst::ICMP_UGT:
   6053     Pred = ICmpInst::ICMP_ULT;
   6054     std::swap(LHS, RHS);
   6055   case ICmpInst::ICMP_ULT: {
   6056     ConstantRange LHSRange = getUnsignedRange(LHS);
   6057     ConstantRange RHSRange = getUnsignedRange(RHS);
   6058     if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
   6059       return true;
   6060     if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
   6061       return false;
   6062     break;
   6063   }
   6064   case ICmpInst::ICMP_UGE:
   6065     Pred = ICmpInst::ICMP_ULE;
   6066     std::swap(LHS, RHS);
   6067   case ICmpInst::ICMP_ULE: {
   6068     ConstantRange LHSRange = getUnsignedRange(LHS);
   6069     ConstantRange RHSRange = getUnsignedRange(RHS);
   6070     if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
   6071       return true;
   6072     if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
   6073       return false;
   6074     break;
   6075   }
   6076   case ICmpInst::ICMP_NE: {
   6077     if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
   6078       return true;
   6079     if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
   6080       return true;
   6081 
   6082     const SCEV *Diff = getMinusSCEV(LHS, RHS);
   6083     if (isKnownNonZero(Diff))
   6084       return true;
   6085     break;
   6086   }
   6087   case ICmpInst::ICMP_EQ:
   6088     // The check at the top of the function catches the case where
   6089     // the values are known to be equal.
   6090     break;
   6091   }
   6092   return false;
   6093 }
   6094 
   6095 /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
   6096 /// protected by a conditional between LHS and RHS.  This is used to
   6097 /// to eliminate casts.
   6098 bool
   6099 ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
   6100                                              ICmpInst::Predicate Pred,
   6101                                              const SCEV *LHS, const SCEV *RHS) {
   6102   // Interpret a null as meaning no loop, where there is obviously no guard
   6103   // (interprocedural conditions notwithstanding).
   6104   if (!L) return true;
   6105 
   6106   BasicBlock *Latch = L->getLoopLatch();
   6107   if (!Latch)
   6108     return false;
   6109 
   6110   BranchInst *LoopContinuePredicate =
   6111     dyn_cast<BranchInst>(Latch->getTerminator());
   6112   if (!LoopContinuePredicate ||
   6113       LoopContinuePredicate->isUnconditional())
   6114     return false;
   6115 
   6116   return isImpliedCond(Pred, LHS, RHS,
   6117                        LoopContinuePredicate->getCondition(),
   6118                        LoopContinuePredicate->getSuccessor(0) != L->getHeader());
   6119 }
   6120 
   6121 /// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
   6122 /// by a conditional between LHS and RHS.  This is used to help avoid max
   6123 /// expressions in loop trip counts, and to eliminate casts.
   6124 bool
   6125 ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
   6126                                           ICmpInst::Predicate Pred,
   6127                                           const SCEV *LHS, const SCEV *RHS) {
   6128   // Interpret a null as meaning no loop, where there is obviously no guard
   6129   // (interprocedural conditions notwithstanding).
   6130   if (!L) return false;
   6131 
   6132   // Starting at the loop predecessor, climb up the predecessor chain, as long
   6133   // as there are predecessors that can be found that have unique successors
   6134   // leading to the original header.
   6135   for (std::pair<BasicBlock *, BasicBlock *>
   6136          Pair(L->getLoopPredecessor(), L->getHeader());
   6137        Pair.first;
   6138        Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
   6139 
   6140     BranchInst *LoopEntryPredicate =
   6141       dyn_cast<BranchInst>(Pair.first->getTerminator());
   6142     if (!LoopEntryPredicate ||
   6143         LoopEntryPredicate->isUnconditional())
   6144       continue;
   6145 
   6146     if (isImpliedCond(Pred, LHS, RHS,
   6147                       LoopEntryPredicate->getCondition(),
   6148                       LoopEntryPredicate->getSuccessor(0) != Pair.second))
   6149       return true;
   6150   }
   6151 
   6152   return false;
   6153 }
   6154 
   6155 /// RAII wrapper to prevent recursive application of isImpliedCond.
   6156 /// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
   6157 /// currently evaluating isImpliedCond.
   6158 struct MarkPendingLoopPredicate {
   6159   Value *Cond;
   6160   DenseSet<Value*> &LoopPreds;
   6161   bool Pending;
   6162 
   6163   MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP)
   6164     : Cond(C), LoopPreds(LP) {
   6165     Pending = !LoopPreds.insert(Cond).second;
   6166   }
   6167   ~MarkPendingLoopPredicate() {
   6168     if (!Pending)
   6169       LoopPreds.erase(Cond);
   6170   }
   6171 };
   6172 
   6173 /// isImpliedCond - Test whether the condition described by Pred, LHS,
   6174 /// and RHS is true whenever the given Cond value evaluates to true.
   6175 bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
   6176                                     const SCEV *LHS, const SCEV *RHS,
   6177                                     Value *FoundCondValue,
   6178                                     bool Inverse) {
   6179   MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
   6180   if (Mark.Pending)
   6181     return false;
   6182 
   6183   // Recursively handle And and Or conditions.
   6184   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
   6185     if (BO->getOpcode() == Instruction::And) {
   6186       if (!Inverse)
   6187         return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
   6188                isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
   6189     } else if (BO->getOpcode() == Instruction::Or) {
   6190       if (Inverse)
   6191         return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
   6192                isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
   6193     }
   6194   }
   6195 
   6196   ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
   6197   if (!ICI) return false;
   6198 
   6199   // Bail if the ICmp's operands' types are wider than the needed type
   6200   // before attempting to call getSCEV on them. This avoids infinite
   6201   // recursion, since the analysis of widening casts can require loop
   6202   // exit condition information for overflow checking, which would
   6203   // lead back here.
   6204   if (getTypeSizeInBits(LHS->getType()) <
   6205       getTypeSizeInBits(ICI->getOperand(0)->getType()))
   6206     return false;
   6207 
   6208   // Now that we found a conditional branch that dominates the loop or controls
   6209   // the loop latch. Check to see if it is the comparison we are looking for.
   6210   ICmpInst::Predicate FoundPred;
   6211   if (Inverse)
   6212     FoundPred = ICI->getInversePredicate();
   6213   else
   6214     FoundPred = ICI->getPredicate();
   6215 
   6216   const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
   6217   const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
   6218 
   6219   // Balance the types. The case where FoundLHS' type is wider than
   6220   // LHS' type is checked for above.
   6221   if (getTypeSizeInBits(LHS->getType()) >
   6222       getTypeSizeInBits(FoundLHS->getType())) {
   6223     if (CmpInst::isSigned(Pred)) {
   6224       FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
   6225       FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
   6226     } else {
   6227       FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
   6228       FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
   6229     }
   6230   }
   6231 
   6232   // Canonicalize the query to match the way instcombine will have
   6233   // canonicalized the comparison.
   6234   if (SimplifyICmpOperands(Pred, LHS, RHS))
   6235     if (LHS == RHS)
   6236       return CmpInst::isTrueWhenEqual(Pred);
   6237   if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
   6238     if (FoundLHS == FoundRHS)
   6239       return CmpInst::isFalseWhenEqual(FoundPred);
   6240 
   6241   // Check to see if we can make the LHS or RHS match.
   6242   if (LHS == FoundRHS || RHS == FoundLHS) {
   6243     if (isa<SCEVConstant>(RHS)) {
   6244       std::swap(FoundLHS, FoundRHS);
   6245       FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
   6246     } else {
   6247       std::swap(LHS, RHS);
   6248       Pred = ICmpInst::getSwappedPredicate(Pred);
   6249     }
   6250   }
   6251 
   6252   // Check whether the found predicate is the same as the desired predicate.
   6253   if (FoundPred == Pred)
   6254     return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
   6255 
   6256   // Check whether swapping the found predicate makes it the same as the
   6257   // desired predicate.
   6258   if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
   6259     if (isa<SCEVConstant>(RHS))
   6260       return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
   6261     else
   6262       return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
   6263                                    RHS, LHS, FoundLHS, FoundRHS);
   6264   }
   6265 
   6266   // Check whether the actual condition is beyond sufficient.
   6267   if (FoundPred == ICmpInst::ICMP_EQ)
   6268     if (ICmpInst::isTrueWhenEqual(Pred))
   6269       if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
   6270         return true;
   6271   if (Pred == ICmpInst::ICMP_NE)
   6272     if (!ICmpInst::isTrueWhenEqual(FoundPred))
   6273       if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
   6274         return true;
   6275 
   6276   // Otherwise assume the worst.
   6277   return false;
   6278 }
   6279 
   6280 /// isImpliedCondOperands - Test whether the condition described by Pred,
   6281 /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
   6282 /// and FoundRHS is true.
   6283 bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
   6284                                             const SCEV *LHS, const SCEV *RHS,
   6285                                             const SCEV *FoundLHS,
   6286                                             const SCEV *FoundRHS) {
   6287   return isImpliedCondOperandsHelper(Pred, LHS, RHS,
   6288                                      FoundLHS, FoundRHS) ||
   6289          // ~x < ~y --> x > y
   6290          isImpliedCondOperandsHelper(Pred, LHS, RHS,
   6291                                      getNotSCEV(FoundRHS),
   6292                                      getNotSCEV(FoundLHS));
   6293 }
   6294 
   6295 /// isImpliedCondOperandsHelper - Test whether the condition described by
   6296 /// Pred, LHS, and RHS is true whenever the condition described by Pred,
   6297 /// FoundLHS, and FoundRHS is true.
   6298 bool
   6299 ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
   6300                                              const SCEV *LHS, const SCEV *RHS,
   6301                                              const SCEV *FoundLHS,
   6302                                              const SCEV *FoundRHS) {
   6303   switch (Pred) {
   6304   default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
   6305   case ICmpInst::ICMP_EQ:
   6306   case ICmpInst::ICMP_NE:
   6307     if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS))
   6308       return true;
   6309     break;
   6310   case ICmpInst::ICMP_SLT:
   6311   case ICmpInst::ICMP_SLE:
   6312     if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
   6313         isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS))
   6314       return true;
   6315     break;
   6316   case ICmpInst::ICMP_SGT:
   6317   case ICmpInst::ICMP_SGE:
   6318     if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
   6319         isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS))
   6320       return true;
   6321     break;
   6322   case ICmpInst::ICMP_ULT:
   6323   case ICmpInst::ICMP_ULE:
   6324     if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
   6325         isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS))
   6326       return true;
   6327     break;
   6328   case ICmpInst::ICMP_UGT:
   6329   case ICmpInst::ICMP_UGE:
   6330     if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
   6331         isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS))
   6332       return true;
   6333     break;
   6334   }
   6335 
   6336   return false;
   6337 }
   6338 
   6339 /// getBECount - Subtract the end and start values and divide by the step,
   6340 /// rounding up, to get the number of times the backedge is executed. Return
   6341 /// CouldNotCompute if an intermediate computation overflows.
   6342 const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
   6343                                         const SCEV *End,
   6344                                         const SCEV *Step,
   6345                                         bool NoWrap) {
   6346   assert(!isKnownNegative(Step) &&
   6347          "This code doesn't handle negative strides yet!");
   6348 
   6349   Type *Ty = Start->getType();
   6350 
   6351   // When Start == End, we have an exact BECount == 0. Short-circuit this case
   6352   // here because SCEV may not be able to determine that the unsigned division
   6353   // after rounding is zero.
   6354   if (Start == End)
   6355     return getConstant(Ty, 0);
   6356 
   6357   const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
   6358   const SCEV *Diff = getMinusSCEV(End, Start);
   6359   const SCEV *RoundUp = getAddExpr(Step, NegOne);
   6360 
   6361   // Add an adjustment to the difference between End and Start so that
   6362   // the division will effectively round up.
   6363   const SCEV *Add = getAddExpr(Diff, RoundUp);
   6364 
   6365   if (!NoWrap) {
   6366     // Check Add for unsigned overflow.
   6367     // TODO: More sophisticated things could be done here.
   6368     Type *WideTy = IntegerType::get(getContext(),
   6369                                           getTypeSizeInBits(Ty) + 1);
   6370     const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
   6371     const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
   6372     const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
   6373     if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
   6374       return getCouldNotCompute();
   6375   }
   6376 
   6377   return getUDivExpr(Add, Step);
   6378 }
   6379 
   6380 /// HowManyLessThans - Return the number of times a backedge containing the
   6381 /// specified less-than comparison will execute.  If not computable, return
   6382 /// CouldNotCompute.
   6383 ///
   6384 /// @param IsSubExpr is true when the LHS < RHS condition does not directly
   6385 /// control the branch. In this case, we can only compute an iteration count for
   6386 /// a subexpression that cannot overflow before evaluating true.
   6387 ScalarEvolution::ExitLimit
   6388 ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
   6389                                   const Loop *L, bool isSigned,
   6390                                   bool IsSubExpr) {
   6391   // Only handle:  "ADDREC < LoopInvariant".
   6392   if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
   6393 
   6394   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
   6395   if (!AddRec || AddRec->getLoop() != L)
   6396     return getCouldNotCompute();
   6397 
   6398   // Check to see if we have a flag which makes analysis easy.
   6399   bool NoWrap = false;
   6400   if (!IsSubExpr) {
   6401     NoWrap = AddRec->getNoWrapFlags(
   6402       (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
   6403                           | SCEV::FlagNW));
   6404   }
   6405   if (AddRec->isAffine()) {
   6406     unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
   6407     const SCEV *Step = AddRec->getStepRecurrence(*this);
   6408 
   6409     if (Step->isZero())
   6410       return getCouldNotCompute();
   6411     if (Step->isOne()) {
   6412       // With unit stride, the iteration never steps past the limit value.
   6413     } else if (isKnownPositive(Step)) {
   6414       // Test whether a positive iteration can step past the limit
   6415       // value and past the maximum value for its type in a single step.
   6416       // Note that it's not sufficient to check NoWrap here, because even
   6417       // though the value after a wrap is undefined, it's not undefined
   6418       // behavior, so if wrap does occur, the loop could either terminate or
   6419       // loop infinitely, but in either case, the loop is guaranteed to
   6420       // iterate at least until the iteration where the wrapping occurs.
   6421       const SCEV *One = getConstant(Step->getType(), 1);
   6422       if (isSigned) {
   6423         APInt Max = APInt::getSignedMaxValue(BitWidth);
   6424         if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
   6425               .slt(getSignedRange(RHS).getSignedMax()))
   6426           return getCouldNotCompute();
   6427       } else {
   6428         APInt Max = APInt::getMaxValue(BitWidth);
   6429         if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
   6430               .ult(getUnsignedRange(RHS).getUnsignedMax()))
   6431           return getCouldNotCompute();
   6432       }
   6433     } else
   6434       // TODO: Handle negative strides here and below.
   6435       return getCouldNotCompute();
   6436 
   6437     // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
   6438     // m.  So, we count the number of iterations in which {n,+,s} < m is true.
   6439     // Note that we cannot simply return max(m-n,0)/s because it's not safe to
   6440     // treat m-n as signed nor unsigned due to overflow possibility.
   6441 
   6442     // First, we get the value of the LHS in the first iteration: n
   6443     const SCEV *Start = AddRec->getOperand(0);
   6444 
   6445     // Determine the minimum constant start value.
   6446     const SCEV *MinStart = getConstant(isSigned ?
   6447       getSignedRange(Start).getSignedMin() :
   6448       getUnsignedRange(Start).getUnsignedMin());
   6449 
   6450     // If we know that the condition is true in order to enter the loop,
   6451     // then we know that it will run exactly (m-n)/s times. Otherwise, we
   6452     // only know that it will execute (max(m,n)-n)/s times. In both cases,
   6453     // the division must round up.
   6454     const SCEV *End = RHS;
   6455     if (!isLoopEntryGuardedByCond(L,
   6456                                   isSigned ? ICmpInst::ICMP_SLT :
   6457                                              ICmpInst::ICMP_ULT,
   6458                                   getMinusSCEV(Start, Step), RHS))
   6459       End = isSigned ? getSMaxExpr(RHS, Start)
   6460                      : getUMaxExpr(RHS, Start);
   6461 
   6462     // Determine the maximum constant end value.
   6463     const SCEV *MaxEnd = getConstant(isSigned ?
   6464       getSignedRange(End).getSignedMax() :
   6465       getUnsignedRange(End).getUnsignedMax());
   6466 
   6467     // If MaxEnd is within a step of the maximum integer value in its type,
   6468     // adjust it down to the minimum value which would produce the same effect.
   6469     // This allows the subsequent ceiling division of (N+(step-1))/step to
   6470     // compute the correct value.
   6471     const SCEV *StepMinusOne = getMinusSCEV(Step,
   6472                                             getConstant(Step->getType(), 1));
   6473     MaxEnd = isSigned ?
   6474       getSMinExpr(MaxEnd,
   6475                   getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
   6476                                StepMinusOne)) :
   6477       getUMinExpr(MaxEnd,
   6478                   getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
   6479                                StepMinusOne));
   6480 
   6481     // Finally, we subtract these two values and divide, rounding up, to get
   6482     // the number of times the backedge is executed.
   6483     const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
   6484 
   6485     // The maximum backedge count is similar, except using the minimum start
   6486     // value and the maximum end value.
   6487     // If we already have an exact constant BECount, use it instead.
   6488     const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
   6489       : getBECount(MinStart, MaxEnd, Step, NoWrap);
   6490 
   6491     // If the stride is nonconstant, and NoWrap == true, then
   6492     // getBECount(MinStart, MaxEnd) may not compute. This would result in an
   6493     // exact BECount and invalid MaxBECount, which should be avoided to catch
   6494     // more optimization opportunities.
   6495     if (isa<SCEVCouldNotCompute>(MaxBECount))
   6496       MaxBECount = BECount;
   6497 
   6498     return ExitLimit(BECount, MaxBECount);
   6499   }
   6500 
   6501   return getCouldNotCompute();
   6502 }
   6503 
   6504 /// getNumIterationsInRange - Return the number of iterations of this loop that
   6505 /// produce values in the specified constant range.  Another way of looking at
   6506 /// this is that it returns the first iteration number where the value is not in
   6507 /// the condition, thus computing the exit count. If the iteration count can't
   6508 /// be computed, an instance of SCEVCouldNotCompute is returned.
   6509 const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
   6510                                                     ScalarEvolution &SE) const {
   6511   if (Range.isFullSet())  // Infinite loop.
   6512     return SE.getCouldNotCompute();
   6513 
   6514   // If the start is a non-zero constant, shift the range to simplify things.
   6515   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
   6516     if (!SC->getValue()->isZero()) {
   6517       SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
   6518       Operands[0] = SE.getConstant(SC->getType(), 0);
   6519       const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
   6520                                              getNoWrapFlags(FlagNW));
   6521       if (const SCEVAddRecExpr *ShiftedAddRec =
   6522             dyn_cast<SCEVAddRecExpr>(Shifted))
   6523         return ShiftedAddRec->getNumIterationsInRange(
   6524                            Range.subtract(SC->getValue()->getValue()), SE);
   6525       // This is strange and shouldn't happen.
   6526       return SE.getCouldNotCompute();
   6527     }
   6528 
   6529   // The only time we can solve this is when we have all constant indices.
   6530   // Otherwise, we cannot determine the overflow conditions.
   6531   for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
   6532     if (!isa<SCEVConstant>(getOperand(i)))
   6533       return SE.getCouldNotCompute();
   6534 
   6535 
   6536   // Okay at this point we know that all elements of the chrec are constants and
   6537   // that the start element is zero.
   6538 
   6539   // First check to see if the range contains zero.  If not, the first
   6540   // iteration exits.
   6541   unsigned BitWidth = SE.getTypeSizeInBits(getType());
   6542   if (!Range.contains(APInt(BitWidth, 0)))
   6543     return SE.getConstant(getType(), 0);
   6544 
   6545   if (isAffine()) {
   6546     // If this is an affine expression then we have this situation:
   6547     //   Solve {0,+,A} in Range  ===  Ax in Range
   6548 
   6549     // We know that zero is in the range.  If A is positive then we know that
   6550     // the upper value of the range must be the first possible exit value.
   6551     // If A is negative then the lower of the range is the last possible loop
   6552     // value.  Also note that we already checked for a full range.
   6553     APInt One(BitWidth,1);
   6554     APInt A     = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
   6555     APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
   6556 
   6557     // The exit value should be (End+A)/A.
   6558     APInt ExitVal = (End + A).udiv(A);
   6559     ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
   6560 
   6561     // Evaluate at the exit value.  If we really did fall out of the valid
   6562     // range, then we computed our trip count, otherwise wrap around or other
   6563     // things must have happened.
   6564     ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
   6565     if (Range.contains(Val->getValue()))
   6566       return SE.getCouldNotCompute();  // Something strange happened
   6567 
   6568     // Ensure that the previous value is in the range.  This is a sanity check.
   6569     assert(Range.contains(
   6570            EvaluateConstantChrecAtConstant(this,
   6571            ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) &&
   6572            "Linear scev computation is off in a bad way!");
   6573     return SE.getConstant(ExitValue);
   6574   } else if (isQuadratic()) {
   6575     // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
   6576     // quadratic equation to solve it.  To do this, we must frame our problem in
   6577     // terms of figuring out when zero is crossed, instead of when
   6578     // Range.getUpper() is crossed.
   6579     SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
   6580     NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
   6581     const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
   6582                                              // getNoWrapFlags(FlagNW)
   6583                                              FlagAnyWrap);
   6584 
   6585     // Next, solve the constructed addrec
   6586     std::pair<const SCEV *,const SCEV *> Roots =
   6587       SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
   6588     const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
   6589     const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
   6590     if (R1) {
   6591       // Pick the smallest positive root value.
   6592       if (ConstantInt *CB =
   6593           dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
   6594                          R1->getValue(), R2->getValue()))) {
   6595         if (CB->getZExtValue() == false)
   6596           std::swap(R1, R2);   // R1 is the minimum root now.
   6597 
   6598         // Make sure the root is not off by one.  The returned iteration should
   6599         // not be in the range, but the previous one should be.  When solving
   6600         // for "X*X < 5", for example, we should not return a root of 2.
   6601         ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this,
   6602                                                              R1->getValue(),
   6603                                                              SE);
   6604         if (Range.contains(R1Val->getValue())) {
   6605           // The next iteration must be out of the range...
   6606           ConstantInt *NextVal =
   6607                 ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
   6608 
   6609           R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
   6610           if (!Range.contains(R1Val->getValue()))
   6611             return SE.getConstant(NextVal);
   6612           return SE.getCouldNotCompute();  // Something strange happened
   6613         }
   6614 
   6615         // If R1 was not in the range, then it is a good return value.  Make
   6616         // sure that R1-1 WAS in the range though, just in case.
   6617         ConstantInt *NextVal =
   6618                ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
   6619         R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
   6620         if (Range.contains(R1Val->getValue()))
   6621           return R1;
   6622         return SE.getCouldNotCompute();  // Something strange happened
   6623       }
   6624     }
   6625   }
   6626 
   6627   return SE.getCouldNotCompute();
   6628 }
   6629 
   6630 
   6631 
   6632 //===----------------------------------------------------------------------===//
   6633 //                   SCEVCallbackVH Class Implementation
   6634 //===----------------------------------------------------------------------===//
   6635 
   6636 void ScalarEvolution::SCEVCallbackVH::deleted() {
   6637   assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
   6638   if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
   6639     SE->ConstantEvolutionLoopExitValue.erase(PN);
   6640   SE->ValueExprMap.erase(getValPtr());
   6641   // this now dangles!
   6642 }
   6643 
   6644 void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
   6645   assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
   6646 
   6647   // Forget all the expressions associated with users of the old value,
   6648   // so that future queries will recompute the expressions using the new
   6649   // value.
   6650   Value *Old = getValPtr();
   6651   SmallVector<User *, 16> Worklist;
   6652   SmallPtrSet<User *, 8> Visited;
   6653   for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
   6654        UI != UE; ++UI)
   6655     Worklist.push_back(*UI);
   6656   while (!Worklist.empty()) {
   6657     User *U = Worklist.pop_back_val();
   6658     // Deleting the Old value will cause this to dangle. Postpone
   6659     // that until everything else is done.
   6660     if (U == Old)
   6661       continue;
   6662     if (!Visited.insert(U))
   6663       continue;
   6664     if (PHINode *PN = dyn_cast<PHINode>(U))
   6665       SE->ConstantEvolutionLoopExitValue.erase(PN);
   6666     SE->ValueExprMap.erase(U);
   6667     for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
   6668          UI != UE; ++UI)
   6669       Worklist.push_back(*UI);
   6670   }
   6671   // Delete the Old value.
   6672   if (PHINode *PN = dyn_cast<PHINode>(Old))
   6673     SE->ConstantEvolutionLoopExitValue.erase(PN);
   6674   SE->ValueExprMap.erase(Old);
   6675   // this now dangles!
   6676 }
   6677 
   6678 ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
   6679   : CallbackVH(V), SE(se) {}
   6680 
   6681 //===----------------------------------------------------------------------===//
   6682 //                   ScalarEvolution Class Implementation
   6683 //===----------------------------------------------------------------------===//
   6684 
   6685 ScalarEvolution::ScalarEvolution()
   6686   : FunctionPass(ID), FirstUnknown(0) {
   6687   initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
   6688 }
   6689 
   6690 bool ScalarEvolution::runOnFunction(Function &F) {
   6691   this->F = &F;
   6692   LI = &getAnalysis<LoopInfo>();
   6693   TD = getAnalysisIfAvailable<DataLayout>();
   6694   TLI = &getAnalysis<TargetLibraryInfo>();
   6695   DT = &getAnalysis<DominatorTree>();
   6696   return false;
   6697 }
   6698 
   6699 void ScalarEvolution::releaseMemory() {
   6700   // Iterate through all the SCEVUnknown instances and call their
   6701   // destructors, so that they release their references to their values.
   6702   for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
   6703     U->~SCEVUnknown();
   6704   FirstUnknown = 0;
   6705 
   6706   ValueExprMap.clear();
   6707 
   6708   // Free any extra memory created for ExitNotTakenInfo in the unlikely event
   6709   // that a loop had multiple computable exits.
   6710   for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
   6711          BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
   6712        I != E; ++I) {
   6713     I->second.clear();
   6714   }
   6715 
   6716   assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
   6717 
   6718   BackedgeTakenCounts.clear();
   6719   ConstantEvolutionLoopExitValue.clear();
   6720   ValuesAtScopes.clear();
   6721   LoopDispositions.clear();
   6722   BlockDispositions.clear();
   6723   UnsignedRanges.clear();
   6724   SignedRanges.clear();
   6725   UniqueSCEVs.clear();
   6726   SCEVAllocator.Reset();
   6727 }
   6728 
   6729 void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
   6730   AU.setPreservesAll();
   6731   AU.addRequiredTransitive<LoopInfo>();
   6732   AU.addRequiredTransitive<DominatorTree>();
   6733   AU.addRequired<TargetLibraryInfo>();
   6734 }
   6735 
   6736 bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
   6737   return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
   6738 }
   6739 
   6740 static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
   6741                           const Loop *L) {
   6742   // Print all inner loops first
   6743   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
   6744     PrintLoopInfo(OS, SE, *I);
   6745 
   6746   OS << "Loop ";
   6747   WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
   6748   OS << ": ";
   6749 
   6750   SmallVector<BasicBlock *, 8> ExitBlocks;
   6751   L->getExitBlocks(ExitBlocks);
   6752   if (ExitBlocks.size() != 1)
   6753     OS << "<multiple exits> ";
   6754 
   6755   if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
   6756     OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
   6757   } else {
   6758     OS << "Unpredictable backedge-taken count. ";
   6759   }
   6760 
   6761   OS << "\n"
   6762         "Loop ";
   6763   WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
   6764   OS << ": ";
   6765 
   6766   if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
   6767     OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
   6768   } else {
   6769     OS << "Unpredictable max backedge-taken count. ";
   6770   }
   6771 
   6772   OS << "\n";
   6773 }
   6774 
   6775 void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
   6776   // ScalarEvolution's implementation of the print method is to print
   6777   // out SCEV values of all instructions that are interesting. Doing
   6778   // this potentially causes it to create new SCEV objects though,
   6779   // which technically conflicts with the const qualifier. This isn't
   6780   // observable from outside the class though, so casting away the
   6781   // const isn't dangerous.
   6782   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
   6783 
   6784   OS << "Classifying expressions for: ";
   6785   WriteAsOperand(OS, F, /*PrintType=*/false);
   6786   OS << "\n";
   6787   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
   6788     if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
   6789       OS << *I << '\n';
   6790       OS << "  -->  ";
   6791       const SCEV *SV = SE.getSCEV(&*I);
   6792       SV->print(OS);
   6793 
   6794       const Loop *L = LI->getLoopFor((*I).getParent());
   6795 
   6796       const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
   6797       if (AtUse != SV) {
   6798         OS << "  -->  ";
   6799         AtUse->print(OS);
   6800       }
   6801 
   6802       if (L) {
   6803         OS << "\t\t" "Exits: ";
   6804         const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
   6805         if (!SE.isLoopInvariant(ExitValue, L)) {
   6806           OS << "<<Unknown>>";
   6807         } else {
   6808           OS << *ExitValue;
   6809         }
   6810       }
   6811 
   6812       OS << "\n";
   6813     }
   6814 
   6815   OS << "Determining loop execution counts for: ";
   6816   WriteAsOperand(OS, F, /*PrintType=*/false);
   6817   OS << "\n";
   6818   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
   6819     PrintLoopInfo(OS, &SE, *I);
   6820 }
   6821 
   6822 ScalarEvolution::LoopDisposition
   6823 ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
   6824   std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
   6825   std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
   6826     Values.insert(std::make_pair(L, LoopVariant));
   6827   if (!Pair.second)
   6828     return Pair.first->second;
   6829 
   6830   LoopDisposition D = computeLoopDisposition(S, L);
   6831   return LoopDispositions[S][L] = D;
   6832 }
   6833 
   6834 ScalarEvolution::LoopDisposition
   6835 ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
   6836   switch (S->getSCEVType()) {
   6837   case scConstant:
   6838     return LoopInvariant;
   6839   case scTruncate:
   6840   case scZeroExtend:
   6841   case scSignExtend:
   6842     return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
   6843   case scAddRecExpr: {
   6844     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
   6845 
   6846     // If L is the addrec's loop, it's computable.
   6847     if (AR->getLoop() == L)
   6848       return LoopComputable;
   6849 
   6850     // Add recurrences are never invariant in the function-body (null loop).
   6851     if (!L)
   6852       return LoopVariant;
   6853 
   6854     // This recurrence is variant w.r.t. L if L contains AR's loop.
   6855     if (L->contains(AR->getLoop()))
   6856       return LoopVariant;
   6857 
   6858     // This recurrence is invariant w.r.t. L if AR's loop contains L.
   6859     if (AR->getLoop()->contains(L))
   6860       return LoopInvariant;
   6861 
   6862     // This recurrence is variant w.r.t. L if any of its operands
   6863     // are variant.
   6864     for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
   6865          I != E; ++I)
   6866       if (!isLoopInvariant(*I, L))
   6867         return LoopVariant;
   6868 
   6869     // Otherwise it's loop-invariant.
   6870     return LoopInvariant;
   6871   }
   6872   case scAddExpr:
   6873   case scMulExpr:
   6874   case scUMaxExpr:
   6875   case scSMaxExpr: {
   6876     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
   6877     bool HasVarying = false;
   6878     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
   6879          I != E; ++I) {
   6880       LoopDisposition D = getLoopDisposition(*I, L);
   6881       if (D == LoopVariant)
   6882         return LoopVariant;
   6883       if (D == LoopComputable)
   6884         HasVarying = true;
   6885     }
   6886     return HasVarying ? LoopComputable : LoopInvariant;
   6887   }
   6888   case scUDivExpr: {
   6889     const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
   6890     LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
   6891     if (LD == LoopVariant)
   6892       return LoopVariant;
   6893     LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
   6894     if (RD == LoopVariant)
   6895       return LoopVariant;
   6896     return (LD == LoopInvariant && RD == LoopInvariant) ?
   6897            LoopInvariant : LoopComputable;
   6898   }
   6899   case scUnknown:
   6900     // All non-instruction values are loop invariant.  All instructions are loop
   6901     // invariant if they are not contained in the specified loop.
   6902     // Instructions are never considered invariant in the function body
   6903     // (null loop) because they are defined within the "loop".
   6904     if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
   6905       return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
   6906     return LoopInvariant;
   6907   case scCouldNotCompute:
   6908     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
   6909   default: llvm_unreachable("Unknown SCEV kind!");
   6910   }
   6911 }
   6912 
   6913 bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
   6914   return getLoopDisposition(S, L) == LoopInvariant;
   6915 }
   6916 
   6917 bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
   6918   return getLoopDisposition(S, L) == LoopComputable;
   6919 }
   6920 
   6921 ScalarEvolution::BlockDisposition
   6922 ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
   6923   std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
   6924   std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
   6925     Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
   6926   if (!Pair.second)
   6927     return Pair.first->second;
   6928 
   6929   BlockDisposition D = computeBlockDisposition(S, BB);
   6930   return BlockDispositions[S][BB] = D;
   6931 }
   6932 
   6933 ScalarEvolution::BlockDisposition
   6934 ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
   6935   switch (S->getSCEVType()) {
   6936   case scConstant:
   6937     return ProperlyDominatesBlock;
   6938   case scTruncate:
   6939   case scZeroExtend:
   6940   case scSignExtend:
   6941     return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
   6942   case scAddRecExpr: {
   6943     // This uses a "dominates" query instead of "properly dominates" query
   6944     // to test for proper dominance too, because the instruction which
   6945     // produces the addrec's value is a PHI, and a PHI effectively properly
   6946     // dominates its entire containing block.
   6947     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
   6948     if (!DT->dominates(AR->getLoop()->getHeader(), BB))
   6949       return DoesNotDominateBlock;
   6950   }
   6951   // FALL THROUGH into SCEVNAryExpr handling.
   6952   case scAddExpr:
   6953   case scMulExpr:
   6954   case scUMaxExpr:
   6955   case scSMaxExpr: {
   6956     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
   6957     bool Proper = true;
   6958     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
   6959          I != E; ++I) {
   6960       BlockDisposition D = getBlockDisposition(*I, BB);
   6961       if (D == DoesNotDominateBlock)
   6962         return DoesNotDominateBlock;
   6963       if (D == DominatesBlock)
   6964         Proper = false;
   6965     }
   6966     return Proper ? ProperlyDominatesBlock : DominatesBlock;
   6967   }
   6968   case scUDivExpr: {
   6969     const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
   6970     const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
   6971     BlockDisposition LD = getBlockDisposition(LHS, BB);
   6972     if (LD == DoesNotDominateBlock)
   6973       return DoesNotDominateBlock;
   6974     BlockDisposition RD = getBlockDisposition(RHS, BB);
   6975     if (RD == DoesNotDominateBlock)
   6976       return DoesNotDominateBlock;
   6977     return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
   6978       ProperlyDominatesBlock : DominatesBlock;
   6979   }
   6980   case scUnknown:
   6981     if (Instruction *I =
   6982           dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
   6983       if (I->getParent() == BB)
   6984         return DominatesBlock;
   6985       if (DT->properlyDominates(I->getParent(), BB))
   6986         return ProperlyDominatesBlock;
   6987       return DoesNotDominateBlock;
   6988     }
   6989     return ProperlyDominatesBlock;
   6990   case scCouldNotCompute:
   6991     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
   6992   default:
   6993     llvm_unreachable("Unknown SCEV kind!");
   6994   }
   6995 }
   6996 
   6997 bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
   6998   return getBlockDisposition(S, BB) >= DominatesBlock;
   6999 }
   7000 
   7001 bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
   7002   return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
   7003 }
   7004 
   7005 namespace {
   7006 // Search for a SCEV expression node within an expression tree.
   7007 // Implements SCEVTraversal::Visitor.
   7008 struct SCEVSearch {
   7009   const SCEV *Node;
   7010   bool IsFound;
   7011 
   7012   SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
   7013 
   7014   bool follow(const SCEV *S) {
   7015     IsFound |= (S == Node);
   7016     return !IsFound;
   7017   }
   7018   bool isDone() const { return IsFound; }
   7019 };
   7020 }
   7021 
   7022 bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
   7023   SCEVSearch Search(Op);
   7024   visitAll(S, Search);
   7025   return Search.IsFound;
   7026 }
   7027 
   7028 void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
   7029   ValuesAtScopes.erase(S);
   7030   LoopDispositions.erase(S);
   7031   BlockDispositions.erase(S);
   7032   UnsignedRanges.erase(S);
   7033   SignedRanges.erase(S);
   7034 
   7035   for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
   7036          BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
   7037     BackedgeTakenInfo &BEInfo = I->second;
   7038     if (BEInfo.hasOperand(S, this)) {
   7039       BEInfo.clear();
   7040       BackedgeTakenCounts.erase(I++);
   7041     }
   7042     else
   7043       ++I;
   7044   }
   7045 }
   7046 
   7047 typedef DenseMap<const Loop *, std::string> VerifyMap;
   7048 
   7049 /// replaceSubString - Replaces all occurences of From in Str with To.
   7050 static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
   7051   size_t Pos = 0;
   7052   while ((Pos = Str.find(From, Pos)) != std::string::npos) {
   7053     Str.replace(Pos, From.size(), To.data(), To.size());
   7054     Pos += To.size();
   7055   }
   7056 }
   7057 
   7058 /// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
   7059 static void
   7060 getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
   7061   for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
   7062     getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
   7063 
   7064     std::string &S = Map[L];
   7065     if (S.empty()) {
   7066       raw_string_ostream OS(S);
   7067       SE.getBackedgeTakenCount(L)->print(OS);
   7068 
   7069       // false and 0 are semantically equivalent. This can happen in dead loops.
   7070       replaceSubString(OS.str(), "false", "0");
   7071       // Remove wrap flags, their use in SCEV is highly fragile.
   7072       // FIXME: Remove this when SCEV gets smarter about them.
   7073       replaceSubString(OS.str(), "<nw>", "");
   7074       replaceSubString(OS.str(), "<nsw>", "");
   7075       replaceSubString(OS.str(), "<nuw>", "");
   7076     }
   7077   }
   7078 }
   7079 
   7080 void ScalarEvolution::verifyAnalysis() const {
   7081   if (!VerifySCEV)
   7082     return;
   7083 
   7084   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
   7085 
   7086   // Gather stringified backedge taken counts for all loops using SCEV's caches.
   7087   // FIXME: It would be much better to store actual values instead of strings,
   7088   //        but SCEV pointers will change if we drop the caches.
   7089   VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
   7090   for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
   7091     getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
   7092 
   7093   // Gather stringified backedge taken counts for all loops without using
   7094   // SCEV's caches.
   7095   SE.releaseMemory();
   7096   for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
   7097     getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
   7098 
   7099   // Now compare whether they're the same with and without caches. This allows
   7100   // verifying that no pass changed the cache.
   7101   assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
   7102          "New loops suddenly appeared!");
   7103 
   7104   for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
   7105                            OldE = BackedgeDumpsOld.end(),
   7106                            NewI = BackedgeDumpsNew.begin();
   7107        OldI != OldE; ++OldI, ++NewI) {
   7108     assert(OldI->first == NewI->first && "Loop order changed!");
   7109 
   7110     // Compare the stringified SCEVs. We don't care if undef backedgetaken count
   7111     // changes.
   7112     // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This
   7113     // means that a pass is buggy or SCEV has to learn a new pattern but is
   7114     // usually not harmful.
   7115     if (OldI->second != NewI->second &&
   7116         OldI->second.find("undef") == std::string::npos &&
   7117         NewI->second.find("undef") == std::string::npos &&
   7118         OldI->second != "***COULDNOTCOMPUTE***" &&
   7119         NewI->second != "***COULDNOTCOMPUTE***") {
   7120       dbgs() << "SCEVValidator: SCEV for loop '"
   7121              << OldI->first->getHeader()->getName()
   7122              << "' changed from '" << OldI->second
   7123              << "' to '" << NewI->second << "'!\n";
   7124       std::abort();
   7125     }
   7126   }
   7127 
   7128   // TODO: Verify more things.
   7129 }
   7130