      1 //===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains the implementation of the scalar evolution analysis
     11 // engine, which is used primarily to analyze expressions involving induction
     12 // variables in loops.
     13 //
     14 // There are several aspects to this library.  First is the representation of
     15 // scalar expressions, which are represented as subclasses of the SCEV class.
     16 // These classes are used to represent certain types of subexpressions that we
     17 // can handle. We only create one SCEV of a particular shape, so
     18 // pointer-comparisons for equality are legal.
     19 //
     20 // One important aspect of the SCEV objects is that they are never cyclic, even
      21 // if there is a cycle in the dataflow for an expression (i.e., a PHI node).  If
     22 // the PHI node is one of the idioms that we can represent (e.g., a polynomial
     23 // recurrence) then we represent it directly as a recurrence node, otherwise we
     24 // represent it as a SCEVUnknown node.
     25 //
     26 // In addition to being able to represent expressions of various types, we also
     27 // have folders that are used to build the *canonical* representation for a
     28 // particular expression.  These folders are capable of using a variety of
     29 // rewrite rules to simplify the expressions.
     30 //
     31 // Once the folders are defined, we can implement the more interesting
     32 // higher-level code, such as the code that recognizes PHI nodes of various
     33 // types, computes the execution count of a loop, etc.
     34 //
     35 // TODO: We should use these routines and value representations to implement
     36 // dependence analysis!
     37 //
     38 //===----------------------------------------------------------------------===//
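         //
         // Illustrative example (not part of the original header comment): for a loop
         // such as
         //
         //   for (int i = 0; i != n; ++i)
         //     a[i] += b[i];
         //
         // the induction variable i is represented by the affine add recurrence
         // {0,+,1}<%loop>: it starts at 0 and advances by 1 on each iteration of
         // %loop.  The byte address of a[i] (for 4-byte elements) would be something
         // like {%a,+,4}<%loop>, and the loop's trip count can be derived from this
         // recurrence together with the exit condition on n.
         //
         //===----------------------------------------------------------------------===//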
     39 //
     40 // There are several good references for the techniques used in this analysis.
     41 //
     42 //  Chains of recurrences -- a method to expedite the evaluation
     43 //  of closed-form functions
     44 //  Olaf Bachmann, Paul S. Wang, Eugene V. Zima
     45 //
     46 //  On computational properties of chains of recurrences
     47 //  Eugene V. Zima
     48 //
     49 //  Symbolic Evaluation of Chains of Recurrences for Loop Optimization
     50 //  Robert A. van Engelen
     51 //
     52 //  Efficient Symbolic Analysis for Optimizing Compilers
     53 //  Robert A. van Engelen
     54 //
     55 //  Using the chains of recurrences algebra for data dependence testing and
     56 //  induction variable substitution
     57 //  MS Thesis, Johnie Birch
     58 //
     59 //===----------------------------------------------------------------------===//
     60 
     61 #define DEBUG_TYPE "scalar-evolution"
     62 #include "llvm/Analysis/ScalarEvolution.h"
     63 #include "llvm/ADT/STLExtras.h"
     64 #include "llvm/ADT/SmallPtrSet.h"
     65 #include "llvm/ADT/Statistic.h"
     66 #include "llvm/Analysis/ConstantFolding.h"
     67 #include "llvm/Analysis/Dominators.h"
     68 #include "llvm/Analysis/InstructionSimplify.h"
     69 #include "llvm/Analysis/LoopInfo.h"
     70 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
     71 #include "llvm/Analysis/ValueTracking.h"
     72 #include "llvm/Assembly/Writer.h"
     73 #include "llvm/IR/Constants.h"
     74 #include "llvm/IR/DataLayout.h"
     75 #include "llvm/IR/DerivedTypes.h"
     76 #include "llvm/IR/GlobalAlias.h"
     77 #include "llvm/IR/GlobalVariable.h"
     78 #include "llvm/IR/Instructions.h"
     79 #include "llvm/IR/LLVMContext.h"
     80 #include "llvm/IR/Operator.h"
     81 #include "llvm/Support/CommandLine.h"
     82 #include "llvm/Support/ConstantRange.h"
     83 #include "llvm/Support/Debug.h"
     84 #include "llvm/Support/ErrorHandling.h"
     85 #include "llvm/Support/GetElementPtrTypeIterator.h"
     86 #include "llvm/Support/InstIterator.h"
     87 #include "llvm/Support/MathExtras.h"
     88 #include "llvm/Support/raw_ostream.h"
     89 #include "llvm/Target/TargetLibraryInfo.h"
     90 #include <algorithm>
     91 using namespace llvm;
     92 
     93 STATISTIC(NumArrayLenItCounts,
     94           "Number of trip counts computed with array length");
     95 STATISTIC(NumTripCountsComputed,
     96           "Number of loops with predictable loop counts");
     97 STATISTIC(NumTripCountsNotComputed,
     98           "Number of loops without predictable loop counts");
     99 STATISTIC(NumBruteForceTripCountsComputed,
    100           "Number of loops with trip counts computed by force");
    101 
    102 static cl::opt<unsigned>
    103 MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
    104                         cl::desc("Maximum number of iterations SCEV will "
    105                                  "symbolically execute a constant "
    106                                  "derived loop"),
    107                         cl::init(100));
    108 
    109 // FIXME: Enable this with XDEBUG when the test suite is clean.
    110 static cl::opt<bool>
    111 VerifySCEV("verify-scev",
    112            cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
    113 
    114 INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
    115                 "Scalar Evolution Analysis", false, true)
    116 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
    117 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
    118 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
    119 INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
    120                 "Scalar Evolution Analysis", false, true)
    121 char ScalarEvolution::ID = 0;
    122 
    123 //===----------------------------------------------------------------------===//
    124 //                           SCEV class definitions
    125 //===----------------------------------------------------------------------===//
    126 
    127 //===----------------------------------------------------------------------===//
    128 // Implementation of the SCEV class.
    129 //
    130 
    131 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    132 void SCEV::dump() const {
    133   print(dbgs());
    134   dbgs() << '\n';
    135 }
    136 #endif
    137 
    138 void SCEV::print(raw_ostream &OS) const {
    139   switch (getSCEVType()) {
    140   case scConstant:
    141     WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
    142     return;
    143   case scTruncate: {
    144     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
    145     const SCEV *Op = Trunc->getOperand();
    146     OS << "(trunc " << *Op->getType() << " " << *Op << " to "
    147        << *Trunc->getType() << ")";
    148     return;
    149   }
    150   case scZeroExtend: {
    151     const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
    152     const SCEV *Op = ZExt->getOperand();
    153     OS << "(zext " << *Op->getType() << " " << *Op << " to "
    154        << *ZExt->getType() << ")";
    155     return;
    156   }
    157   case scSignExtend: {
    158     const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
    159     const SCEV *Op = SExt->getOperand();
    160     OS << "(sext " << *Op->getType() << " " << *Op << " to "
    161        << *SExt->getType() << ")";
    162     return;
    163   }
    164   case scAddRecExpr: {
    165     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
    166     OS << "{" << *AR->getOperand(0);
    167     for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
    168       OS << ",+," << *AR->getOperand(i);
    169     OS << "}<";
    170     if (AR->getNoWrapFlags(FlagNUW))
    171       OS << "nuw><";
    172     if (AR->getNoWrapFlags(FlagNSW))
    173       OS << "nsw><";
    174     if (AR->getNoWrapFlags(FlagNW) &&
    175         !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
    176       OS << "nw><";
    177     WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
    178     OS << ">";
    179     return;
    180   }
    181   case scAddExpr:
    182   case scMulExpr:
    183   case scUMaxExpr:
    184   case scSMaxExpr: {
    185     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
    186     const char *OpStr = 0;
    187     switch (NAry->getSCEVType()) {
    188     case scAddExpr: OpStr = " + "; break;
    189     case scMulExpr: OpStr = " * "; break;
    190     case scUMaxExpr: OpStr = " umax "; break;
    191     case scSMaxExpr: OpStr = " smax "; break;
    192     }
    193     OS << "(";
    194     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
    195          I != E; ++I) {
    196       OS << **I;
    197       if (llvm::next(I) != E)
    198         OS << OpStr;
    199     }
    200     OS << ")";
    201     switch (NAry->getSCEVType()) {
    202     case scAddExpr:
    203     case scMulExpr:
    204       if (NAry->getNoWrapFlags(FlagNUW))
    205         OS << "<nuw>";
    206       if (NAry->getNoWrapFlags(FlagNSW))
    207         OS << "<nsw>";
    208     }
    209     return;
    210   }
    211   case scUDivExpr: {
    212     const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
    213     OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
    214     return;
    215   }
    216   case scUnknown: {
    217     const SCEVUnknown *U = cast<SCEVUnknown>(this);
    218     Type *AllocTy;
    219     if (U->isSizeOf(AllocTy)) {
    220       OS << "sizeof(" << *AllocTy << ")";
    221       return;
    222     }
    223     if (U->isAlignOf(AllocTy)) {
    224       OS << "alignof(" << *AllocTy << ")";
    225       return;
    226     }
    227 
    228     Type *CTy;
    229     Constant *FieldNo;
    230     if (U->isOffsetOf(CTy, FieldNo)) {
    231       OS << "offsetof(" << *CTy << ", ";
    232       WriteAsOperand(OS, FieldNo, false);
    233       OS << ")";
    234       return;
    235     }
    236 
    237     // Otherwise just print it normally.
    238     WriteAsOperand(OS, U->getValue(), false);
    239     return;
    240   }
    241   case scCouldNotCompute:
    242     OS << "***COULDNOTCOMPUTE***";
    243     return;
    244   default: break;
    245   }
    246   llvm_unreachable("Unknown SCEV kind!");
    247 }
    248 
    249 Type *SCEV::getType() const {
    250   switch (getSCEVType()) {
    251   case scConstant:
    252     return cast<SCEVConstant>(this)->getType();
    253   case scTruncate:
    254   case scZeroExtend:
    255   case scSignExtend:
    256     return cast<SCEVCastExpr>(this)->getType();
    257   case scAddRecExpr:
    258   case scMulExpr:
    259   case scUMaxExpr:
    260   case scSMaxExpr:
    261     return cast<SCEVNAryExpr>(this)->getType();
    262   case scAddExpr:
    263     return cast<SCEVAddExpr>(this)->getType();
    264   case scUDivExpr:
    265     return cast<SCEVUDivExpr>(this)->getType();
    266   case scUnknown:
    267     return cast<SCEVUnknown>(this)->getType();
    268   case scCouldNotCompute:
    269     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
    270   default:
    271     llvm_unreachable("Unknown SCEV kind!");
    272   }
    273 }
    274 
    275 bool SCEV::isZero() const {
    276   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    277     return SC->getValue()->isZero();
    278   return false;
    279 }
    280 
    281 bool SCEV::isOne() const {
    282   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    283     return SC->getValue()->isOne();
    284   return false;
    285 }
    286 
    287 bool SCEV::isAllOnesValue() const {
    288   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    289     return SC->getValue()->isAllOnesValue();
    290   return false;
    291 }
    292 
    293 /// isNonConstantNegative - Return true if the specified scev is negated, but
    294 /// not a constant.
    295 bool SCEV::isNonConstantNegative() const {
    296   const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
    297   if (!Mul) return false;
    298 
    299   // If there is a constant factor, it will be first.
    300   const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
    301   if (!SC) return false;
    302 
     303   // Return true if the value is negative; this matches things like (-42 * V).
    304   return SC->getValue()->getValue().isNegative();
    305 }
    306 
    307 SCEVCouldNotCompute::SCEVCouldNotCompute() :
    308   SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
    309 
    310 bool SCEVCouldNotCompute::classof(const SCEV *S) {
    311   return S->getSCEVType() == scCouldNotCompute;
    312 }
    313 
    314 const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
    315   FoldingSetNodeID ID;
    316   ID.AddInteger(scConstant);
    317   ID.AddPointer(V);
    318   void *IP = 0;
    319   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    320   SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
    321   UniqueSCEVs.InsertNode(S, IP);
    322   return S;
    323 }
    324 
    325 const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
    326   return getConstant(ConstantInt::get(getContext(), Val));
    327 }
    328 
    329 const SCEV *
    330 ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
    331   IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
    332   return getConstant(ConstantInt::get(ITy, V, isSigned));
    333 }
    334 
    335 SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
    336                            unsigned SCEVTy, const SCEV *op, Type *ty)
    337   : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
    338 
    339 SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
    340                                    const SCEV *op, Type *ty)
    341   : SCEVCastExpr(ID, scTruncate, op, ty) {
    342   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
    343          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
    344          "Cannot truncate non-integer value!");
    345 }
    346 
    347 SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
    348                                        const SCEV *op, Type *ty)
    349   : SCEVCastExpr(ID, scZeroExtend, op, ty) {
    350   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
    351          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
    352          "Cannot zero extend non-integer value!");
    353 }
    354 
    355 SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
    356                                        const SCEV *op, Type *ty)
    357   : SCEVCastExpr(ID, scSignExtend, op, ty) {
    358   assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
    359          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
    360          "Cannot sign extend non-integer value!");
    361 }
    362 
    363 void SCEVUnknown::deleted() {
    364   // Clear this SCEVUnknown from various maps.
    365   SE->forgetMemoizedResults(this);
    366 
    367   // Remove this SCEVUnknown from the uniquing map.
    368   SE->UniqueSCEVs.RemoveNode(this);
    369 
    370   // Release the value.
    371   setValPtr(0);
    372 }
    373 
    374 void SCEVUnknown::allUsesReplacedWith(Value *New) {
    375   // Clear this SCEVUnknown from various maps.
    376   SE->forgetMemoizedResults(this);
    377 
    378   // Remove this SCEVUnknown from the uniquing map.
    379   SE->UniqueSCEVs.RemoveNode(this);
    380 
    381   // Update this SCEVUnknown to point to the new value. This is needed
    382   // because there may still be outstanding SCEVs which still point to
    383   // this SCEVUnknown.
    384   setValPtr(New);
    385 }
    386 
    387 bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
    388   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    389     if (VCE->getOpcode() == Instruction::PtrToInt)
    390       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
    391         if (CE->getOpcode() == Instruction::GetElementPtr &&
    392             CE->getOperand(0)->isNullValue() &&
    393             CE->getNumOperands() == 2)
    394           if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
    395             if (CI->isOne()) {
    396               AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
    397                                  ->getElementType();
    398               return true;
    399             }
    400 
    401   return false;
    402 }
    403 
    404 bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
    405   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    406     if (VCE->getOpcode() == Instruction::PtrToInt)
    407       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
    408         if (CE->getOpcode() == Instruction::GetElementPtr &&
    409             CE->getOperand(0)->isNullValue()) {
    410           Type *Ty =
    411             cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
    412           if (StructType *STy = dyn_cast<StructType>(Ty))
    413             if (!STy->isPacked() &&
    414                 CE->getNumOperands() == 3 &&
    415                 CE->getOperand(1)->isNullValue()) {
    416               if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
    417                 if (CI->isOne() &&
    418                     STy->getNumElements() == 2 &&
    419                     STy->getElementType(0)->isIntegerTy(1)) {
    420                   AllocTy = STy->getElementType(1);
    421                   return true;
    422                 }
    423             }
    424         }
    425 
    426   return false;
    427 }
    428 
    429 bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
    430   if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
    431     if (VCE->getOpcode() == Instruction::PtrToInt)
    432       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
    433         if (CE->getOpcode() == Instruction::GetElementPtr &&
    434             CE->getNumOperands() == 3 &&
    435             CE->getOperand(0)->isNullValue() &&
    436             CE->getOperand(1)->isNullValue()) {
    437           Type *Ty =
    438             cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
    439           // Ignore vector types here so that ScalarEvolutionExpander doesn't
    440           // emit getelementptrs that index into vectors.
    441           if (Ty->isStructTy() || Ty->isArrayTy()) {
    442             CTy = Ty;
    443             FieldNo = CE->getOperand(2);
    444             return true;
    445           }
    446         }
    447 
    448   return false;
    449 }
    450 
    451 //===----------------------------------------------------------------------===//
    452 //                               SCEV Utilities
    453 //===----------------------------------------------------------------------===//
    454 
    455 namespace {
    456   /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
    457   /// than the complexity of the RHS.  This comparator is used to canonicalize
    458   /// expressions.
    459   class SCEVComplexityCompare {
    460     const LoopInfo *const LI;
    461   public:
    462     explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
    463 
     464     // Return true if LHS is less complex than RHS (i.e. orders before it),
     464     // false otherwise.
    465     bool operator()(const SCEV *LHS, const SCEV *RHS) const {
    466       return compare(LHS, RHS) < 0;
    467     }
    468 
    469     // Return negative, zero, or positive, if LHS is less than, equal to, or
    470     // greater than RHS, respectively. A three-way result allows recursive
    471     // comparisons to be more efficient.
    472     int compare(const SCEV *LHS, const SCEV *RHS) const {
    473       // Fast-path: SCEVs are uniqued so we can do a quick equality check.
    474       if (LHS == RHS)
    475         return 0;
    476 
    477       // Primarily, sort the SCEVs by their getSCEVType().
    478       unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
    479       if (LType != RType)
    480         return (int)LType - (int)RType;
    481 
    482       // Aside from the getSCEVType() ordering, the particular ordering
    483       // isn't very important except that it's beneficial to be consistent,
    484       // so that (a + b) and (b + a) don't end up as different expressions.
    485       switch (LType) {
    486       case scUnknown: {
    487         const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
    488         const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
    489 
    490         // Sort SCEVUnknown values with some loose heuristics. TODO: This is
    491         // not as complete as it could be.
    492         const Value *LV = LU->getValue(), *RV = RU->getValue();
    493 
    494         // Order pointer values after integer values. This helps SCEVExpander
    495         // form GEPs.
    496         bool LIsPointer = LV->getType()->isPointerTy(),
    497              RIsPointer = RV->getType()->isPointerTy();
    498         if (LIsPointer != RIsPointer)
    499           return (int)LIsPointer - (int)RIsPointer;
    500 
    501         // Compare getValueID values.
    502         unsigned LID = LV->getValueID(),
    503                  RID = RV->getValueID();
    504         if (LID != RID)
    505           return (int)LID - (int)RID;
    506 
    507         // Sort arguments by their position.
    508         if (const Argument *LA = dyn_cast<Argument>(LV)) {
    509           const Argument *RA = cast<Argument>(RV);
    510           unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
    511           return (int)LArgNo - (int)RArgNo;
    512         }
    513 
    514         // For instructions, compare their loop depth, and their operand
    515         // count.  This is pretty loose.
    516         if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
    517           const Instruction *RInst = cast<Instruction>(RV);
    518 
    519           // Compare loop depths.
    520           const BasicBlock *LParent = LInst->getParent(),
    521                            *RParent = RInst->getParent();
    522           if (LParent != RParent) {
    523             unsigned LDepth = LI->getLoopDepth(LParent),
    524                      RDepth = LI->getLoopDepth(RParent);
    525             if (LDepth != RDepth)
    526               return (int)LDepth - (int)RDepth;
    527           }
    528 
    529           // Compare the number of operands.
    530           unsigned LNumOps = LInst->getNumOperands(),
    531                    RNumOps = RInst->getNumOperands();
    532           return (int)LNumOps - (int)RNumOps;
    533         }
    534 
    535         return 0;
    536       }
    537 
    538       case scConstant: {
    539         const SCEVConstant *LC = cast<SCEVConstant>(LHS);
    540         const SCEVConstant *RC = cast<SCEVConstant>(RHS);
    541 
    542         // Compare constant values.
    543         const APInt &LA = LC->getValue()->getValue();
    544         const APInt &RA = RC->getValue()->getValue();
    545         unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
    546         if (LBitWidth != RBitWidth)
    547           return (int)LBitWidth - (int)RBitWidth;
    548         return LA.ult(RA) ? -1 : 1;
    549       }
    550 
    551       case scAddRecExpr: {
    552         const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
    553         const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
    554 
    555         // Compare addrec loop depths.
    556         const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
    557         if (LLoop != RLoop) {
    558           unsigned LDepth = LLoop->getLoopDepth(),
    559                    RDepth = RLoop->getLoopDepth();
    560           if (LDepth != RDepth)
    561             return (int)LDepth - (int)RDepth;
    562         }
    563 
    564         // Addrec complexity grows with operand count.
    565         unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
    566         if (LNumOps != RNumOps)
    567           return (int)LNumOps - (int)RNumOps;
    568 
    569         // Lexicographically compare.
    570         for (unsigned i = 0; i != LNumOps; ++i) {
    571           long X = compare(LA->getOperand(i), RA->getOperand(i));
    572           if (X != 0)
    573             return X;
    574         }
    575 
    576         return 0;
    577       }
    578 
    579       case scAddExpr:
    580       case scMulExpr:
    581       case scSMaxExpr:
    582       case scUMaxExpr: {
    583         const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
    584         const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
    585 
    586         // Lexicographically compare n-ary expressions.
    587         unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
    588         for (unsigned i = 0; i != LNumOps; ++i) {
    589           if (i >= RNumOps)
    590             return 1;
    591           long X = compare(LC->getOperand(i), RC->getOperand(i));
    592           if (X != 0)
    593             return X;
    594         }
    595         return (int)LNumOps - (int)RNumOps;
    596       }
    597 
    598       case scUDivExpr: {
    599         const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
    600         const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
    601 
    602         // Lexicographically compare udiv expressions.
    603         long X = compare(LC->getLHS(), RC->getLHS());
    604         if (X != 0)
    605           return X;
    606         return compare(LC->getRHS(), RC->getRHS());
    607       }
    608 
    609       case scTruncate:
    610       case scZeroExtend:
    611       case scSignExtend: {
    612         const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
    613         const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
    614 
    615         // Compare cast expressions by operand.
    616         return compare(LC->getOperand(), RC->getOperand());
    617       }
    618 
    619       default:
    620         llvm_unreachable("Unknown SCEV kind!");
    621       }
    622     }
    623   };
    624 }
    625 
    626 /// GroupByComplexity - Given a list of SCEV objects, order them by their
    627 /// complexity, and group objects of the same complexity together by value.
    628 /// When this routine is finished, we know that any duplicates in the vector are
    629 /// consecutive and that complexity is monotonically increasing.
    630 ///
     631 /// Note that we take special precautions to ensure that we get deterministic
    632 /// results from this routine.  In other words, we don't want the results of
    633 /// this to depend on where the addresses of various SCEV objects happened to
    634 /// land in memory.
    635 ///
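         /// Illustrative example (assumed, not from the original comment): given the
         /// add operands {%x, 42, %y, %x}, sorting by complexity places the constant
         /// first, and the grouping pass then moves the duplicate %x next to its twin,
         /// yielding e.g. {42, %x, %x, %y}.  Callers such as getAddExpr rely on
         /// duplicates being adjacent so they can fold them with a single linear scan.
         ///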
    636 static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
    637                               LoopInfo *LI) {
    638   if (Ops.size() < 2) return;  // Noop
    639   if (Ops.size() == 2) {
    640     // This is the common case, which also happens to be trivially simple.
    641     // Special case it.
    642     const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
    643     if (SCEVComplexityCompare(LI)(RHS, LHS))
    644       std::swap(LHS, RHS);
    645     return;
    646   }
    647 
    648   // Do the rough sort by complexity.
    649   std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
    650 
    651   // Now that we are sorted by complexity, group elements of the same
    652   // complexity.  Note that this is, at worst, N^2, but the vector is likely to
    653   // be extremely short in practice.  Note that we take this approach because we
    654   // do not want to depend on the addresses of the objects we are grouping.
    655   for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) {
    656     const SCEV *S = Ops[i];
    657     unsigned Complexity = S->getSCEVType();
    658 
    659     // If there are any objects of the same complexity and same value as this
    660     // one, group them.
    661     for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) {
    662       if (Ops[j] == S) { // Found a duplicate.
    663         // Move it to immediately after i'th element.
    664         std::swap(Ops[i+1], Ops[j]);
    665         ++i;   // no need to rescan it.
    666         if (i == e-2) return;  // Done!
    667       }
    668     }
    669   }
    670 }
    671 
    672 
    673 
    674 //===----------------------------------------------------------------------===//
    675 //                      Simple SCEV method implementations
    676 //===----------------------------------------------------------------------===//
    677 
    678 /// BinomialCoefficient - Compute BC(It, K).  The result has width W.
     679 /// Assumes K > 0.
    680 static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
    681                                        ScalarEvolution &SE,
    682                                        Type *ResultTy) {
    683   // Handle the simplest case efficiently.
    684   if (K == 1)
    685     return SE.getTruncateOrZeroExtend(It, ResultTy);
    686 
    687   // We are using the following formula for BC(It, K):
    688   //
    689   //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
    690   //
     691 // Suppose W is the bitwidth of the return value.  We must be prepared for
     692 // overflow.  Hence, we must ensure that the result of our computation is
    693   // equal to the accurate one modulo 2^W.  Unfortunately, division isn't
    694   // safe in modular arithmetic.
    695   //
    696   // However, this code doesn't use exactly that formula; the formula it uses
    697   // is something like the following, where T is the number of factors of 2 in
    698   // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
    699   // exponentiation:
    700   //
    701   //   BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
    702   //
    703   // This formula is trivially equivalent to the previous formula.  However,
    704   // this formula can be implemented much more efficiently.  The trick is that
    705   // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
    706   // arithmetic.  To do exact division in modular arithmetic, all we have
    707   // to do is multiply by the inverse.  Therefore, this step can be done at
    708   // width W.
    709   //
    710   // The next issue is how to safely do the division by 2^T.  The way this
    711   // is done is by doing the multiplication step at a width of at least W + T
    712   // bits.  This way, the bottom W+T bits of the product are accurate. Then,
    713   // when we perform the division by 2^T (which is equivalent to a right shift
    714   // by T), the bottom W bits are accurate.  Extra bits are okay; they'll get
    715   // truncated out after the division by 2^T.
    716   //
    717   // In comparison to just directly using the first formula, this technique
    718   // is much more efficient; using the first formula requires W * K bits,
     719 // but this formula requires less than W + K bits. Also, the first formula
     719 // requires
    720   // a division step, whereas this formula only requires multiplies and shifts.
    721   //
    722   // It doesn't matter whether the subtraction step is done in the calculation
    723   // width or the input iteration count's width; if the subtraction overflows,
    724   // the result must be zero anyway.  We prefer here to do it in the width of
    725   // the induction variable because it helps a lot for certain cases; CodeGen
    726   // isn't smart enough to ignore the overflow, which leads to much less
    727   // efficient code if the width of the subtraction is wider than the native
    728   // register width.
    729   //
    730   // (It's possible to not widen at all by pulling out factors of 2 before
    731   // the multiplication; for example, K=2 can be calculated as
    732   // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
    733   // extra arithmetic, so it's not an obvious win, and it gets
    734   // much more complicated for K > 3.)
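         //
         // Worked example (illustrative, not from the original comment): for K = 4,
         // K! = 24 = 2^3 * 3, so T = 3 and the odd part K!/2^T is 3 (the loop below
         // accumulates OddFactorial = 3, with T starting at 1 to account for the
         // factor of two contributed by 2!).  The product It*(It-1)*(It-2)*(It-3) is
         // formed at width W + 3, divided by 2^3 = 8, truncated back to W bits, and
         // finally multiplied by the multiplicative inverse of 3 modulo 2^W to
         // complete the exact division by K!.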
    735 
    736   // Protection from insane SCEVs; this bound is conservative,
    737   // but it probably doesn't matter.
    738   if (K > 1000)
    739     return SE.getCouldNotCompute();
    740 
    741   unsigned W = SE.getTypeSizeInBits(ResultTy);
    742 
    743   // Calculate K! / 2^T and T; we divide out the factors of two before
     744 // multiplying when calculating K! / 2^T, to avoid overflow.
    745   // Other overflow doesn't matter because we only care about the bottom
    746   // W bits of the result.
    747   APInt OddFactorial(W, 1);
    748   unsigned T = 1;
    749   for (unsigned i = 3; i <= K; ++i) {
    750     APInt Mult(W, i);
    751     unsigned TwoFactors = Mult.countTrailingZeros();
    752     T += TwoFactors;
    753     Mult = Mult.lshr(TwoFactors);
    754     OddFactorial *= Mult;
    755   }
    756 
    757   // We need at least W + T bits for the multiplication step
    758   unsigned CalculationBits = W + T;
    759 
    760   // Calculate 2^T, at width T+W.
    761   APInt DivFactor = APInt(CalculationBits, 1).shl(T);
    762 
    763   // Calculate the multiplicative inverse of K! / 2^T;
    764   // this multiplication factor will perform the exact division by
    765   // K! / 2^T.
    766   APInt Mod = APInt::getSignedMinValue(W+1);
    767   APInt MultiplyFactor = OddFactorial.zext(W+1);
    768   MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod);
    769   MultiplyFactor = MultiplyFactor.trunc(W);
    770 
    771   // Calculate the product, at width T+W
    772   IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
    773                                                       CalculationBits);
    774   const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
    775   for (unsigned i = 1; i != K; ++i) {
    776     const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i));
    777     Dividend = SE.getMulExpr(Dividend,
    778                              SE.getTruncateOrZeroExtend(S, CalculationTy));
    779   }
    780 
    781   // Divide by 2^T
    782   const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor));
    783 
    784   // Truncate the result, and divide by K! / 2^T.
    785 
    786   return SE.getMulExpr(SE.getConstant(MultiplyFactor),
    787                        SE.getTruncateOrZeroExtend(DivResult, ResultTy));
    788 }
    789 
    790 /// evaluateAtIteration - Return the value of this chain of recurrences at
    791 /// the specified iteration number.  We can evaluate this recurrence by
    792 /// multiplying each element in the chain by the binomial coefficient
    793 /// corresponding to it.  In other words, we can evaluate {A,+,B,+,C,+,D} as:
    794 ///
    795 ///   A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3)
    796 ///
    797 /// where BC(It, k) stands for binomial coefficient.
    798 ///
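         /// Illustrative example (not from the original comment): {0,+,1,+,2} evaluates
         /// at iteration It to 0*BC(It,0) + 1*BC(It,1) + 2*BC(It,2)
         /// = It + It*(It-1) = It^2, i.e. this chain of recurrences is the closed form
         /// of the sequence of squares 0, 1, 4, 9, ...
         ///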
    799 const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
    800                                                 ScalarEvolution &SE) const {
    801   const SCEV *Result = getStart();
    802   for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
    803     // The computation is correct in the face of overflow provided that the
    804     // multiplication is performed _after_ the evaluation of the binomial
    805     // coefficient.
    806     const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType());
    807     if (isa<SCEVCouldNotCompute>(Coeff))
    808       return Coeff;
    809 
    810     Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff));
    811   }
    812   return Result;
    813 }
    814 
    815 //===----------------------------------------------------------------------===//
    816 //                    SCEV Expression folder implementations
    817 //===----------------------------------------------------------------------===//
    818 
    819 const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
    820                                              Type *Ty) {
    821   assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
    822          "This is not a truncating conversion!");
    823   assert(isSCEVable(Ty) &&
    824          "This is not a conversion to a SCEVable type!");
    825   Ty = getEffectiveSCEVType(Ty);
    826 
    827   FoldingSetNodeID ID;
    828   ID.AddInteger(scTruncate);
    829   ID.AddPointer(Op);
    830   ID.AddPointer(Ty);
    831   void *IP = 0;
    832   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    833 
    834   // Fold if the operand is constant.
    835   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    836     return getConstant(
    837       cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
    838 
    839   // trunc(trunc(x)) --> trunc(x)
    840   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
    841     return getTruncateExpr(ST->getOperand(), Ty);
    842 
    843   // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
    844   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
    845     return getTruncateOrSignExtend(SS->getOperand(), Ty);
    846 
    847   // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
    848   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    849     return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
    850 
    851   // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
    852   // eliminate all the truncates.
    853   if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
    854     SmallVector<const SCEV *, 4> Operands;
    855     bool hasTrunc = false;
    856     for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
    857       const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
    858       hasTrunc = isa<SCEVTruncateExpr>(S);
    859       Operands.push_back(S);
    860     }
    861     if (!hasTrunc)
    862       return getAddExpr(Operands);
    863     UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
    864   }
    865 
    866   // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
    867   // eliminate all the truncates.
    868   if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
    869     SmallVector<const SCEV *, 4> Operands;
    870     bool hasTrunc = false;
    871     for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
    872       const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
    873       hasTrunc = isa<SCEVTruncateExpr>(S);
    874       Operands.push_back(S);
    875     }
    876     if (!hasTrunc)
    877       return getMulExpr(Operands);
    878     UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
    879   }
    880 
    881   // If the input value is a chrec scev, truncate the chrec's operands.
    882   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
    883     SmallVector<const SCEV *, 4> Operands;
    884     for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
    885       Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
    886     return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
    887   }
    888 
    889   // The cast wasn't folded; create an explicit cast node. We can reuse
    890   // the existing insert position since if we get here, we won't have
    891   // made any changes which would invalidate it.
    892   SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
    893                                                  Op, Ty);
    894   UniqueSCEVs.InsertNode(S, IP);
    895   return S;
    896 }
    897 
    898 const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
    899                                                Type *Ty) {
    900   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
    901          "This is not an extending conversion!");
    902   assert(isSCEVable(Ty) &&
    903          "This is not a conversion to a SCEVable type!");
    904   Ty = getEffectiveSCEVType(Ty);
    905 
    906   // Fold if the operand is constant.
    907   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
    908     return getConstant(
    909       cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
    910 
    911   // zext(zext(x)) --> zext(x)
    912   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
    913     return getZeroExtendExpr(SZ->getOperand(), Ty);
    914 
    915   // Before doing any expensive analysis, check to see if we've already
    916   // computed a SCEV for this Op and Ty.
    917   FoldingSetNodeID ID;
    918   ID.AddInteger(scZeroExtend);
    919   ID.AddPointer(Op);
    920   ID.AddPointer(Ty);
    921   void *IP = 0;
    922   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    923 
    924   // zext(trunc(x)) --> zext(x) or x or trunc(x)
    925   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
    926     // It's possible the bits taken off by the truncate were all zero bits. If
    927     // so, we should be able to simplify this further.
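             // For instance (illustrative): when computing zext((trunc i32 %x to i16))
             // back to i32, and %x is already known to be less than 2^16, the whole
             // expression folds to %x itself.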
    928     const SCEV *X = ST->getOperand();
    929     ConstantRange CR = getUnsignedRange(X);
    930     unsigned TruncBits = getTypeSizeInBits(ST->getType());
    931     unsigned NewBits = getTypeSizeInBits(Ty);
    932     if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
    933             CR.zextOrTrunc(NewBits)))
    934       return getTruncateOrZeroExtend(X, Ty);
    935   }
    936 
    937   // If the input value is a chrec scev, and we can prove that the value
    938   // did not overflow the old, smaller, value, we can zero extend all of the
    939   // operands (often constants).  This allows analysis of something like
    940   // this:  for (unsigned char X = 0; X < 100; ++X) { int Y = X; }
    941   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
    942     if (AR->isAffine()) {
    943       const SCEV *Start = AR->getStart();
    944       const SCEV *Step = AR->getStepRecurrence(*this);
    945       unsigned BitWidth = getTypeSizeInBits(AR->getType());
    946       const Loop *L = AR->getLoop();
    947 
    948       // If we have special knowledge that this addrec won't overflow,
    949       // we don't need to do any further analysis.
    950       if (AR->getNoWrapFlags(SCEV::FlagNUW))
    951         return getAddRecExpr(getZeroExtendExpr(Start, Ty),
    952                              getZeroExtendExpr(Step, Ty),
    953                              L, AR->getNoWrapFlags());
    954 
    955       // Check whether the backedge-taken count is SCEVCouldNotCompute.
    956       // Note that this serves two purposes: It filters out loops that are
    957       // simply not analyzable, and it covers the case where this code is
    958       // being called from within backedge-taken count analysis, such that
    959       // attempting to ask for the backedge-taken count would likely result
     960       // in infinite recursion. In the latter case, the analysis code will
    961       // cope with a conservative value, and it will take care to purge
    962       // that value once it has finished.
    963       const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
    964       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
    965         // Manually compute the final value for AR, checking for
    966         // overflow.
    967 
     968         // Check whether the backedge-taken count can be losslessly cast to
    969         // the addrec's type. The count is always unsigned.
    970         const SCEV *CastedMaxBECount =
    971           getTruncateOrZeroExtend(MaxBECount, Start->getType());
    972         const SCEV *RecastedMaxBECount =
    973           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
    974         if (MaxBECount == RecastedMaxBECount) {
    975           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
    976           // Check whether Start+Step*MaxBECount has no unsigned overflow.
    977           const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
    978           const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
    979           const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
    980           const SCEV *WideMaxBECount =
    981             getZeroExtendExpr(CastedMaxBECount, WideTy);
    982           const SCEV *OperandExtendedAdd =
    983             getAddExpr(WideStart,
    984                        getMulExpr(WideMaxBECount,
    985                                   getZeroExtendExpr(Step, WideTy)));
    986           if (ZAdd == OperandExtendedAdd) {
    987             // Cache knowledge of AR NUW, which is propagated to this AddRec.
    988             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
    989             // Return the expression with the addrec on the outside.
    990             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
    991                                  getZeroExtendExpr(Step, Ty),
    992                                  L, AR->getNoWrapFlags());
    993           }
    994           // Similar to above, only this time treat the step value as signed.
    995           // This covers loops that count down.
    996           OperandExtendedAdd =
    997             getAddExpr(WideStart,
    998                        getMulExpr(WideMaxBECount,
    999                                   getSignExtendExpr(Step, WideTy)));
   1000           if (ZAdd == OperandExtendedAdd) {
   1001             // Cache knowledge of AR NW, which is propagated to this AddRec.
   1002             // Negative step causes unsigned wrap, but it still can't self-wrap.
   1003             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
   1004             // Return the expression with the addrec on the outside.
   1005             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
   1006                                  getSignExtendExpr(Step, Ty),
   1007                                  L, AR->getNoWrapFlags());
   1008           }
   1009         }
   1010 
   1011         // If the backedge is guarded by a comparison with the pre-inc value
   1012         // the addrec is safe. Also, if the entry is guarded by a comparison
   1013         // with the start value and the backedge is guarded by a comparison
   1014         // with the post-inc value, the addrec is safe.
   1015         if (isKnownPositive(Step)) {
   1016           const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
   1017                                       getUnsignedRange(Step).getUnsignedMax());
   1018           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
   1019               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
   1020                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
   1021                                            AR->getPostIncExpr(*this), N))) {
   1022             // Cache knowledge of AR NUW, which is propagated to this AddRec.
   1023             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
   1024             // Return the expression with the addrec on the outside.
   1025             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
   1026                                  getZeroExtendExpr(Step, Ty),
   1027                                  L, AR->getNoWrapFlags());
   1028           }
   1029         } else if (isKnownNegative(Step)) {
   1030           const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
   1031                                       getSignedRange(Step).getSignedMin());
   1032           if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
   1033               (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
   1034                isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
   1035                                            AR->getPostIncExpr(*this), N))) {
   1036             // Cache knowledge of AR NW, which is propagated to this AddRec.
   1037             // Negative step causes unsigned wrap, but it still can't self-wrap.
   1038             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
   1039             // Return the expression with the addrec on the outside.
   1040             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
   1041                                  getSignExtendExpr(Step, Ty),
   1042                                  L, AR->getNoWrapFlags());
   1043           }
   1044         }
   1045       }
   1046     }
   1047 
   1048   // The cast wasn't folded; create an explicit cast node.
   1049   // Recompute the insert position, as it may have been invalidated.
   1050   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   1051   SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
   1052                                                    Op, Ty);
   1053   UniqueSCEVs.InsertNode(S, IP);
   1054   return S;
   1055 }
   1056 
   1057 // Get the limit of a recurrence such that incrementing by Step cannot cause
   1058 // signed overflow as long as the value of the recurrence within the loop does
   1059 // not exceed this limit before incrementing.
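         //
         // For example (illustrative values, not from the original comment): for an i8
         // step whose signed range is [1, 10], the returned limit is SIGNED_MIN - 10,
         // which wraps to 118, and *Pred is set to ICMP_SLT; as long as the recurrence
         // value is signed-less-than 118 (i.e. at most 117 = SIGNED_MAX - 10), adding
         // the step cannot overflow past SIGNED_MAX.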
   1060 static const SCEV *getOverflowLimitForStep(const SCEV *Step,
   1061                                            ICmpInst::Predicate *Pred,
   1062                                            ScalarEvolution *SE) {
   1063   unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
   1064   if (SE->isKnownPositive(Step)) {
   1065     *Pred = ICmpInst::ICMP_SLT;
   1066     return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
   1067                            SE->getSignedRange(Step).getSignedMax());
   1068   }
   1069   if (SE->isKnownNegative(Step)) {
   1070     *Pred = ICmpInst::ICMP_SGT;
   1071     return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
   1072                        SE->getSignedRange(Step).getSignedMin());
   1073   }
   1074   return 0;
   1075 }
   1076 
   1077 // The recurrence AR has been shown to have no signed wrap. Typically, if we can
   1078 // prove NSW for AR, then we can just as easily prove NSW for its preincrement
    1079 // or postincrement sibling. This allows normalizing a sign-extended AddRec as
    1080 // follows: {sext(Step + Start),+,Step} => {Step + sext(Start),+,Step}.  As a
    1081 // result, the expression "Step + sext(PreIncAR)" is congruent with
    1082 // "sext(PostIncAR)".
   1083 static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
   1084                                             Type *Ty,
   1085                                             ScalarEvolution *SE) {
   1086   const Loop *L = AR->getLoop();
   1087   const SCEV *Start = AR->getStart();
   1088   const SCEV *Step = AR->getStepRecurrence(*SE);
   1089 
   1090   // Check for a simple looking step prior to loop entry.
   1091   const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
   1092   if (!SA)
   1093     return 0;
   1094 
   1095   // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
   1096   // subtraction is expensive. For this purpose, perform a quick and dirty
   1097   // difference, by checking for Step in the operand list.
   1098   SmallVector<const SCEV *, 4> DiffOps;
   1099   for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end();
   1100        I != E; ++I) {
   1101     if (*I != Step)
   1102       DiffOps.push_back(*I);
   1103   }
   1104   if (DiffOps.size() == SA->getNumOperands())
   1105     return 0;
   1106 
   1107   // This is a postinc AR. Check for overflow on the preinc recurrence using the
    1108 // same three conditions that getSignExtendExpr checks.
   1109 
   1110   // 1. NSW flags on the step increment.
   1111   const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
   1112   const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
   1113     SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
   1114 
   1115   if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
   1116     return PreStart;
   1117 
   1118   // 2. Direct overflow check on the step operation's expression.
   1119   unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
   1120   Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
   1121   const SCEV *OperandExtendedStart =
   1122     SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
   1123                    SE->getSignExtendExpr(Step, WideTy));
   1124   if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
   1125     // Cache knowledge of PreAR NSW.
   1126     if (PreAR)
   1127       const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
   1128     // FIXME: this optimization needs a unit test
   1129     DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
   1130     return PreStart;
   1131   }
   1132 
   1133   // 3. Loop precondition.
   1134   ICmpInst::Predicate Pred;
   1135   const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
   1136 
   1137   if (OverflowLimit &&
   1138       SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
   1139     return PreStart;
   1140   }
   1141   return 0;
   1142 }
   1143 
   1144 // Get the normalized sign-extended expression for this AddRec's Start.
   1145 static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
   1146                                             Type *Ty,
   1147                                             ScalarEvolution *SE) {
   1148   const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
   1149   if (!PreStart)
   1150     return SE->getSignExtendExpr(AR->getStart(), Ty);
   1151 
   1152   return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
   1153                         SE->getSignExtendExpr(PreStart, Ty));
   1154 }
   1155 
   1156 const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
   1157                                                Type *Ty) {
   1158   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
   1159          "This is not an extending conversion!");
   1160   assert(isSCEVable(Ty) &&
   1161          "This is not a conversion to a SCEVable type!");
   1162   Ty = getEffectiveSCEVType(Ty);
   1163 
   1164   // Fold if the operand is constant.
   1165   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
   1166     return getConstant(
   1167       cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
   1168 
   1169   // sext(sext(x)) --> sext(x)
   1170   if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
   1171     return getSignExtendExpr(SS->getOperand(), Ty);
   1172 
   1173   // sext(zext(x)) --> zext(x)
   1174   if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
   1175     return getZeroExtendExpr(SZ->getOperand(), Ty);
   1176 
   1177   // Before doing any expensive analysis, check to see if we've already
   1178   // computed a SCEV for this Op and Ty.
   1179   FoldingSetNodeID ID;
   1180   ID.AddInteger(scSignExtend);
   1181   ID.AddPointer(Op);
   1182   ID.AddPointer(Ty);
   1183   void *IP = 0;
   1184   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   1185 
   1186   // If the input value is provably positive, build a zext instead.
   1187   if (isKnownNonNegative(Op))
   1188     return getZeroExtendExpr(Op, Ty);
   1189 
   1190   // sext(trunc(x)) --> sext(x) or x or trunc(x)
   1191   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
   1192     // It's possible the bits taken off by the truncate were all sign bits. If
   1193     // so, we should be able to simplify this further.
   1194     const SCEV *X = ST->getOperand();
   1195     ConstantRange CR = getSignedRange(X);
   1196     unsigned TruncBits = getTypeSizeInBits(ST->getType());
   1197     unsigned NewBits = getTypeSizeInBits(Ty);
   1198     if (CR.truncate(TruncBits).signExtend(NewBits).contains(
   1199             CR.sextOrTrunc(NewBits)))
   1200       return getTruncateOrSignExtend(X, Ty);
   1201   }
   1202 
   1203   // If the input value is a chrec scev, and we can prove that the value
   1204   // did not overflow in the old, smaller type, we can sign extend all of the
   1205   // operands (often constants).  This allows analysis of something like
   1206   // this:  for (signed char X = 0; X < 100; ++X) { int Y = X; }
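          // For instance, for the loop above X is the i8 recurrence {0,+,1}<%loop>;
          // once the checks below show it cannot sign-overflow, sext(X) to i32 folds
          // to the i32 recurrence {0,+,1}<nsw><%loop> instead of remaining an opaque
          // sign-extend of the narrow recurrence.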
   1207   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op))
   1208     if (AR->isAffine()) {
   1209       const SCEV *Start = AR->getStart();
   1210       const SCEV *Step = AR->getStepRecurrence(*this);
   1211       unsigned BitWidth = getTypeSizeInBits(AR->getType());
   1212       const Loop *L = AR->getLoop();
   1213 
   1214       // If we have special knowledge that this addrec won't overflow,
   1215       // we don't need to do any further analysis.
   1216       if (AR->getNoWrapFlags(SCEV::FlagNSW))
   1217         return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
   1218                              getSignExtendExpr(Step, Ty),
   1219                              L, SCEV::FlagNSW);
   1220 
   1221       // Check whether the backedge-taken count is SCEVCouldNotCompute.
   1222       // Note that this serves two purposes: It filters out loops that are
   1223       // simply not analyzable, and it covers the case where this code is
   1224       // being called from within backedge-taken count analysis, such that
   1225       // attempting to ask for the backedge-taken count would likely result
   1226       // in infinite recursion. In the latter case, the analysis code will
   1227       // cope with a conservative value, and it will take care to purge
   1228       // that value once it has finished.
   1229       const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
   1230       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
   1231         // Manually compute the final value for AR, checking for
   1232         // overflow.
   1233 
   1234         // Check whether the backedge-taken count can be losslessly cast to
   1235         // the addrec's type. The count is always unsigned.
   1236         const SCEV *CastedMaxBECount =
   1237           getTruncateOrZeroExtend(MaxBECount, Start->getType());
   1238         const SCEV *RecastedMaxBECount =
   1239           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
   1240         if (MaxBECount == RecastedMaxBECount) {
   1241           Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
   1242           // Check whether Start+Step*MaxBECount has no signed overflow.
   1243           const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
   1244           const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
   1245           const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
   1246           const SCEV *WideMaxBECount =
   1247             getZeroExtendExpr(CastedMaxBECount, WideTy);
   1248           const SCEV *OperandExtendedAdd =
   1249             getAddExpr(WideStart,
   1250                        getMulExpr(WideMaxBECount,
   1251                                   getSignExtendExpr(Step, WideTy)));
   1252           if (SAdd == OperandExtendedAdd) {
   1253             // Cache knowledge of AR NSW, which is propagated to this AddRec.
   1254             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
   1255             // Return the expression with the addrec on the outside.
   1256             return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
   1257                                  getSignExtendExpr(Step, Ty),
   1258                                  L, AR->getNoWrapFlags());
   1259           }
   1260           // Similar to above, only this time treat the step value as unsigned.
   1261           // This covers loops that count up with an unsigned step.
   1262           OperandExtendedAdd =
   1263             getAddExpr(WideStart,
   1264                        getMulExpr(WideMaxBECount,
   1265                                   getZeroExtendExpr(Step, WideTy)));
   1266           if (SAdd == OperandExtendedAdd) {
   1267             // Cache knowledge of AR NSW, which is propagated to this AddRec.
   1268             const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
   1269             // Return the expression with the addrec on the outside.
   1270             return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
   1271                                  getZeroExtendExpr(Step, Ty),
   1272                                  L, AR->getNoWrapFlags());
   1273           }
   1274         }
   1275 
   1276         // If the backedge is guarded by a comparison with the pre-inc value
   1277         // the addrec is safe. Also, if the entry is guarded by a comparison
   1278         // with the start value and the backedge is guarded by a comparison
   1279         // with the post-inc value, the addrec is safe.
   1280         ICmpInst::Predicate Pred;
   1281         const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
   1282         if (OverflowLimit &&
   1283             (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
   1284              (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
   1285               isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
   1286                                           OverflowLimit)))) {
   1287           // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
   1288           const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
   1289           return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
   1290                                getSignExtendExpr(Step, Ty),
   1291                                L, AR->getNoWrapFlags());
   1292         }
   1293       }
   1294     }
   1295 
   1296   // The cast wasn't folded; create an explicit cast node.
   1297   // Recompute the insert position, as it may have been invalidated.
   1298   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   1299   SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
   1300                                                    Op, Ty);
   1301   UniqueSCEVs.InsertNode(S, IP);
   1302   return S;
   1303 }
   1304 
   1305 /// getAnyExtendExpr - Return a SCEV for the given operand extended with
   1306 /// unspecified bits out to the given type.
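        /// For example, any-extending the i8 constant -1 to i32 yields i32 -1 via
        /// the sign-extension case below, while a non-negative constant such as
        /// i8 7 folds through the zero-extension attempt to the i32 constant 7.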
   1307 ///
   1308 const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
   1309                                               Type *Ty) {
   1310   assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
   1311          "This is not an extending conversion!");
   1312   assert(isSCEVable(Ty) &&
   1313          "This is not a conversion to a SCEVable type!");
   1314   Ty = getEffectiveSCEVType(Ty);
   1315 
   1316   // Sign-extend negative constants.
   1317   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
   1318     if (SC->getValue()->getValue().isNegative())
   1319       return getSignExtendExpr(Op, Ty);
   1320 
   1321   // Peel off a truncate cast.
   1322   if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) {
   1323     const SCEV *NewOp = T->getOperand();
   1324     if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty))
   1325       return getAnyExtendExpr(NewOp, Ty);
   1326     return getTruncateOrNoop(NewOp, Ty);
   1327   }
   1328 
   1329   // Next try a zext cast. If the cast is folded, use it.
   1330   const SCEV *ZExt = getZeroExtendExpr(Op, Ty);
   1331   if (!isa<SCEVZeroExtendExpr>(ZExt))
   1332     return ZExt;
   1333 
   1334   // Next try a sext cast. If the cast is folded, use it.
   1335   const SCEV *SExt = getSignExtendExpr(Op, Ty);
   1336   if (!isa<SCEVSignExtendExpr>(SExt))
   1337     return SExt;
   1338 
   1339   // Force the cast to be folded into the operands of an addrec.
   1340   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
   1341     SmallVector<const SCEV *, 4> Ops;
   1342     for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
   1343          I != E; ++I)
   1344       Ops.push_back(getAnyExtendExpr(*I, Ty));
   1345     return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
   1346   }
   1347 
   1348   // If the expression is obviously signed, use the sext cast value.
   1349   if (isa<SCEVSMaxExpr>(Op))
   1350     return SExt;
   1351 
   1352   // Absent any other information, use the zext cast value.
   1353   return ZExt;
   1354 }
   1355 
   1356 /// CollectAddOperandsWithScales - Process the given Ops list, which is
   1357 /// a list of operands to be added under the given scale, update the given
   1358 /// map. This is a helper function for getAddExpr. As an example of
   1359 /// what it does, given a sequence of operands that would form an add
   1360 /// expression like this:
   1361 ///
   1362 ///    m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
   1363 ///
   1364 /// where A and B are constants, update the map with these values:
   1365 ///
   1366 ///    (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
   1367 ///
   1368 /// and add 13 + A*B*29 to AccumulatedConstant.
   1369 /// This will allow getAddExpr to produce this:
   1370 ///
   1371 ///    13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
   1372 ///
   1373 /// This form often exposes folding opportunities that are hidden in
   1374 /// the original operand list.
   1375 ///
   1376 /// Return true iff it appears that any interesting folding opportunities
   1377 /// may be exposed. This helps getAddExpr short-circuit extra work in
   1378 /// the common case where no interesting opportunities are present, and
   1379 /// is also used as a check to avoid infinite recursion.
   1380 ///
   1381 static bool
   1382 CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
   1383                              SmallVector<const SCEV *, 8> &NewOps,
   1384                              APInt &AccumulatedConstant,
   1385                              const SCEV *const *Ops, size_t NumOperands,
   1386                              const APInt &Scale,
   1387                              ScalarEvolution &SE) {
   1388   bool Interesting = false;
   1389 
   1390   // Iterate over the add operands. They are sorted, with constants first.
   1391   unsigned i = 0;
   1392   while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
   1393     ++i;
   1394     // Pull a buried constant out to the outside.
   1395     if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
   1396       Interesting = true;
   1397     AccumulatedConstant += Scale * C->getValue()->getValue();
   1398   }
   1399 
   1400   // Next comes everything else. We're especially interested in multiplies
   1401   // here, but they're in the middle, so just visit the rest with one loop.
   1402   for (; i != NumOperands; ++i) {
   1403     const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
   1404     if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
   1405       APInt NewScale =
   1406         Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
   1407       if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
   1408         // A multiplication of a constant with another add; recurse.
   1409         const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1));
   1410         Interesting |=
   1411           CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
   1412                                        Add->op_begin(), Add->getNumOperands(),
   1413                                        NewScale, SE);
   1414       } else {
   1415         // A multiplication of a constant with some other value. Update
   1416         // the map.
   1417         SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
   1418         const SCEV *Key = SE.getMulExpr(MulOps);
   1419         std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
   1420           M.insert(std::make_pair(Key, NewScale));
   1421         if (Pair.second) {
   1422           NewOps.push_back(Pair.first->first);
   1423         } else {
   1424           Pair.first->second += NewScale;
   1425           // The map already had an entry for this value, which may indicate
   1426           // a folding opportunity.
   1427           Interesting = true;
   1428         }
   1429       }
   1430     } else {
   1431       // An ordinary operand. Update the map.
   1432       std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
   1433         M.insert(std::make_pair(Ops[i], Scale));
   1434       if (Pair.second) {
   1435         NewOps.push_back(Pair.first->first);
   1436       } else {
   1437         Pair.first->second += Scale;
   1438         // The map already had an entry for this value, which may indicate
   1439         // a folding opportunity.
   1440         Interesting = true;
   1441       }
   1442     }
   1443   }
   1444 
   1445   return Interesting;
   1446 }
   1447 
   1448 namespace {
   1449   struct APIntCompare {
   1450     bool operator()(const APInt &LHS, const APInt &RHS) const {
   1451       return LHS.ult(RHS);
   1452     }
   1453   };
   1454 }
   1455 
   1456 /// getAddExpr - Get a canonical add expression, or something simpler if
   1457 /// possible.
   1458 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   1459                                         SCEV::NoWrapFlags Flags) {
   1460   assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
   1461          "only nuw or nsw allowed");
   1462   assert(!Ops.empty() && "Cannot get empty add!");
   1463   if (Ops.size() == 1) return Ops[0];
   1464 #ifndef NDEBUG
   1465   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   1466   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
   1467     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
   1468            "SCEVAddExpr operand types don't match!");
   1469 #endif
   1470 
   1471   // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
   1472   // And vice-versa.
   1473   int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
   1474   SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
   1475   if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
   1476     bool All = true;
   1477     for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
   1478          E = Ops.end(); I != E; ++I)
   1479       if (!isKnownNonNegative(*I)) {
   1480         All = false;
   1481         break;
   1482       }
   1483     if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
   1484   }
   1485 
   1486   // Sort by complexity, this groups all similar expression types together.
   1487   GroupByComplexity(Ops, LI);
   1488 
   1489   // If there are any constants, fold them together.
   1490   unsigned Idx = 0;
   1491   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
   1492     ++Idx;
   1493     assert(Idx < Ops.size());
   1494     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
   1495       // We found two constants, fold them together!
   1496       Ops[0] = getConstant(LHSC->getValue()->getValue() +
   1497                            RHSC->getValue()->getValue());
   1498       if (Ops.size() == 2) return Ops[0];
   1499       Ops.erase(Ops.begin()+1);  // Erase the folded element
   1500       LHSC = cast<SCEVConstant>(Ops[0]);
   1501     }
   1502 
   1503     // If we are left with a constant zero being added, strip it off.
   1504     if (LHSC->getValue()->isZero()) {
   1505       Ops.erase(Ops.begin());
   1506       --Idx;
   1507     }
   1508 
   1509     if (Ops.size() == 1) return Ops[0];
   1510   }
   1511 
   1512   // Okay, check to see if the same value occurs in the operand list more than
   1513   // once.  If so, merge them together into a multiply expression.  Since we
   1514   // sorted the list, these values are required to be adjacent.
   1515   Type *Ty = Ops[0]->getType();
   1516   bool FoundMatch = false;
   1517   for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
   1518     if (Ops[i] == Ops[i+1]) {      //  X + Y + Y  -->  X + Y*2
   1519       // Scan ahead to count how many equal operands there are.
   1520       unsigned Count = 2;
   1521       while (i+Count != e && Ops[i+Count] == Ops[i])
   1522         ++Count;
   1523       // Merge the values into a multiply.
   1524       const SCEV *Scale = getConstant(Ty, Count);
   1525       const SCEV *Mul = getMulExpr(Scale, Ops[i]);
   1526       if (Ops.size() == Count)
   1527         return Mul;
   1528       Ops[i] = Mul;
   1529       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
   1530       --i; e -= Count - 1;
   1531       FoundMatch = true;
   1532     }
   1533   if (FoundMatch)
   1534     return getAddExpr(Ops, Flags);
   1535 
   1536   // Check for truncates. If all the operands are truncated from the same
   1537   // type, see if factoring out the truncate would permit the result to be
   1538   // folded. E.g., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
   1539   // if the contents of the resulting outer trunc fold to something simple.
   1540   for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
   1541     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
   1542     Type *DstType = Trunc->getType();
   1543     Type *SrcType = Trunc->getOperand()->getType();
   1544     SmallVector<const SCEV *, 8> LargeOps;
   1545     bool Ok = true;
   1546     // Check all the operands to see if they can be represented in the
   1547     // source type of the truncate.
   1548     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
   1549       if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
   1550         if (T->getOperand()->getType() != SrcType) {
   1551           Ok = false;
   1552           break;
   1553         }
   1554         LargeOps.push_back(T->getOperand());
   1555       } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
   1556         LargeOps.push_back(getAnyExtendExpr(C, SrcType));
   1557       } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
   1558         SmallVector<const SCEV *, 8> LargeMulOps;
   1559         for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
   1560           if (const SCEVTruncateExpr *T =
   1561                 dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
   1562             if (T->getOperand()->getType() != SrcType) {
   1563               Ok = false;
   1564               break;
   1565             }
   1566             LargeMulOps.push_back(T->getOperand());
   1567           } else if (const SCEVConstant *C =
   1568                        dyn_cast<SCEVConstant>(M->getOperand(j))) {
   1569             LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
   1570           } else {
   1571             Ok = false;
   1572             break;
   1573           }
   1574         }
   1575         if (Ok)
   1576           LargeOps.push_back(getMulExpr(LargeMulOps));
   1577       } else {
   1578         Ok = false;
   1579         break;
   1580       }
   1581     }
   1582     if (Ok) {
   1583       // Evaluate the expression in the larger type.
   1584       const SCEV *Fold = getAddExpr(LargeOps, Flags);
   1585       // If it folds to something simple, use it. Otherwise, don't.
   1586       if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
   1587         return getTruncateExpr(Fold, DstType);
   1588     }
   1589   }
   1590 
   1591   // Skip past any other cast SCEVs.
   1592   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
   1593     ++Idx;
   1594 
   1595   // If there are add operands they would be next.
   1596   if (Idx < Ops.size()) {
   1597     bool DeletedAdd = false;
   1598     while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
   1599       // If we have an add, expand the add operands onto the end of the operands
   1600       // list.
   1601       Ops.erase(Ops.begin()+Idx);
   1602       Ops.append(Add->op_begin(), Add->op_end());
   1603       DeletedAdd = true;
   1604     }
   1605 
   1606     // If we deleted at least one add, we added operands to the end of the list,
   1607     // and they are not necessarily sorted.  Recurse to resort and resimplify
   1608     // any operands we just acquired.
   1609     if (DeletedAdd)
   1610       return getAddExpr(Ops);
   1611   }
   1612 
   1613   // Skip over the add expression until we get to a multiply.
   1614   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
   1615     ++Idx;
   1616 
   1617   // Check to see if there are any folding opportunities present with
   1618   // operands multiplied by constant values.
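          // For example, (2*%x + 3*%x + %y) is regrouped below as (5*%x + %y), and a
          // pair like (%r + -1*%r) cancels to zero.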
   1619   if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
   1620     uint64_t BitWidth = getTypeSizeInBits(Ty);
   1621     DenseMap<const SCEV *, APInt> M;
   1622     SmallVector<const SCEV *, 8> NewOps;
   1623     APInt AccumulatedConstant(BitWidth, 0);
   1624     if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
   1625                                      Ops.data(), Ops.size(),
   1626                                      APInt(BitWidth, 1), *this)) {
   1627       // Some interesting folding opportunity is present, so it's worthwhile to
   1628       // re-generate the operands list. Group the operands by constant scale,
   1629       // to avoid multiplying by the same constant scale multiple times.
   1630       std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
   1631       for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(),
   1632            E = NewOps.end(); I != E; ++I)
   1633         MulOpLists[M.find(*I)->second].push_back(*I);
   1634       // Re-generate the operands list.
   1635       Ops.clear();
   1636       if (AccumulatedConstant != 0)
   1637         Ops.push_back(getConstant(AccumulatedConstant));
   1638       for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
   1639            I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
   1640         if (I->first != 0)
   1641           Ops.push_back(getMulExpr(getConstant(I->first),
   1642                                    getAddExpr(I->second)));
   1643       if (Ops.empty())
   1644         return getConstant(Ty, 0);
   1645       if (Ops.size() == 1)
   1646         return Ops[0];
   1647       return getAddExpr(Ops);
   1648     }
   1649   }
   1650 
   1651   // If we are adding something to a multiply expression, make sure the
   1652   // something is not already an operand of the multiply.  If so, merge it into
   1653   // the multiply.
   1654   for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
   1655     const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
   1656     for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
   1657       const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
   1658       if (isa<SCEVConstant>(MulOpSCEV))
   1659         continue;
   1660       for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
   1661         if (MulOpSCEV == Ops[AddOp]) {
   1662           // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
   1663           const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
   1664           if (Mul->getNumOperands() != 2) {
   1665             // If the multiply has more than two operands, we must get the
   1666             // Y*Z term.
   1667             SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
   1668                                                 Mul->op_begin()+MulOp);
   1669             MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
   1670             InnerMul = getMulExpr(MulOps);
   1671           }
   1672           const SCEV *One = getConstant(Ty, 1);
   1673           const SCEV *AddOne = getAddExpr(One, InnerMul);
   1674           const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
   1675           if (Ops.size() == 2) return OuterMul;
   1676           if (AddOp < Idx) {
   1677             Ops.erase(Ops.begin()+AddOp);
   1678             Ops.erase(Ops.begin()+Idx-1);
   1679           } else {
   1680             Ops.erase(Ops.begin()+Idx);
   1681             Ops.erase(Ops.begin()+AddOp-1);
   1682           }
   1683           Ops.push_back(OuterMul);
   1684           return getAddExpr(Ops);
   1685         }
   1686 
   1687       // Check this multiply against other multiplies being added together.
   1688       for (unsigned OtherMulIdx = Idx+1;
   1689            OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
   1690            ++OtherMulIdx) {
   1691         const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
   1692         // If MulOp occurs in OtherMul, we can fold the two multiplies
   1693         // together.
   1694         for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
   1695              OMulOp != e; ++OMulOp)
   1696           if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
   1697             // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
   1698             const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
   1699             if (Mul->getNumOperands() != 2) {
   1700               SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
   1701                                                   Mul->op_begin()+MulOp);
   1702               MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
   1703               InnerMul1 = getMulExpr(MulOps);
   1704             }
   1705             const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
   1706             if (OtherMul->getNumOperands() != 2) {
   1707               SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
   1708                                                   OtherMul->op_begin()+OMulOp);
   1709               MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
   1710               InnerMul2 = getMulExpr(MulOps);
   1711             }
   1712             const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
   1713             const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
   1714             if (Ops.size() == 2) return OuterMul;
   1715             Ops.erase(Ops.begin()+Idx);
   1716             Ops.erase(Ops.begin()+OtherMulIdx-1);
   1717             Ops.push_back(OuterMul);
   1718             return getAddExpr(Ops);
   1719           }
   1720       }
   1721     }
   1722   }
   1723 
   1724   // If there are any add recurrences in the operands list, see if any other
   1725   // added values are loop invariant.  If so, we can fold them into the
   1726   // recurrence.
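          // For example, (%inv + {0,+,1}<%L>) becomes {%inv,+,1}<%L> when %inv is
          // invariant in %L, per the NLI + LI + {Start,+,Step} rule below.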
   1727   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
   1728     ++Idx;
   1729 
   1730   // Scan over all recurrences, trying to fold loop invariants into them.
   1731   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
   1732     // Scan all of the other operands to this add and add them to the vector if
   1733     // they are loop invariant w.r.t. the recurrence.
   1734     SmallVector<const SCEV *, 8> LIOps;
   1735     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
   1736     const Loop *AddRecLoop = AddRec->getLoop();
   1737     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   1738       if (isLoopInvariant(Ops[i], AddRecLoop)) {
   1739         LIOps.push_back(Ops[i]);
   1740         Ops.erase(Ops.begin()+i);
   1741         --i; --e;
   1742       }
   1743 
   1744     // If we found some loop invariants, fold them into the recurrence.
   1745     if (!LIOps.empty()) {
   1746       //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
   1747       LIOps.push_back(AddRec->getStart());
   1748 
   1749       SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
   1750                                              AddRec->op_end());
   1751       AddRecOps[0] = getAddExpr(LIOps);
   1752 
   1753       // Build the new addrec. Propagate the NUW and NSW flags if both the
   1754       // outer add and the inner addrec are guaranteed to have no overflow.
   1755       // Always propagate NW.
   1756       Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
   1757       const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
   1758 
   1759       // If all of the other operands were loop invariant, we are done.
   1760       if (Ops.size() == 1) return NewRec;
   1761 
   1762       // Otherwise, add the folded AddRec by the non-invariant parts.
   1763       for (unsigned i = 0;; ++i)
   1764         if (Ops[i] == AddRec) {
   1765           Ops[i] = NewRec;
   1766           break;
   1767         }
   1768       return getAddExpr(Ops);
   1769     }
   1770 
   1771     // Okay, if there weren't any loop invariants to be folded, check to see if
   1772     // there are multiple AddRec's with the same loop induction variable being
   1773     // added together.  If so, we can fold them.
   1774     for (unsigned OtherIdx = Idx+1;
   1775          OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
   1776          ++OtherIdx)
   1777       if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
   1778         // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
   1779         SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
   1780                                                AddRec->op_end());
   1781         for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
   1782              ++OtherIdx)
   1783           if (const SCEVAddRecExpr *OtherAddRec =
   1784                 dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
   1785             if (OtherAddRec->getLoop() == AddRecLoop) {
   1786               for (unsigned i = 0, e = OtherAddRec->getNumOperands();
   1787                    i != e; ++i) {
   1788                 if (i >= AddRecOps.size()) {
   1789                   AddRecOps.append(OtherAddRec->op_begin()+i,
   1790                                    OtherAddRec->op_end());
   1791                   break;
   1792                 }
   1793                 AddRecOps[i] = getAddExpr(AddRecOps[i],
   1794                                           OtherAddRec->getOperand(i));
   1795               }
   1796               Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
   1797             }
   1798         // Step size has changed, so we cannot guarantee no self-wraparound.
   1799         Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
   1800         return getAddExpr(Ops);
   1801       }
   1802 
   1803     // Otherwise couldn't fold anything into this recurrence.  Move onto the
   1804     // next one.
   1805   }
   1806 
   1807   // Okay, it looks like we really DO need an add expr.  Check to see if we
   1808   // already have one, otherwise create a new one.
   1809   FoldingSetNodeID ID;
   1810   ID.AddInteger(scAddExpr);
   1811   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   1812     ID.AddPointer(Ops[i]);
   1813   void *IP = 0;
   1814   SCEVAddExpr *S =
   1815     static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   1816   if (!S) {
   1817     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   1818     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
   1819     S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
   1820                                         O, Ops.size());
   1821     UniqueSCEVs.InsertNode(S, IP);
   1822   }
   1823   S->setNoWrapFlags(Flags);
   1824   return S;
   1825 }
   1826 
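        // Multiply two unsigned 64-bit values, setting Overflow if the product wraps;
        // e.g. (1ULL << 33) * (1ULL << 33) wraps to 0 and sets Overflow.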
   1827 static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
   1828   uint64_t k = i*j;
   1829   if (j > 1 && k / j != i) Overflow = true;
   1830   return k;
   1831 }
   1832 
   1833 /// Compute the result of "n choose k", the binomial coefficient.  If an
   1834 /// intermediate computation overflows, Overflow will be set and the return will
   1835 /// be garbage. Overflow is not cleared on absence of overflow.
   1836 static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
   1837   // We use the multiplicative formula:
   1838   //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
   1839   // At each iteration, we multiply by the next term of the numerator and divide
   1840   // by the next term of the denominator.  This division will always produce an
   1841   // integral result, and helps reduce the chance of overflow in the
   1842   // intermediate computations. However, we can still overflow even when the
   1843   // final result would fit.
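          //
          // For example, Choose(5, 2) computes r = 5/1 = 5 and then r = (5*4)/2 = 10,
          // matching 5!/(2!*3!), with no overflow along the way.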
   1844 
   1845   if (n == 0 || n == k) return 1;
   1846   if (k > n) return 0;
   1847 
   1848   if (k > n/2)
   1849     k = n-k;
   1850 
   1851   uint64_t r = 1;
   1852   for (uint64_t i = 1; i <= k; ++i) {
   1853     r = umul_ov(r, n-(i-1), Overflow);
   1854     r /= i;
   1855   }
   1856   return r;
   1857 }
   1858 
   1859 /// getMulExpr - Get a canonical multiply expression, or something simpler if
   1860 /// possible.
   1861 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
   1862                                         SCEV::NoWrapFlags Flags) {
   1863   assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) &&
   1864          "only nuw or nsw allowed");
   1865   assert(!Ops.empty() && "Cannot get empty mul!");
   1866   if (Ops.size() == 1) return Ops[0];
   1867 #ifndef NDEBUG
   1868   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   1869   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
   1870     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
   1871            "SCEVMulExpr operand types don't match!");
   1872 #endif
   1873 
   1874   // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
   1875   // And vice-versa.
   1876   int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
   1877   SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
   1878   if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
   1879     bool All = true;
   1880     for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
   1881          E = Ops.end(); I != E; ++I)
   1882       if (!isKnownNonNegative(*I)) {
   1883         All = false;
   1884         break;
   1885       }
   1886     if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
   1887   }
   1888 
   1889   // Sort by complexity, this groups all similar expression types together.
   1890   GroupByComplexity(Ops, LI);
   1891 
   1892   // If there are any constants, fold them together.
   1893   unsigned Idx = 0;
   1894   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
   1895 
   1896     // C1*(C2+V) -> C1*C2 + C1*V
   1897     if (Ops.size() == 2)
   1898       if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
   1899         if (Add->getNumOperands() == 2 &&
   1900             isa<SCEVConstant>(Add->getOperand(0)))
   1901           return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
   1902                             getMulExpr(LHSC, Add->getOperand(1)));
   1903 
   1904     ++Idx;
   1905     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
   1906       // We found two constants, fold them together!
   1907       ConstantInt *Fold = ConstantInt::get(getContext(),
   1908                                            LHSC->getValue()->getValue() *
   1909                                            RHSC->getValue()->getValue());
   1910       Ops[0] = getConstant(Fold);
   1911       Ops.erase(Ops.begin()+1);  // Erase the folded element
   1912       if (Ops.size() == 1) return Ops[0];
   1913       LHSC = cast<SCEVConstant>(Ops[0]);
   1914     }
   1915 
   1916     // If we are left with a constant one being multiplied, strip it off.
   1917     if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) {
   1918       Ops.erase(Ops.begin());
   1919       --Idx;
   1920     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
   1921       // If we have a multiply of zero, it will always be zero.
   1922       return Ops[0];
   1923     } else if (Ops[0]->isAllOnesValue()) {
   1924       // If we have a mul by -1 of an add, try distributing the -1 among the
   1925       // add operands.
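              // For example, -1 * (%a + %b) becomes (-1*%a + -1*%b) below when at
              // least one of the distributed products folds to something simpler
              // than a mul.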
   1926       if (Ops.size() == 2) {
   1927         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
   1928           SmallVector<const SCEV *, 4> NewOps;
   1929           bool AnyFolded = false;
   1930           for (SCEVAddRecExpr::op_iterator I = Add->op_begin(),
   1931                  E = Add->op_end(); I != E; ++I) {
   1932             const SCEV *Mul = getMulExpr(Ops[0], *I);
   1933             if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
   1934             NewOps.push_back(Mul);
   1935           }
   1936           if (AnyFolded)
   1937             return getAddExpr(NewOps);
   1938         }
   1939         else if (const SCEVAddRecExpr *
   1940                  AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
   1941           // Negation preserves a recurrence's no self-wrap property.
   1942           SmallVector<const SCEV *, 4> Operands;
   1943           for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
   1944                  E = AddRec->op_end(); I != E; ++I) {
   1945             Operands.push_back(getMulExpr(Ops[0], *I));
   1946           }
   1947           return getAddRecExpr(Operands, AddRec->getLoop(),
   1948                                AddRec->getNoWrapFlags(SCEV::FlagNW));
   1949         }
   1950       }
   1951     }
   1952 
   1953     if (Ops.size() == 1)
   1954       return Ops[0];
   1955   }
   1956 
   1957   // Skip over the add expression until we get to a multiply.
   1958   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
   1959     ++Idx;
   1960 
   1961   // If there are mul operands inline them all into this expression.
   1962   if (Idx < Ops.size()) {
   1963     bool DeletedMul = false;
   1964     while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
   1965       // If we have a mul, expand the mul operands onto the end of the operands
   1966       // list.
   1967       Ops.erase(Ops.begin()+Idx);
   1968       Ops.append(Mul->op_begin(), Mul->op_end());
   1969       DeletedMul = true;
   1970     }
   1971 
   1972     // If we deleted at least one mul, we added operands to the end of the list,
   1973     // and they are not necessarily sorted.  Recurse to resort and resimplify
   1974     // any operands we just acquired.
   1975     if (DeletedMul)
   1976       return getMulExpr(Ops);
   1977   }
   1978 
   1979   // If there are any add recurrences in the operands list, see if any other
   1980   // added values are loop invariant.  If so, we can fold them into the
   1981   // recurrence.
   1982   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
   1983     ++Idx;
   1984 
   1985   // Scan over all recurrences, trying to fold loop invariants into them.
   1986   for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
   1987     // Scan all of the other operands to this mul and add them to the vector if
   1988     // they are loop invariant w.r.t. the recurrence.
   1989     SmallVector<const SCEV *, 8> LIOps;
   1990     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
   1991     const Loop *AddRecLoop = AddRec->getLoop();
   1992     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   1993       if (isLoopInvariant(Ops[i], AddRecLoop)) {
   1994         LIOps.push_back(Ops[i]);
   1995         Ops.erase(Ops.begin()+i);
   1996         --i; --e;
   1997       }
   1998 
   1999     // If we found some loop invariants, fold them into the recurrence.
   2000     if (!LIOps.empty()) {
   2001       //  NLI * LI * {Start,+,Step}  -->  NLI * {LI*Start,+,LI*Step}
   2002       SmallVector<const SCEV *, 4> NewOps;
   2003       NewOps.reserve(AddRec->getNumOperands());
   2004       const SCEV *Scale = getMulExpr(LIOps);
   2005       for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
   2006         NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
   2007 
   2008       // Build the new addrec. Propagate the NUW and NSW flags if both the
   2009       // outer mul and the inner addrec are guaranteed to have no overflow.
   2010       //
   2011       // The no-self-wrap flag (NW) cannot be preserved after changing the step
   2012       // size, but it will be inferred if either NUW or NSW is true.
   2013       Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
   2014       const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
   2015 
   2016       // If all of the other operands were loop invariant, we are done.
   2017       if (Ops.size() == 1) return NewRec;
   2018 
   2019       // Otherwise, multiply the folded AddRec by the non-invariant parts.
   2020       for (unsigned i = 0;; ++i)
   2021         if (Ops[i] == AddRec) {
   2022           Ops[i] = NewRec;
   2023           break;
   2024         }
   2025       return getMulExpr(Ops);
   2026     }
   2027 
   2028     // Okay, if there weren't any loop invariants to be folded, check to see if
   2029     // there are multiple AddRec's with the same loop induction variable being
   2030     // multiplied together.  If so, we can fold them.
   2031     for (unsigned OtherIdx = Idx+1;
   2032          OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
   2033          ++OtherIdx) {
   2034       if (AddRecLoop != cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop())
   2035         continue;
   2036 
   2037       // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
   2038       // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
   2039       //       choose(x, 2x-y)*choose(2x-y, x-z)*A_{y-z}*B_z
   2040       //   ]]],+,...up to x=2n}.
   2041       // Note that the arguments to choose() are always integers with values
   2042       // known at compile time, never SCEV objects.
   2043       //
   2044       // The implementation avoids pointless extra computations when the two
   2045       // addrec's are of different length (mathematically, it's equivalent to
   2046       // an infinite stream of zeros on the right).
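              //
              // For example, {1,+,1}<L> * {1,+,1}<L> (the squares 1, 4, 9, 16, ...)
              // folds to the quadratic recurrence {1,+,3,+,2}<L>.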
   2047       bool OpsModified = false;
   2048       for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
   2049            ++OtherIdx) {
   2050         const SCEVAddRecExpr *OtherAddRec =
   2051           dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
   2052         if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
   2053           continue;
   2054 
   2055         bool Overflow = false;
   2056         Type *Ty = AddRec->getType();
   2057         bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
   2058         SmallVector<const SCEV*, 7> AddRecOps;
   2059         for (int x = 0, xe = AddRec->getNumOperands() +
   2060                OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
   2061           const SCEV *Term = getConstant(Ty, 0);
   2062           for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
   2063             uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
   2064             for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
   2065                    ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
   2066                  z < ze && !Overflow; ++z) {
   2067               uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
   2068               uint64_t Coeff;
   2069               if (LargerThan64Bits)
   2070                 Coeff = umul_ov(Coeff1, Coeff2, Overflow);
   2071               else
   2072                 Coeff = Coeff1*Coeff2;
   2073               const SCEV *CoeffTerm = getConstant(Ty, Coeff);
   2074               const SCEV *Term1 = AddRec->getOperand(y-z);
   2075               const SCEV *Term2 = OtherAddRec->getOperand(z);
   2076               Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
   2077             }
   2078           }
   2079           AddRecOps.push_back(Term);
   2080         }
   2081         if (!Overflow) {
   2082           const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
   2083                                                 SCEV::FlagAnyWrap);
   2084           if (Ops.size() == 2) return NewAddRec;
   2085           Ops[Idx] = NewAddRec;
   2086           Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
   2087           OpsModified = true;
   2088           AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
   2089           if (!AddRec)
   2090             break;
   2091         }
   2092       }
   2093       if (OpsModified)
   2094         return getMulExpr(Ops);
   2095     }
   2096 
   2097     // Otherwise couldn't fold anything into this recurrence.  Move onto the
   2098     // next one.
   2099   }
   2100 
   2101   // Okay, it looks like we really DO need a mul expr.  Check to see if we
   2102   // already have one, otherwise create a new one.
   2103   FoldingSetNodeID ID;
   2104   ID.AddInteger(scMulExpr);
   2105   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   2106     ID.AddPointer(Ops[i]);
   2107   void *IP = 0;
   2108   SCEVMulExpr *S =
   2109     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   2110   if (!S) {
   2111     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   2112     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
   2113     S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
   2114                                         O, Ops.size());
   2115     UniqueSCEVs.InsertNode(S, IP);
   2116   }
   2117   S->setNoWrapFlags(Flags);
   2118   return S;
   2119 }
   2120 
   2121 /// getUDivExpr - Get a canonical unsigned division expression, or something
   2122 /// simpler if possible.
   2123 const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
   2124                                          const SCEV *RHS) {
   2125   assert(getEffectiveSCEVType(LHS->getType()) ==
   2126          getEffectiveSCEVType(RHS->getType()) &&
   2127          "SCEVUDivExpr operand types don't match!");
   2128 
   2129   if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
   2130     if (RHSC->getValue()->equalsInt(1))
   2131       return LHS;                               // X udiv 1 --> x
   2132     // If the denominator is zero, the result of the udiv is undefined. Don't
   2133     // try to analyze it, because the resolution chosen here may differ from
   2134     // the resolution chosen in other parts of the compiler.
   2135     if (!RHSC->getValue()->isZero()) {
   2136       // Determine if the division can be folded into the operands of
   2137       // LHS.
   2138       // TODO: Generalize this to non-constants by using known-bits information.
   2139       Type *Ty = LHS->getType();
   2140       unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
   2141       unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
   2142       // For non-power-of-two values, effectively round the value up to the
   2143       // nearest power of two.
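              // (For instance, a divisor of 6 is treated like 8 for this purpose.)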
   2144       if (!RHSC->getValue()->getValue().isPowerOf2())
   2145         ++MaxShiftAmt;
   2146       IntegerType *ExtTy =
   2147         IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
   2148       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
   2149         if (const SCEVConstant *Step =
   2150             dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
   2151           // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
   2152           const APInt &StepInt = Step->getValue()->getValue();
   2153           const APInt &DivInt = RHSC->getValue()->getValue();
   2154           if (!StepInt.urem(DivInt) &&
   2155               getZeroExtendExpr(AR, ExtTy) ==
   2156               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
   2157                             getZeroExtendExpr(Step, ExtTy),
   2158                             AR->getLoop(), SCEV::FlagAnyWrap)) {
   2159             SmallVector<const SCEV *, 4> Operands;
   2160             for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
   2161               Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
   2162             return getAddRecExpr(Operands, AR->getLoop(),
   2163                                  SCEV::FlagNW);
   2164           }
   2165           // Get a canonical UDivExpr for a recurrence.
   2166           // {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
   2167           // We can currently only fold X%N if X is constant.
   2168           const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
   2169           if (StartC && !DivInt.urem(StepInt) &&
   2170               getZeroExtendExpr(AR, ExtTy) ==
   2171               getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
   2172                             getZeroExtendExpr(Step, ExtTy),
   2173                             AR->getLoop(), SCEV::FlagAnyWrap)) {
   2174             const APInt &StartInt = StartC->getValue()->getValue();
   2175             const APInt &StartRem = StartInt.urem(StepInt);
   2176             if (StartRem != 0)
   2177               LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
   2178                                   AR->getLoop(), SCEV::FlagNW);
   2179           }
   2180         }
   2181       // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
   2182       if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
   2183         SmallVector<const SCEV *, 4> Operands;
   2184         for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
   2185           Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
   2186         if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
   2187           // Find an operand that's safely divisible.
   2188           for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
   2189             const SCEV *Op = M->getOperand(i);
   2190             const SCEV *Div = getUDivExpr(Op, RHSC);
   2191             if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
   2192               Operands = SmallVector<const SCEV *, 4>(M->op_begin(),
   2193                                                       M->op_end());
   2194               Operands[i] = Div;
   2195               return getMulExpr(Operands);
   2196             }
   2197           }
   2198       }
   2199       // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
   2200       if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
   2201         SmallVector<const SCEV *, 4> Operands;
   2202         for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
   2203           Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
   2204         if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
   2205           Operands.clear();
   2206           for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
   2207             const SCEV *Op = getUDivExpr(A->getOperand(i), RHS);
   2208             if (isa<SCEVUDivExpr>(Op) ||
   2209                 getMulExpr(Op, RHS) != A->getOperand(i))
   2210               break;
   2211             Operands.push_back(Op);
   2212           }
   2213           if (Operands.size() == A->getNumOperands())
   2214             return getAddExpr(Operands);
   2215         }
   2216       }
   2217 
   2218       // Fold if both operands are constant.
   2219       if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
   2220         Constant *LHSCV = LHSC->getValue();
   2221         Constant *RHSCV = RHSC->getValue();
   2222         return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
   2223                                                                    RHSCV)));
   2224       }
   2225     }
   2226   }
   2227 
   2228   FoldingSetNodeID ID;
   2229   ID.AddInteger(scUDivExpr);
   2230   ID.AddPointer(LHS);
   2231   ID.AddPointer(RHS);
   2232   void *IP = 0;
   2233   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   2234   SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
   2235                                              LHS, RHS);
   2236   UniqueSCEVs.InsertNode(S, IP);
   2237   return S;
   2238 }
   2239 
   2240 
   2241 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
   2242 /// Simplify the expression as much as possible.
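        /// For example, when the step is itself a recurrence over the same loop, as in
        /// getAddRecExpr(X, {Y,+,Z}<L>, L), the result is flattened to {X,+,Y,+,Z}<L>.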
   2243 const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step,
   2244                                            const Loop *L,
   2245                                            SCEV::NoWrapFlags Flags) {
   2246   SmallVector<const SCEV *, 4> Operands;
   2247   Operands.push_back(Start);
   2248   if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
   2249     if (StepChrec->getLoop() == L) {
   2250       Operands.append(StepChrec->op_begin(), StepChrec->op_end());
   2251       return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
   2252     }
   2253 
   2254   Operands.push_back(Step);
   2255   return getAddRecExpr(Operands, L, Flags);
   2256 }
   2257 
   2258 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
   2259 /// Simplify the expression as much as possible.
   2260 const SCEV *
   2261 ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
   2262                                const Loop *L, SCEV::NoWrapFlags Flags) {
   2263   if (Operands.size() == 1) return Operands[0];
   2264 #ifndef NDEBUG
   2265   Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
   2266   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
   2267     assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
   2268            "SCEVAddRecExpr operand types don't match!");
   2269   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
   2270     assert(isLoopInvariant(Operands[i], L) &&
   2271            "SCEVAddRecExpr operand is not loop-invariant!");
   2272 #endif
   2273 
   2274   if (Operands.back()->isZero()) {
   2275     Operands.pop_back();
   2276     return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0}  -->  X
   2277   }
   2278 
   2279   // It's tempting to want to call getMaxBackedgeTakenCount here and
   2280   // use that information to infer NUW and NSW flags. However, computing a
   2281   // BE count requires calling getAddRecExpr, so we may not yet have a
   2282   // meaningful BE count at this point (and if we don't, we'd be stuck
   2283   // with a SCEVCouldNotCompute as the cached BE count).
   2284 
   2285   // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
   2286   // And vice-versa.
   2287   int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
   2288   SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
   2289   if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
   2290     bool All = true;
   2291     for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
   2292          E = Operands.end(); I != E; ++I)
   2293       if (!isKnownNonNegative(*I)) {
   2294         All = false;
   2295         break;
   2296       }
   2297     if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
   2298   }
   2299 
   2300   // Canonicalize nested AddRecs by nesting them in order of loop depth.
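          // For example, {{A,+,B}<%inner>,+,C}<%outer> is rewritten below as
          // {{A,+,C}<%outer>,+,B}<%inner>, provided the operands remain loop-invariant
          // where the transformed recurrences require them to be.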
   2301   if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
   2302     const Loop *NestedLoop = NestedAR->getLoop();
   2303     if (L->contains(NestedLoop) ?
   2304         (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
   2305         (!NestedLoop->contains(L) &&
   2306          DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
   2307       SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
   2308                                                   NestedAR->op_end());
   2309       Operands[0] = NestedAR->getStart();
   2310       // AddRecs require their operands be loop-invariant with respect to their
   2311       // loops. Don't perform this transformation if it would break this
   2312       // requirement.
   2313       bool AllInvariant = true;
   2314       for (unsigned i = 0, e = Operands.size(); i != e; ++i)
   2315         if (!isLoopInvariant(Operands[i], L)) {
   2316           AllInvariant = false;
   2317           break;
   2318         }
   2319       if (AllInvariant) {
   2320         // Create a recurrence for the outer loop with the same step size.
   2321         //
   2322         // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
   2323         // inner recurrence has the same property.
   2324         SCEV::NoWrapFlags OuterFlags =
   2325           maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
   2326 
   2327         NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
   2328         AllInvariant = true;
   2329         for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
   2330           if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
   2331             AllInvariant = false;
   2332             break;
   2333           }
   2334         if (AllInvariant) {
   2335           // Ok, both add recurrences are valid after the transformation.
   2336           //
   2337           // The inner recurrence keeps its NW flag but only keeps NUW/NSW if
   2338           // the outer recurrence has the same property.
   2339           SCEV::NoWrapFlags InnerFlags =
   2340             maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
   2341           return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
   2342         }
   2343       }
   2344       // Reset Operands to its original state.
   2345       Operands[0] = NestedAR;
   2346     }
   2347   }
   2348 
   2349   // Okay, it looks like we really DO need an addrec expr.  Check to see if we
   2350   // already have one, otherwise create a new one.
   2351   FoldingSetNodeID ID;
   2352   ID.AddInteger(scAddRecExpr);
   2353   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
   2354     ID.AddPointer(Operands[i]);
   2355   ID.AddPointer(L);
   2356   void *IP = 0;
   2357   SCEVAddRecExpr *S =
   2358     static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   2359   if (!S) {
   2360     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
   2361     std::uninitialized_copy(Operands.begin(), Operands.end(), O);
   2362     S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
   2363                                            O, Operands.size(), L);
   2364     UniqueSCEVs.InsertNode(S, IP);
   2365   }
   2366   S->setNoWrapFlags(Flags);
   2367   return S;
   2368 }
   2369 
   2370 const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
   2371                                          const SCEV *RHS) {
   2372   SmallVector<const SCEV *, 2> Ops;
   2373   Ops.push_back(LHS);
   2374   Ops.push_back(RHS);
   2375   return getSMaxExpr(Ops);
   2376 }
   2377 
   2378 const SCEV *
   2379 ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
   2380   assert(!Ops.empty() && "Cannot get empty smax!");
   2381   if (Ops.size() == 1) return Ops[0];
   2382 #ifndef NDEBUG
   2383   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   2384   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
   2385     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
   2386            "SCEVSMaxExpr operand types don't match!");
   2387 #endif
   2388 
    2389   // Sort by complexity; this groups all similar expression types together.
   2390   GroupByComplexity(Ops, LI);
   2391 
   2392   // If there are any constants, fold them together.
   2393   unsigned Idx = 0;
   2394   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
   2395     ++Idx;
   2396     assert(Idx < Ops.size());
   2397     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
   2398       // We found two constants, fold them together!
   2399       ConstantInt *Fold = ConstantInt::get(getContext(),
   2400                               APIntOps::smax(LHSC->getValue()->getValue(),
   2401                                              RHSC->getValue()->getValue()));
   2402       Ops[0] = getConstant(Fold);
   2403       Ops.erase(Ops.begin()+1);  // Erase the folded element
   2404       if (Ops.size() == 1) return Ops[0];
   2405       LHSC = cast<SCEVConstant>(Ops[0]);
   2406     }
   2407 
   2408     // If we are left with a constant minimum-int, strip it off.
   2409     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
   2410       Ops.erase(Ops.begin());
   2411       --Idx;
   2412     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
   2413       // If we have an smax with a constant maximum-int, it will always be
   2414       // maximum-int.
   2415       return Ops[0];
   2416     }
   2417 
   2418     if (Ops.size() == 1) return Ops[0];
   2419   }
   2420 
   2421   // Find the first SMax
   2422   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
   2423     ++Idx;
   2424 
   2425   // Check to see if one of the operands is an SMax. If so, expand its operands
   2426   // onto our operand list, and recurse to simplify.
   2427   if (Idx < Ops.size()) {
   2428     bool DeletedSMax = false;
   2429     while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
   2430       Ops.erase(Ops.begin()+Idx);
   2431       Ops.append(SMax->op_begin(), SMax->op_end());
   2432       DeletedSMax = true;
   2433     }
   2434 
   2435     if (DeletedSMax)
   2436       return getSMaxExpr(Ops);
   2437   }
   2438 
   2439   // Okay, check to see if the same value occurs in the operand list twice.  If
   2440   // so, delete one.  Since we sorted the list, these values are required to
   2441   // be adjacent.
   2442   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
   2443     //  X smax Y smax Y  -->  X smax Y
    2444     //  X smax Y         -->  X, if X is always greater than or equal to Y
   2445     if (Ops[i] == Ops[i+1] ||
   2446         isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
   2447       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
   2448       --i; --e;
   2449     } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
   2450       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
   2451       --i; --e;
   2452     }
   2453 
   2454   if (Ops.size() == 1) return Ops[0];
   2455 
   2456   assert(!Ops.empty() && "Reduced smax down to nothing!");
   2457 
   2458   // Okay, it looks like we really DO need an smax expr.  Check to see if we
   2459   // already have one, otherwise create a new one.
   2460   FoldingSetNodeID ID;
   2461   ID.AddInteger(scSMaxExpr);
   2462   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   2463     ID.AddPointer(Ops[i]);
   2464   void *IP = 0;
   2465   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   2466   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   2467   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
   2468   SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
   2469                                              O, Ops.size());
   2470   UniqueSCEVs.InsertNode(S, IP);
   2471   return S;
   2472 }
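         // Putting the folds above together, a call such as the following
         // (hypothetical client code, assuming a ScalarEvolution &SE and an i32
         // SCEV *X are in scope):
         //
         //   SmallVector<const SCEV *, 3> Ops;
         //   Ops.push_back(SE.getConstant(APInt(32, 3)));
         //   Ops.push_back(X);
         //   Ops.push_back(SE.getConstant(APInt(32, 7)));
         //   const SCEV *Max = SE.getSMaxExpr(Ops);
         //
         // first folds the two constants into the single constant 7 and, if X is
         // known to be signed greater-than-or-equal to 7, simplifies all the way
         // down to X.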
   2473 
   2474 const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
   2475                                          const SCEV *RHS) {
   2476   SmallVector<const SCEV *, 2> Ops;
   2477   Ops.push_back(LHS);
   2478   Ops.push_back(RHS);
   2479   return getUMaxExpr(Ops);
   2480 }
   2481 
   2482 const SCEV *
   2483 ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
   2484   assert(!Ops.empty() && "Cannot get empty umax!");
   2485   if (Ops.size() == 1) return Ops[0];
   2486 #ifndef NDEBUG
   2487   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   2488   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
   2489     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
   2490            "SCEVUMaxExpr operand types don't match!");
   2491 #endif
   2492 
    2493   // Sort by complexity; this groups all similar expression types together.
   2494   GroupByComplexity(Ops, LI);
   2495 
   2496   // If there are any constants, fold them together.
   2497   unsigned Idx = 0;
   2498   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
   2499     ++Idx;
   2500     assert(Idx < Ops.size());
   2501     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
   2502       // We found two constants, fold them together!
   2503       ConstantInt *Fold = ConstantInt::get(getContext(),
   2504                               APIntOps::umax(LHSC->getValue()->getValue(),
   2505                                              RHSC->getValue()->getValue()));
   2506       Ops[0] = getConstant(Fold);
   2507       Ops.erase(Ops.begin()+1);  // Erase the folded element
   2508       if (Ops.size() == 1) return Ops[0];
   2509       LHSC = cast<SCEVConstant>(Ops[0]);
   2510     }
   2511 
   2512     // If we are left with a constant minimum-int, strip it off.
   2513     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
   2514       Ops.erase(Ops.begin());
   2515       --Idx;
   2516     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
   2517       // If we have an umax with a constant maximum-int, it will always be
   2518       // maximum-int.
   2519       return Ops[0];
   2520     }
   2521 
   2522     if (Ops.size() == 1) return Ops[0];
   2523   }
   2524 
   2525   // Find the first UMax
   2526   while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
   2527     ++Idx;
   2528 
   2529   // Check to see if one of the operands is a UMax. If so, expand its operands
   2530   // onto our operand list, and recurse to simplify.
   2531   if (Idx < Ops.size()) {
   2532     bool DeletedUMax = false;
   2533     while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
   2534       Ops.erase(Ops.begin()+Idx);
   2535       Ops.append(UMax->op_begin(), UMax->op_end());
   2536       DeletedUMax = true;
   2537     }
   2538 
   2539     if (DeletedUMax)
   2540       return getUMaxExpr(Ops);
   2541   }
   2542 
   2543   // Okay, check to see if the same value occurs in the operand list twice.  If
   2544   // so, delete one.  Since we sorted the list, these values are required to
   2545   // be adjacent.
   2546   for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
   2547     //  X umax Y umax Y  -->  X umax Y
    2548     //  X umax Y         -->  X, if X is always greater than or equal to Y
   2549     if (Ops[i] == Ops[i+1] ||
   2550         isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
   2551       Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
   2552       --i; --e;
   2553     } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
   2554       Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
   2555       --i; --e;
   2556     }
   2557 
   2558   if (Ops.size() == 1) return Ops[0];
   2559 
   2560   assert(!Ops.empty() && "Reduced umax down to nothing!");
   2561 
   2562   // Okay, it looks like we really DO need a umax expr.  Check to see if we
   2563   // already have one, otherwise create a new one.
   2564   FoldingSetNodeID ID;
   2565   ID.AddInteger(scUMaxExpr);
   2566   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   2567     ID.AddPointer(Ops[i]);
   2568   void *IP = 0;
   2569   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   2570   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   2571   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
   2572   SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
   2573                                              O, Ops.size());
   2574   UniqueSCEVs.InsertNode(S, IP);
   2575   return S;
   2576 }
   2577 
   2578 const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
   2579                                          const SCEV *RHS) {
   2580   // ~smax(~x, ~y) == smin(x, y).
   2581   return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
   2582 }
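         // A worked instance of the identity above, for i8 values x = 5 and y = 9:
         // ~x = -6 and ~y = -10, smax(-6, -10) = -6, and ~(-6) = 5 = smin(5, 9).
         // getUMinExpr below uses the same construction with the complemented
         // values compared as unsigned quantities.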
   2583 
   2584 const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
   2585                                          const SCEV *RHS) {
   2586   // ~umax(~x, ~y) == umin(x, y)
   2587   return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
   2588 }
   2589 
   2590 const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
   2591   // If we have DataLayout, we can bypass creating a target-independent
   2592   // constant expression and then folding it back into a ConstantInt.
   2593   // This is just a compile-time optimization.
   2594   if (TD)
   2595     return getConstant(TD->getIntPtrType(getContext()),
   2596                        TD->getTypeAllocSize(AllocTy));
   2597 
   2598   Constant *C = ConstantExpr::getSizeOf(AllocTy);
   2599   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
   2600     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
   2601       C = Folded;
   2602   Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
   2603   return getTruncateOrZeroExtend(getSCEV(C), Ty);
   2604 }
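         // For example, with DataLayout for a typical 64-bit target, getSizeOfExpr
         // of a double yields the i64 constant 8 directly; the target-independent
         // constant-expression path is only used when no DataLayout is available.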
   2605 
   2606 const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
   2607   Constant *C = ConstantExpr::getAlignOf(AllocTy);
   2608   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
   2609     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
   2610       C = Folded;
   2611   Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
   2612   return getTruncateOrZeroExtend(getSCEV(C), Ty);
   2613 }
   2614 
   2615 const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
   2616                                              unsigned FieldNo) {
   2617   // If we have DataLayout, we can bypass creating a target-independent
   2618   // constant expression and then folding it back into a ConstantInt.
   2619   // This is just a compile-time optimization.
   2620   if (TD)
   2621     return getConstant(TD->getIntPtrType(getContext()),
   2622                        TD->getStructLayout(STy)->getElementOffset(FieldNo));
   2623 
   2624   Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
   2625   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
   2626     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
   2627       C = Folded;
   2628   Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
   2629   return getTruncateOrZeroExtend(getSCEV(C), Ty);
   2630 }
   2631 
   2632 const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
   2633                                              Constant *FieldNo) {
   2634   Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
   2635   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
   2636     if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
   2637       C = Folded;
   2638   Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
   2639   return getTruncateOrZeroExtend(getSCEV(C), Ty);
   2640 }
   2641 
   2642 const SCEV *ScalarEvolution::getUnknown(Value *V) {
   2643   // Don't attempt to do anything other than create a SCEVUnknown object
   2644   // here.  createSCEV only calls getUnknown after checking for all other
   2645   // interesting possibilities, and any other code that calls getUnknown
   2646   // is doing so in order to hide a value from SCEV canonicalization.
   2647 
   2648   FoldingSetNodeID ID;
   2649   ID.AddInteger(scUnknown);
   2650   ID.AddPointer(V);
   2651   void *IP = 0;
   2652   if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
   2653     assert(cast<SCEVUnknown>(S)->getValue() == V &&
   2654            "Stale SCEVUnknown in uniquing map!");
   2655     return S;
   2656   }
   2657   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
   2658                                             FirstUnknown);
   2659   FirstUnknown = cast<SCEVUnknown>(S);
   2660   UniqueSCEVs.InsertNode(S, IP);
   2661   return S;
   2662 }
   2663 
   2664 //===----------------------------------------------------------------------===//
   2665 //            Basic SCEV Analysis and PHI Idiom Recognition Code
   2666 //
   2667 
   2668 /// isSCEVable - Test if values of the given type are analyzable within
    2669 /// the SCEV framework. This includes integer types and pointer types;
    2670 /// pointer types are analyzable even without target-specific information,
    2671 /// in which case conservative assumptions are made about their size.
   2672 bool ScalarEvolution::isSCEVable(Type *Ty) const {
   2673   // Integers and pointers are always SCEVable.
   2674   return Ty->isIntegerTy() || Ty->isPointerTy();
   2675 }
   2676 
   2677 /// getTypeSizeInBits - Return the size in bits of the specified type,
   2678 /// for which isSCEVable must return true.
   2679 uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
   2680   assert(isSCEVable(Ty) && "Type is not SCEVable!");
   2681 
   2682   // If we have a DataLayout, use it!
   2683   if (TD)
   2684     return TD->getTypeSizeInBits(Ty);
   2685 
   2686   // Integer types have fixed sizes.
   2687   if (Ty->isIntegerTy())
   2688     return Ty->getPrimitiveSizeInBits();
   2689 
    2690   // The only other supported type is pointer. Without DataLayout, conservatively
   2691   // assume pointers are 64-bit.
   2692   assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
   2693   return 64;
   2694 }
   2695 
   2696 /// getEffectiveSCEVType - Return a type with the same bitwidth as
   2697 /// the given type and which represents how SCEV will treat the given
   2698 /// type, for which isSCEVable must return true. For pointer types,
   2699 /// this is the pointer-sized integer type.
   2700 Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
   2701   assert(isSCEVable(Ty) && "Type is not SCEVable!");
   2702 
   2703   if (Ty->isIntegerTy())
   2704     return Ty;
   2705 
    2706   // The only other supported type is pointer.
   2707   assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
   2708   if (TD) return TD->getIntPtrType(getContext());
   2709 
   2710   // Without DataLayout, conservatively assume pointers are 64-bit.
   2711   return Type::getInt64Ty(getContext());
   2712 }
   2713 
   2714 const SCEV *ScalarEvolution::getCouldNotCompute() {
   2715   return &CouldNotCompute;
   2716 }
   2717 
   2718 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
   2719 /// expression and create a new one.
   2720 const SCEV *ScalarEvolution::getSCEV(Value *V) {
   2721   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
   2722 
   2723   ValueExprMapType::const_iterator I = ValueExprMap.find_as(V);
   2724   if (I != ValueExprMap.end()) return I->second;
   2725   const SCEV *S = createSCEV(V);
   2726 
   2727   // The process of creating a SCEV for V may have caused other SCEVs
   2728   // to have been created, so it's necessary to insert the new entry
   2729   // from scratch, rather than trying to remember the insert position
   2730   // above.
   2731   ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
   2732   return S;
   2733 }
   2734 
   2735 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
   2736 ///
   2737 const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
   2738   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
   2739     return getConstant(
   2740                cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
   2741 
   2742   Type *Ty = V->getType();
   2743   Ty = getEffectiveSCEVType(Ty);
   2744   return getMulExpr(V,
   2745                   getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
   2746 }
   2747 
   2748 /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
   2749 const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
   2750   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
   2751     return getConstant(
   2752                 cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
   2753 
   2754   Type *Ty = V->getType();
   2755   Ty = getEffectiveSCEVType(Ty);
   2756   const SCEV *AllOnes =
   2757                    getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
   2758   return getMinusSCEV(AllOnes, V);
   2759 }
   2760 
   2761 /// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1.
   2762 const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
   2763                                           SCEV::NoWrapFlags Flags) {
   2764   assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
   2765 
   2766   // Fast path: X - X --> 0.
   2767   if (LHS == RHS)
   2768     return getConstant(LHS->getType(), 0);
   2769 
   2770   // X - Y --> X + -Y
   2771   return getAddExpr(LHS, getNegativeSCEV(RHS), Flags);
   2772 }
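         // For example, getMinusSCEV(%a, %b) is built as (%a + (-1 * %b)), and
         // getMinusSCEV(X, X) folds directly to the zero constant via the fast
         // path above.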
   2773 
   2774 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
   2775 /// input value to the specified type.  If the type must be extended, it is zero
   2776 /// extended.
   2777 const SCEV *
   2778 ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
   2779   Type *SrcTy = V->getType();
   2780   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2781          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
   2782          "Cannot truncate or zero extend with non-integer arguments!");
   2783   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2784     return V;  // No conversion
   2785   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
   2786     return getTruncateExpr(V, Ty);
   2787   return getZeroExtendExpr(V, Ty);
   2788 }
   2789 
   2790 /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
   2791 /// input value to the specified type.  If the type must be extended, it is sign
   2792 /// extended.
   2793 const SCEV *
   2794 ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
   2795                                          Type *Ty) {
   2796   Type *SrcTy = V->getType();
   2797   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2798          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
    2799          "Cannot truncate or sign extend with non-integer arguments!");
   2800   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2801     return V;  // No conversion
   2802   if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
   2803     return getTruncateExpr(V, Ty);
   2804   return getSignExtendExpr(V, Ty);
   2805 }
   2806 
   2807 /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
   2808 /// input value to the specified type.  If the type must be extended, it is zero
   2809 /// extended.  The conversion must not be narrowing.
   2810 const SCEV *
   2811 ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
   2812   Type *SrcTy = V->getType();
   2813   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2814          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
   2815          "Cannot noop or zero extend with non-integer arguments!");
   2816   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
   2817          "getNoopOrZeroExtend cannot truncate!");
   2818   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2819     return V;  // No conversion
   2820   return getZeroExtendExpr(V, Ty);
   2821 }
   2822 
   2823 /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
   2824 /// input value to the specified type.  If the type must be extended, it is sign
   2825 /// extended.  The conversion must not be narrowing.
   2826 const SCEV *
   2827 ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
   2828   Type *SrcTy = V->getType();
   2829   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2830          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
   2831          "Cannot noop or sign extend with non-integer arguments!");
   2832   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
   2833          "getNoopOrSignExtend cannot truncate!");
   2834   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2835     return V;  // No conversion
   2836   return getSignExtendExpr(V, Ty);
   2837 }
   2838 
   2839 /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
   2840 /// the input value to the specified type. If the type must be extended,
   2841 /// it is extended with unspecified bits. The conversion must not be
   2842 /// narrowing.
   2843 const SCEV *
   2844 ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
   2845   Type *SrcTy = V->getType();
   2846   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2847          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
   2848          "Cannot noop or any extend with non-integer arguments!");
   2849   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
   2850          "getNoopOrAnyExtend cannot truncate!");
   2851   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2852     return V;  // No conversion
   2853   return getAnyExtendExpr(V, Ty);
   2854 }
   2855 
   2856 /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
   2857 /// input value to the specified type.  The conversion must not be widening.
   2858 const SCEV *
   2859 ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
   2860   Type *SrcTy = V->getType();
   2861   assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
   2862          (Ty->isIntegerTy() || Ty->isPointerTy()) &&
   2863          "Cannot truncate or noop with non-integer arguments!");
   2864   assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
   2865          "getTruncateOrNoop cannot extend!");
   2866   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
   2867     return V;  // No conversion
   2868   return getTruncateExpr(V, Ty);
   2869 }
   2870 
   2871 /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
   2872 /// the types using zero-extension, and then perform a umax operation
   2873 /// with them.
   2874 const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
   2875                                                         const SCEV *RHS) {
   2876   const SCEV *PromotedLHS = LHS;
   2877   const SCEV *PromotedRHS = RHS;
   2878 
   2879   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
   2880     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
   2881   else
   2882     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
   2883 
   2884   return getUMaxExpr(PromotedLHS, PromotedRHS);
   2885 }
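         // For example, given an i8 SCEV %x and an i32 SCEV %y, the narrower
         // operand is promoted first, producing umax((zext i8 %x to i32), %y).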
   2886 
   2887 /// getUMinFromMismatchedTypes - Promote the operands to the wider of
   2888 /// the types using zero-extension, and then perform a umin operation
   2889 /// with them.
   2890 const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
   2891                                                         const SCEV *RHS) {
   2892   const SCEV *PromotedLHS = LHS;
   2893   const SCEV *PromotedRHS = RHS;
   2894 
   2895   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
   2896     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
   2897   else
   2898     PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
   2899 
   2900   return getUMinExpr(PromotedLHS, PromotedRHS);
   2901 }
   2902 
   2903 /// getPointerBase - Transitively follow the chain of pointer-type operands
   2904 /// until reaching a SCEV that does not have a single pointer operand. This
   2905 /// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
   2906 /// but corner cases do exist.
   2907 const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
   2908   // A pointer operand may evaluate to a nonpointer expression, such as null.
   2909   if (!V->getType()->isPointerTy())
   2910     return V;
   2911 
   2912   if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
   2913     return getPointerBase(Cast->getOperand());
   2914   }
   2915   else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
   2916     const SCEV *PtrOp = 0;
   2917     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
   2918          I != E; ++I) {
   2919       if ((*I)->getType()->isPointerTy()) {
   2920         // Cannot find the base of an expression with multiple pointer operands.
   2921         if (PtrOp)
   2922           return V;
   2923         PtrOp = *I;
   2924       }
   2925     }
   2926     if (!PtrOp)
   2927       return V;
   2928     return getPointerBase(PtrOp);
   2929   }
   2930   return V;
   2931 }
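         // For example, for the pointer-typed expression ((4 * %i) + %p), %p is the
         // only pointer operand of the add, so the recursion walks through the add
         // and returns the SCEVUnknown %p. An expression with two pointer operands
         // is returned unchanged.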
   2932 
   2933 /// PushDefUseChildren - Push users of the given Instruction
   2934 /// onto the given Worklist.
   2935 static void
   2936 PushDefUseChildren(Instruction *I,
   2937                    SmallVectorImpl<Instruction *> &Worklist) {
   2938   // Push the def-use children onto the Worklist stack.
   2939   for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
   2940        UI != UE; ++UI)
   2941     Worklist.push_back(cast<Instruction>(*UI));
   2942 }
   2943 
    2944 /// ForgetSymbolicName - This looks up computed SCEV values for all
   2945 /// instructions that depend on the given instruction and removes them from
    2946 /// the ValueExprMap if they reference SymName. This is used during PHI
   2947 /// resolution.
   2948 void
   2949 ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
   2950   SmallVector<Instruction *, 16> Worklist;
   2951   PushDefUseChildren(PN, Worklist);
   2952 
   2953   SmallPtrSet<Instruction *, 8> Visited;
   2954   Visited.insert(PN);
   2955   while (!Worklist.empty()) {
   2956     Instruction *I = Worklist.pop_back_val();
   2957     if (!Visited.insert(I)) continue;
   2958 
   2959     ValueExprMapType::iterator It =
   2960       ValueExprMap.find_as(static_cast<Value *>(I));
   2961     if (It != ValueExprMap.end()) {
   2962       const SCEV *Old = It->second;
   2963 
   2964       // Short-circuit the def-use traversal if the symbolic name
   2965       // ceases to appear in expressions.
   2966       if (Old != SymName && !hasOperand(Old, SymName))
   2967         continue;
   2968 
   2969       // SCEVUnknown for a PHI either means that it has an unrecognized
    2970       // structure, it's a PHI that's in the process of being computed
   2971       // by createNodeForPHI, or it's a single-value PHI. In the first case,
   2972       // additional loop trip count information isn't going to change anything.
   2973       // In the second case, createNodeForPHI will perform the necessary
   2974       // updates on its own when it gets to that point. In the third, we do
   2975       // want to forget the SCEVUnknown.
   2976       if (!isa<PHINode>(I) ||
   2977           !isa<SCEVUnknown>(Old) ||
   2978           (I != PN && Old == SymName)) {
   2979         forgetMemoizedResults(Old);
   2980         ValueExprMap.erase(It);
   2981       }
   2982     }
   2983 
   2984     PushDefUseChildren(I, Worklist);
   2985   }
   2986 }
   2987 
   2988 /// createNodeForPHI - PHI nodes have two cases.  Either the PHI node exists in
   2989 /// a loop header, making it a potential recurrence, or it doesn't.
   2990 ///
   2991 const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
   2992   if (const Loop *L = LI->getLoopFor(PN->getParent()))
   2993     if (L->getHeader() == PN->getParent()) {
   2994       // The loop may have multiple entrances or multiple exits; we can analyze
   2995       // this phi as an addrec if it has a unique entry value and a unique
   2996       // backedge value.
   2997       Value *BEValueV = 0, *StartValueV = 0;
   2998       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
   2999         Value *V = PN->getIncomingValue(i);
   3000         if (L->contains(PN->getIncomingBlock(i))) {
   3001           if (!BEValueV) {
   3002             BEValueV = V;
   3003           } else if (BEValueV != V) {
   3004             BEValueV = 0;
   3005             break;
   3006           }
   3007         } else if (!StartValueV) {
   3008           StartValueV = V;
   3009         } else if (StartValueV != V) {
   3010           StartValueV = 0;
   3011           break;
   3012         }
   3013       }
   3014       if (BEValueV && StartValueV) {
   3015         // While we are analyzing this PHI node, handle its value symbolically.
   3016         const SCEV *SymbolicName = getUnknown(PN);
   3017         assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
   3018                "PHI node already processed?");
   3019         ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
   3020 
   3021         // Using this symbolic name for the PHI, analyze the value coming around
   3022         // the back-edge.
   3023         const SCEV *BEValue = getSCEV(BEValueV);
   3024 
   3025         // NOTE: If BEValue is loop invariant, we know that the PHI node just
   3026         // has a special value for the first iteration of the loop.
   3027 
   3028         // If the value coming around the backedge is an add with the symbolic
   3029         // value we just inserted, then we found a simple induction variable!
   3030         if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
   3031           // If there is a single occurrence of the symbolic value, replace it
   3032           // with a recurrence.
   3033           unsigned FoundIndex = Add->getNumOperands();
   3034           for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
   3035             if (Add->getOperand(i) == SymbolicName)
   3036               if (FoundIndex == e) {
   3037                 FoundIndex = i;
   3038                 break;
   3039               }
   3040 
   3041           if (FoundIndex != Add->getNumOperands()) {
   3042             // Create an add with everything but the specified operand.
   3043             SmallVector<const SCEV *, 8> Ops;
   3044             for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
   3045               if (i != FoundIndex)
   3046                 Ops.push_back(Add->getOperand(i));
   3047             const SCEV *Accum = getAddExpr(Ops);
   3048 
    3049             // This is not a valid addrec if the step amount varies with each
   3050             // loop iteration, but is not itself an addrec in this loop.
   3051             if (isLoopInvariant(Accum, L) ||
   3052                 (isa<SCEVAddRecExpr>(Accum) &&
   3053                  cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
   3054               SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
   3055 
   3056               // If the increment doesn't overflow, then neither the addrec nor
   3057               // the post-increment will overflow.
   3058               if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
   3059                 if (OBO->hasNoUnsignedWrap())
   3060                   Flags = setFlags(Flags, SCEV::FlagNUW);
   3061                 if (OBO->hasNoSignedWrap())
   3062                   Flags = setFlags(Flags, SCEV::FlagNSW);
   3063               } else if (const GEPOperator *GEP =
   3064                          dyn_cast<GEPOperator>(BEValueV)) {
   3065                 // If the increment is an inbounds GEP, then we know the address
   3066                 // space cannot be wrapped around. We cannot make any guarantee
   3067                 // about signed or unsigned overflow because pointers are
   3068                 // unsigned but we may have a negative index from the base
   3069                 // pointer.
   3070                 if (GEP->isInBounds())
   3071                   Flags = setFlags(Flags, SCEV::FlagNW);
   3072               }
   3073 
   3074               const SCEV *StartVal = getSCEV(StartValueV);
   3075               const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
   3076 
   3077               // Since the no-wrap flags are on the increment, they apply to the
   3078               // post-incremented value as well.
   3079               if (isLoopInvariant(Accum, L))
   3080                 (void)getAddRecExpr(getAddExpr(StartVal, Accum),
   3081                                     Accum, L, Flags);
   3082 
   3083               // Okay, for the entire analysis of this edge we assumed the PHI
   3084               // to be symbolic.  We now need to go back and purge all of the
   3085               // entries for the scalars that use the symbolic expression.
   3086               ForgetSymbolicName(PN, SymbolicName);
   3087               ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
   3088               return PHISCEV;
   3089             }
   3090           }
   3091         } else if (const SCEVAddRecExpr *AddRec =
   3092                      dyn_cast<SCEVAddRecExpr>(BEValue)) {
   3093           // Otherwise, this could be a loop like this:
   3094           //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
   3095           // In this case, j = {1,+,1}  and BEValue is j.
    3096           // Because the other in-value of i (0) fits the evolution of BEValue,
   3097           // i really is an addrec evolution.
   3098           if (AddRec->getLoop() == L && AddRec->isAffine()) {
   3099             const SCEV *StartVal = getSCEV(StartValueV);
   3100 
   3101             // If StartVal = j.start - j.stride, we can use StartVal as the
    3102             // initial value of the addrec evolution.
   3103             if (StartVal == getMinusSCEV(AddRec->getOperand(0),
   3104                                          AddRec->getOperand(1))) {
   3105               // FIXME: For constant StartVal, we should be able to infer
   3106               // no-wrap flags.
   3107               const SCEV *PHISCEV =
   3108                 getAddRecExpr(StartVal, AddRec->getOperand(1), L,
   3109                               SCEV::FlagAnyWrap);
   3110 
   3111               // Okay, for the entire analysis of this edge we assumed the PHI
   3112               // to be symbolic.  We now need to go back and purge all of the
   3113               // entries for the scalars that use the symbolic expression.
   3114               ForgetSymbolicName(PN, SymbolicName);
   3115               ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
   3116               return PHISCEV;
   3117             }
   3118           }
   3119         }
   3120       }
   3121     }
   3122 
   3123   // If the PHI has a single incoming value, follow that value, unless the
   3124   // PHI's incoming blocks are in a different loop, in which case doing so
   3125   // risks breaking LCSSA form. Instcombine would normally zap these, but
   3126   // it doesn't have DominatorTree information, so it may miss cases.
   3127   if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
   3128     if (LI->replacementPreservesLCSSAForm(PN, V))
   3129       return getSCEV(V);
   3130 
   3131   // If it's not a loop phi, we can't handle it yet.
   3132   return getUnknown(PN);
   3133 }
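         // A concrete instance of the simple-induction-variable case above:
         //
         //   loop:
         //     %i      = phi i32 [ 0, %entry ], [ %i.next, %loop ]
         //     %i.next = add nsw i32 %i, 1
         //
         // Here BEValue is (%i + 1) with %i handled symbolically, so Accum is 1 and
         // StartVal is 0, and the PHI is mapped to the recurrence {0,+,1}<%loop>,
         // carrying the no-wrap flags taken from the increment.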
   3134 
   3135 /// createNodeForGEP - Expand GEP instructions into add and multiply
   3136 /// operations. This allows them to be analyzed by regular SCEV code.
   3137 ///
   3138 const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
   3139 
   3140   // Don't blindly transfer the inbounds flag from the GEP instruction to the
   3141   // Add expression, because the Instruction may be guarded by control flow
   3142   // and the no-overflow bits may not be valid for the expression in any
   3143   // context.
   3144   bool isInBounds = GEP->isInBounds();
   3145 
   3146   Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
   3147   Value *Base = GEP->getOperand(0);
   3148   // Don't attempt to analyze GEPs over unsized objects.
   3149   if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
   3150     return getUnknown(GEP);
   3151   const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
   3152   gep_type_iterator GTI = gep_type_begin(GEP);
   3153   for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
   3154                                       E = GEP->op_end();
   3155        I != E; ++I) {
   3156     Value *Index = *I;
   3157     // Compute the (potentially symbolic) offset in bytes for this index.
   3158     if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
   3159       // For a struct, add the member offset.
   3160       unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
   3161       const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
   3162 
   3163       // Add the field offset to the running total offset.
   3164       TotalOffset = getAddExpr(TotalOffset, FieldOffset);
   3165     } else {
   3166       // For an array, add the element offset, explicitly scaled.
   3167       const SCEV *ElementSize = getSizeOfExpr(*GTI);
   3168       const SCEV *IndexS = getSCEV(Index);
   3169       // Getelementptr indices are signed.
   3170       IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
   3171 
   3172       // Multiply the index by the element size to compute the element offset.
   3173       const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
   3174                                            isInBounds ? SCEV::FlagNSW :
   3175                                            SCEV::FlagAnyWrap);
   3176 
   3177       // Add the element offset to the running total offset.
   3178       TotalOffset = getAddExpr(TotalOffset, LocalOffset);
   3179     }
   3180   }
   3181 
   3182   // Get the SCEV for the GEP base.
   3183   const SCEV *BaseS = getSCEV(Base);
   3184 
   3185   // Add the total offset from all the GEP indices to the base.
   3186   return getAddExpr(BaseS, TotalOffset,
   3187                     isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
   3188 }
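         // For example, on a target with 64-bit pointers, the operator
         //
         //   getelementptr inbounds i32* %p, i32 %i
         //
         // expands to the SCEV ((4 * (sext i32 %i to i64)) + %p): the index is
         // sign-extended to the pointer-sized type, scaled by the element size 4,
         // and added to the base, with nsw on the arithmetic because the GEP is
         // inbounds.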
   3189 
   3190 /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
   3191 /// guaranteed to end in (at every loop iteration).  It is, at the same time,
   3192 /// the minimum number of times S is divisible by 2.  For example, given {4,+,8}
   3193 /// it returns 2.  If S is guaranteed to be 0, it returns the bitwidth of S.
   3194 uint32_t
   3195 ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
   3196   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
   3197     return C->getValue()->getValue().countTrailingZeros();
   3198 
   3199   if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
   3200     return std::min(GetMinTrailingZeros(T->getOperand()),
   3201                     (uint32_t)getTypeSizeInBits(T->getType()));
   3202 
   3203   if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
   3204     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
   3205     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
   3206              getTypeSizeInBits(E->getType()) : OpRes;
   3207   }
   3208 
   3209   if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
   3210     uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
   3211     return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
   3212              getTypeSizeInBits(E->getType()) : OpRes;
   3213   }
   3214 
   3215   if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
    3216     // The result is the min of all operands' results.
   3217     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
   3218     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
   3219       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
   3220     return MinOpRes;
   3221   }
   3222 
   3223   if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    3224     // The result is the sum of all operands' results.
   3225     uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
   3226     uint32_t BitWidth = getTypeSizeInBits(M->getType());
   3227     for (unsigned i = 1, e = M->getNumOperands();
   3228          SumOpRes != BitWidth && i != e; ++i)
   3229       SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
   3230                           BitWidth);
   3231     return SumOpRes;
   3232   }
   3233 
   3234   if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    3235     // The result is the min of all operands' results.
   3236     uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
   3237     for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
   3238       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
   3239     return MinOpRes;
   3240   }
   3241 
   3242   if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
    3243     // The result is the min of all operands' results.
   3244     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
   3245     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
   3246       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
   3247     return MinOpRes;
   3248   }
   3249 
   3250   if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
    3251     // The result is the min of all operands' results.
   3252     uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
   3253     for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
   3254       MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
   3255     return MinOpRes;
   3256   }
   3257 
   3258   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
   3259     // For a SCEVUnknown, ask ValueTracking.
   3260     unsigned BitWidth = getTypeSizeInBits(U->getType());
   3261     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
   3262     ComputeMaskedBits(U->getValue(), Zeros, Ones);
   3263     return Zeros.countTrailingOnes();
   3264   }
   3265 
    3266   // SCEVUDivExpr: we cannot usefully bound the trailing zeros, so be conservative.
   3267   return 0;
   3268 }
   3269 
   3270 /// getUnsignedRange - Determine the unsigned range for a particular SCEV.
   3271 ///
   3272 ConstantRange
   3273 ScalarEvolution::getUnsignedRange(const SCEV *S) {
   3274   // See if we've computed this range already.
   3275   DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
   3276   if (I != UnsignedRanges.end())
   3277     return I->second;
   3278 
   3279   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
   3280     return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
   3281 
   3282   unsigned BitWidth = getTypeSizeInBits(S->getType());
   3283   ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
   3284 
   3285   // If the value has known zeros, the maximum unsigned value will have those
   3286   // known zeros as well.
   3287   uint32_t TZ = GetMinTrailingZeros(S);
   3288   if (TZ != 0)
   3289     ConservativeResult =
   3290       ConstantRange(APInt::getMinValue(BitWidth),
   3291                     APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
   3292 
   3293   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
   3294     ConstantRange X = getUnsignedRange(Add->getOperand(0));
   3295     for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
   3296       X = X.add(getUnsignedRange(Add->getOperand(i)));
   3297     return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
   3298   }
   3299 
   3300   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
   3301     ConstantRange X = getUnsignedRange(Mul->getOperand(0));
   3302     for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
   3303       X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
   3304     return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
   3305   }
   3306 
   3307   if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
   3308     ConstantRange X = getUnsignedRange(SMax->getOperand(0));
   3309     for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
   3310       X = X.smax(getUnsignedRange(SMax->getOperand(i)));
   3311     return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
   3312   }
   3313 
   3314   if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
   3315     ConstantRange X = getUnsignedRange(UMax->getOperand(0));
   3316     for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
   3317       X = X.umax(getUnsignedRange(UMax->getOperand(i)));
   3318     return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
   3319   }
   3320 
   3321   if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
   3322     ConstantRange X = getUnsignedRange(UDiv->getLHS());
   3323     ConstantRange Y = getUnsignedRange(UDiv->getRHS());
   3324     return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
   3325   }
   3326 
   3327   if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
   3328     ConstantRange X = getUnsignedRange(ZExt->getOperand());
   3329     return setUnsignedRange(ZExt,
   3330       ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
   3331   }
   3332 
   3333   if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
   3334     ConstantRange X = getUnsignedRange(SExt->getOperand());
   3335     return setUnsignedRange(SExt,
   3336       ConservativeResult.intersectWith(X.signExtend(BitWidth)));
   3337   }
   3338 
   3339   if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
   3340     ConstantRange X = getUnsignedRange(Trunc->getOperand());
   3341     return setUnsignedRange(Trunc,
   3342       ConservativeResult.intersectWith(X.truncate(BitWidth)));
   3343   }
   3344 
   3345   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
   3346     // If there's no unsigned wrap, the value will never be less than its
   3347     // initial value.
   3348     if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
   3349       if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
   3350         if (!C->getValue()->isZero())
   3351           ConservativeResult =
   3352             ConservativeResult.intersectWith(
   3353               ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
   3354 
   3355     // TODO: non-affine addrec
   3356     if (AddRec->isAffine()) {
   3357       Type *Ty = AddRec->getType();
   3358       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
   3359       if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
   3360           getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
   3361         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
   3362 
   3363         const SCEV *Start = AddRec->getStart();
   3364         const SCEV *Step = AddRec->getStepRecurrence(*this);
   3365 
   3366         ConstantRange StartRange = getUnsignedRange(Start);
   3367         ConstantRange StepRange = getSignedRange(Step);
   3368         ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
   3369         ConstantRange EndRange =
   3370           StartRange.add(MaxBECountRange.multiply(StepRange));
   3371 
   3372         // Check for overflow. This must be done with ConstantRange arithmetic
   3373         // because we could be called from within the ScalarEvolution overflow
   3374         // checking code.
   3375         ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
   3376         ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
   3377         ConstantRange ExtMaxBECountRange =
   3378           MaxBECountRange.zextOrTrunc(BitWidth*2+1);
   3379         ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
   3380         if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
   3381             ExtEndRange)
   3382           return setUnsignedRange(AddRec, ConservativeResult);
   3383 
   3384         APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
   3385                                    EndRange.getUnsignedMin());
   3386         APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
   3387                                    EndRange.getUnsignedMax());
   3388         if (Min.isMinValue() && Max.isMaxValue())
   3389           return setUnsignedRange(AddRec, ConservativeResult);
   3390         return setUnsignedRange(AddRec,
   3391           ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
   3392       }
   3393     }
   3394 
   3395     return setUnsignedRange(AddRec, ConservativeResult);
   3396   }
   3397 
   3398   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
   3399     // For a SCEVUnknown, ask ValueTracking.
   3400     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
   3401     ComputeMaskedBits(U->getValue(), Zeros, Ones, TD);
   3402     if (Ones == ~Zeros + 1)
   3403       return setUnsignedRange(U, ConservativeResult);
   3404     return setUnsignedRange(U,
   3405       ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
   3406   }
   3407 
   3408   return setUnsignedRange(S, ConservativeResult);
   3409 }
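         // For example, for the affine recurrence {0,+,1}<%L> in a loop whose
         // maximum backedge-taken count is the constant 9, the addrec case above
         // combines the start range [0,1), the step range [1,2), and the
         // trip-count range [9,10) into the unsigned range [0,10), i.e. the
         // values 0 through 9.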
   3410 
   3411 /// getSignedRange - Determine the signed range for a particular SCEV.
   3412 ///
   3413 ConstantRange
   3414 ScalarEvolution::getSignedRange(const SCEV *S) {
   3415   // See if we've computed this range already.
   3416   DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
   3417   if (I != SignedRanges.end())
   3418     return I->second;
   3419 
   3420   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
   3421     return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
   3422 
   3423   unsigned BitWidth = getTypeSizeInBits(S->getType());
   3424   ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
   3425 
   3426   // If the value has known zeros, the maximum signed value will have those
   3427   // known zeros as well.
   3428   uint32_t TZ = GetMinTrailingZeros(S);
   3429   if (TZ != 0)
   3430     ConservativeResult =
   3431       ConstantRange(APInt::getSignedMinValue(BitWidth),
   3432                     APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
   3433 
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    ConstantRange X = getSignedRange(Add->getOperand(0));
    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
      X = X.add(getSignedRange(Add->getOperand(i)));
    return setSignedRange(Add, ConservativeResult.intersectWith(X));
  }

  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    ConstantRange X = getSignedRange(Mul->getOperand(0));
    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
      X = X.multiply(getSignedRange(Mul->getOperand(i)));
    return setSignedRange(Mul, ConservativeResult.intersectWith(X));
  }

  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
    ConstantRange X = getSignedRange(SMax->getOperand(0));
    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
      X = X.smax(getSignedRange(SMax->getOperand(i)));
    return setSignedRange(SMax, ConservativeResult.intersectWith(X));
  }

  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
    ConstantRange X = getSignedRange(UMax->getOperand(0));
    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
      X = X.umax(getSignedRange(UMax->getOperand(i)));
    return setSignedRange(UMax, ConservativeResult.intersectWith(X));
  }

  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
    ConstantRange X = getSignedRange(UDiv->getLHS());
    ConstantRange Y = getSignedRange(UDiv->getRHS());
    return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
  }

  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
    ConstantRange X = getSignedRange(ZExt->getOperand());
    return setSignedRange(ZExt,
      ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
  }

  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
    ConstantRange X = getSignedRange(SExt->getOperand());
    return setSignedRange(SExt,
      ConservativeResult.intersectWith(X.signExtend(BitWidth)));
  }

  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
    ConstantRange X = getSignedRange(Trunc->getOperand());
    return setSignedRange(Trunc,
      ConservativeResult.intersectWith(X.truncate(BitWidth)));
  }

  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
    // If there's no signed wrap, and all the operands have the same sign or
    // zero, the value won't ever change sign.
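    // For example, {0,+,1}<nsw> has a non-negative start and step, so it can
    // never become negative and its range is confined to [0, INT_MAX].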
    if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
      bool AllNonNeg = true;
      bool AllNonPos = true;
      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
        if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
        if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
      }
      if (AllNonNeg)
        ConservativeResult = ConservativeResult.intersectWith(
          ConstantRange(APInt(BitWidth, 0),
                        APInt::getSignedMinValue(BitWidth)));
      else if (AllNonPos)
        ConservativeResult = ConservativeResult.intersectWith(
          ConstantRange(APInt::getSignedMinValue(BitWidth),
                        APInt(BitWidth, 1)));
    }

    // TODO: non-affine addrec
    if (AddRec->isAffine()) {
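      // For an affine addrec {Start,+,Step}, the value after the last
      // iteration is Start + MaxBECount * Step. If that computation provably
      // does not wrap, the recurrence stays within the signed interval
      // [min(Start, End), max(Start, End)], computed below with ConstantRange
      // arithmetic.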
      Type *Ty = AddRec->getType();
      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);

        const SCEV *Start = AddRec->getStart();
        const SCEV *Step = AddRec->getStepRecurrence(*this);

        ConstantRange StartRange = getSignedRange(Start);
        ConstantRange StepRange = getSignedRange(Step);
        ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
        ConstantRange EndRange =
          StartRange.add(MaxBECountRange.multiply(StepRange));

        // Check for overflow. This must be done with ConstantRange arithmetic
        // because we could be called from within the ScalarEvolution overflow
        // checking code.
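        // For example, in i8 with Start == 100, Step == 100, and
        // MaxBECount == 1, EndRange wraps to -56, while the same arithmetic
        // redone in 2*BitWidth+1 == 17 bits yields 200; the mismatch is
        // caught below and the conservative range is returned instead.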
        ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
        ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
        ConstantRange ExtMaxBECountRange =
          MaxBECountRange.zextOrTrunc(BitWidth*2+1);
        ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
        if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
            ExtEndRange)
          return setSignedRange(AddRec, ConservativeResult);

        APInt Min = APIntOps::smin(StartRange.getSignedMin(),
                                   EndRange.getSignedMin());
        APInt Max = APIntOps::smax(StartRange.getSignedMax(),
                                   EndRange.getSignedMax());
        if (Min.isMinSignedValue() && Max.isMaxSignedValue())
          return setSignedRange(AddRec, ConservativeResult);
        return setSignedRange(AddRec,
          ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
      }
    }

    return setSignedRange(AddRec, ConservativeResult);
  }

  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    // For a SCEVUnknown, ask ValueTracking.
    if (!U->getValue()->getType()->isIntegerTy() && !TD)
      return setSignedRange(U, ConservativeResult);
    unsigned NS = ComputeNumSignBits(U->getValue(), TD);
    if (NS == 1)
      return setSignedRange(U, ConservativeResult);
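    // With NS known sign bits the value fits in BitWidth - NS + 1 bits; for
    // example, NS == 25 on an i32 value restricts it to [-128, 127].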
    return setSignedRange(U, ConservativeResult.intersectWith(
      ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
                    APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
  }

  return setSignedRange(S, ConservativeResult);
}

/// createSCEV - We know that there is no SCEV for the specified value.
/// Analyze the expression.
///
const SCEV *ScalarEvolution::createSCEV(Value *V) {
  if (!isSCEVable(V->getType()))
    return getUnknown(V);

  unsigned Opcode = Instruction::UserOp1;
  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();

    // Don't attempt to analyze instructions in blocks that aren't
    // reachable. Such instructions don't matter, and they aren't required
    // to obey basic rules for definitions dominating uses which this
    // analysis depends on.
    if (!DT->isReachableFromEntry(I->getParent()))
      return getUnknown(V);
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    Opcode = CE->getOpcode();
  else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
    return getConstant(CI);
  else if (isa<ConstantPointerNull>(V))
    return getConstant(V->getType(), 0);
  else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
    return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
  else
    return getUnknown(V);

  Operator *U = cast<Operator>(V);
  switch (Opcode) {
  case Instruction::Add: {
    // The simple thing to do would be to just call getSCEV on both operands
    // and call getAddExpr with the result. However if we're looking at a
    // bunch of things all added together, this can be quite inefficient,
    // because it leads to N-1 getAddExpr calls for N ultimate operands.
    // Instead, gather up all the operands and make a single getAddExpr call.
    // LLVM IR canonical form means we need only traverse the left operands.
    //
    // Don't apply this instruction's NSW or NUW flags to the new
    // expression. The instruction may be guarded by control flow that the
    // no-wrap behavior depends on. Non-control-equivalent instructions can be
    // mapped to the same SCEV expression, and it would be incorrect to transfer
    // NSW/NUW semantics to those operations.
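    // For example, for ((a + b) + c) + d the loop below walks the chain of
    // left operands and collects {d, c, b, a}, so getAddExpr is called once
    // with all four operands; a Sub along the way contributes its negated
    // right-hand operand instead.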
    SmallVector<const SCEV *, 4> AddOps;
    AddOps.push_back(getSCEV(U->getOperand(1)));
    for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
      unsigned Opcode = Op->getValueID() - Value::InstructionVal;
      if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
        break;
      U = cast<Operator>(Op);
      const SCEV *Op1 = getSCEV(U->getOperand(1));
      if (Opcode == Instruction::Sub)
        AddOps.push_back(getNegativeSCEV(Op1));
      else
        AddOps.push_back(Op1);
    }
    AddOps.push_back(getSCEV(U->getOperand(0)));
    return getAddExpr(AddOps);
  }
  case Instruction::Mul: {
    // Don't transfer NSW/NUW for the same reason as AddExpr.
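    // As with Add, walk the chain of left operands and gather everything into
    // a single getMulExpr call, e.g. (a * b) * c becomes getMulExpr({c, b, a}).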
    SmallVector<const SCEV *, 4> MulOps;
    MulOps.push_back(getSCEV(U->getOperand(1)));
    for (Value *Op = U->getOperand(0);
         Op->getValueID() == Instruction::Mul + Value::InstructionVal;
         Op = U->getOperand(0)) {
      U = cast<Operator>(Op);
      MulOps.push_back(getSCEV(U->getOperand(1)));
    }
    MulOps.push_back(getSCEV(U->getOperand(0)));
    return getMulExpr(MulOps);
  }
  case Instruction::UDiv:
    return getUDivExpr(getSCEV(U->getOperand(0)),
                       getSCEV(U->getOperand(1)));
  case Instruction::Sub:
    return getMinusSCEV(getSCEV(U->getOperand(0)),
                        getSCEV(U->getOperand(1)));
  case Instruction::And:
    // For an expression like x&255 that merely masks off the high bits,
    // use zext(trunc(x)) as the SCEV expression.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      if (CI->isNullValue())
        return getSCEV(U->getOperand(1));
      if (CI->isAllOnesValue())
        return getSCEV(U->getOperand(0));
      const APInt &A = CI->getValue();

      // Instcombine's ShrinkDemandedConstant may strip bits out of
      // constants, obscuring what would otherwise be a low-bits mask.
      // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
      // knew about to reconstruct a low-bits mask value.
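      // For example, if the low two bits of the operand are known zero,
      // instcombine may have shrunk the mask 0xFF to 0xFC; the known-zero
      // bits recovered here let the And still be treated as a low-8-bit
      // mask, i.e. as zext(trunc(x to i8)) back to the original type.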
      unsigned LZ = A.countLeadingZeros();
      unsigned BitWidth = A.getBitWidth();
      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
      ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD);

      APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);

      if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
        return
          getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
                                IntegerType::get(getContext(), BitWidth - LZ)),
                            U->getType());
    }
    break;

  case Instruction::Or:
    // If the RHS of the Or is a constant, we may have something like:
    // X*4+1 which got turned into X*4|1.  Handle this as an Add so loop
    // optimizations will transparently handle this case.
    //
    // In order for this transformation to be safe, the LHS must be of the
    // form X*(2^n) and the Or constant must be less than 2^n.
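    // For example, with %t = mul i32 %x, 4 and %r = or i32 %t, 1, the product
    // has two known-zero low bits, so the or cannot carry into them and %r is
    // analyzed as the SCEV (4 * %x) + 1.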
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      const SCEV *LHS = getSCEV(U->getOperand(0));
      const APInt &CIVal = CI->getValue();
      if (GetMinTrailingZeros(LHS) >=
          (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
        // Build a plain add SCEV.
        const SCEV *S = getAddExpr(LHS, getSCEV(CI));
        // If the LHS of the add was an addrec and it has no-wrap flags,
        // transfer the no-wrap flags, since an or won't introduce a wrap.
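        // For example, if LHS was the addrec {4,+,4}<nuw>, the or-by-1 folds
        // into the addrec {5,+,4}, which can keep the nuw flag.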
        if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
          const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
          const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
            OldAR->getNoWrapFlags());
        }
        return S;
      }
    }
    break;
  case Instruction::Xor:
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      // If the RHS of the xor is a signbit, then this is just an add.
      //