Home | History | Annotate | Download | only in Analysis
      1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory
     11 // accesses. Currently, it is an (incomplete) implementation of the approach
     12 // described in
     13 //
     14 //            Practical Dependence Testing
     15 //            Goff, Kennedy, Tseng
     16 //            PLDI 1991
     17 //
     18 // There's a single entry point that analyzes the dependence between a pair
     19 // of memory references in a function, returning either NULL, for no dependence,
     20 // or a more-or-less detailed description of the dependence between them.
     21 //
     22 // Currently, the implementation cannot propagate constraints between
     23 // coupled RDIV subscripts and lacks a multi-subscript MIV test.
     24 // Both of these are conservative weaknesses;
     25 // that is, not a source of correctness problems.
     26 //
     27 // The implementation depends on the GEP instruction to
     28 // differentiate subscripts. Since Clang linearizes subscripts
     29 // for most arrays, we give up some precision (though the existing MIV tests
     30 // will help). We trust that the GEP instruction will eventually be extended.
     31 // In the meantime, we should explore Maslov's ideas about delinearization.
     32 //
     33 // We should pay some careful attention to the possibility of integer overflow
     34 // in the implementation of the various tests. This could happen with Add,
     35 // Subtract, or Multiply, with both APInt's and SCEV's.
     36 //
     37 // Some non-linear subscript pairs can be handled by the GCD test
     38 // (and perhaps other tests).
     39 // Should explore how often these things occur.
     40 //
     41 // Finally, it seems like certain test cases expose weaknesses in the SCEV
     42 // simplification, especially in the handling of sign and zero extensions.
     43 // It could be useful to spend time exploring these.
     44 //
     45 // Please note that this is work in progress and the interface is subject to
     46 // change.
     47 //
     48 //===----------------------------------------------------------------------===//
     49 //                                                                            //
     50 //                   In memory of Ken Kennedy, 1945 - 2007                    //
     51 //                                                                            //
     52 //===----------------------------------------------------------------------===//
     53 
     54 #define DEBUG_TYPE "da"
     55 
     56 #include "llvm/Analysis/DependenceAnalysis.h"
     57 #include "llvm/ADT/Statistic.h"
     58 #include "llvm/Analysis/AliasAnalysis.h"
     59 #include "llvm/Analysis/LoopInfo.h"
     60 #include "llvm/Analysis/ScalarEvolution.h"
     61 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
     62 #include "llvm/Analysis/ValueTracking.h"
     63 #include "llvm/IR/Operator.h"
     64 #include "llvm/Support/Debug.h"
     65 #include "llvm/Support/ErrorHandling.h"
     66 #include "llvm/Support/InstIterator.h"
     67 #include "llvm/Support/raw_ostream.h"
     68 
     69 using namespace llvm;
     70 
     71 //===----------------------------------------------------------------------===//
     72 // statistics
     73 
     74 STATISTIC(TotalArrayPairs, "Array pairs tested");
     75 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
     76 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
     77 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
     78 STATISTIC(ZIVapplications, "ZIV applications");
     79 STATISTIC(ZIVindependence, "ZIV independence");
     80 STATISTIC(StrongSIVapplications, "Strong SIV applications");
     81 STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
     82 STATISTIC(StrongSIVindependence, "Strong SIV independence");
     83 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
     84 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
     85 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
     86 STATISTIC(ExactSIVapplications, "Exact SIV applications");
     87 STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
     88 STATISTIC(ExactSIVindependence, "Exact SIV independence");
     89 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
     90 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
     91 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
     92 STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
     93 STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
     94 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
     95 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
     96 STATISTIC(DeltaApplications, "Delta applications");
     97 STATISTIC(DeltaSuccesses, "Delta successes");
     98 STATISTIC(DeltaIndependence, "Delta independence");
     99 STATISTIC(DeltaPropagations, "Delta propagations");
    100 STATISTIC(GCDapplications, "GCD applications");
    101 STATISTIC(GCDsuccesses, "GCD successes");
    102 STATISTIC(GCDindependence, "GCD independence");
    103 STATISTIC(BanerjeeApplications, "Banerjee applications");
    104 STATISTIC(BanerjeeIndependence, "Banerjee independence");
    105 STATISTIC(BanerjeeSuccesses, "Banerjee successes");
    106 
    107 //===----------------------------------------------------------------------===//
    108 // basics
    109 
    110 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
    111                       "Dependence Analysis", true, true)
    112 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
    113 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
    114 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
    115 INITIALIZE_PASS_END(DependenceAnalysis, "da",
    116                     "Dependence Analysis", true, true)
    117 
    118 char DependenceAnalysis::ID = 0;
    119 
    120 
    121 FunctionPass *llvm::createDependenceAnalysisPass() {
    122   return new DependenceAnalysis();
    123 }
    124 
    125 
    126 bool DependenceAnalysis::runOnFunction(Function &F) {
    127   this->F = &F;
    128   AA = &getAnalysis<AliasAnalysis>();
    129   SE = &getAnalysis<ScalarEvolution>();
    130   LI = &getAnalysis<LoopInfo>();
    131   return false;
    132 }
    133 
    134 
    135 void DependenceAnalysis::releaseMemory() {
    136 }
    137 
    138 
    139 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
    140   AU.setPreservesAll();
    141   AU.addRequiredTransitive<AliasAnalysis>();
    142   AU.addRequiredTransitive<ScalarEvolution>();
    143   AU.addRequiredTransitive<LoopInfo>();
    144 }
    145 
    146 
    147 // Used to test the dependence analyzer.
    148 // Looks through the function, noting loads and stores.
    149 // Calls depends() on every possible pair and prints out the result.
    150 // Ignores all other instructions.
    151 static
    152 void dumpExampleDependence(raw_ostream &OS, Function *F,
    153                            DependenceAnalysis *DA) {
    154   for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F);
    155        SrcI != SrcE; ++SrcI) {
    156     if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) {
    157       for (inst_iterator DstI = SrcI, DstE = inst_end(F);
    158            DstI != DstE; ++DstI) {
    159         if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) {
    160           OS << "da analyze - ";
    161           if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) {
    162             D->dump(OS);
    163             for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
    164               if (D->isSplitable(Level)) {
    165                 OS << "da analyze - split level = " << Level;
    166                 OS << ", iteration = " << *DA->getSplitIteration(D, Level);
    167                 OS << "!\n";
    168               }
    169             }
    170             delete D;
    171           }
    172           else
    173             OS << "none!\n";
    174         }
    175       }
    176     }
    177   }
    178 }
    179 
    180 
    181 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const {
    182   dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this));
    183 }
    184 
    185 //===----------------------------------------------------------------------===//
    186 // Dependence methods
    187 
    188 // Returns true if this is an input dependence.
    189 bool Dependence::isInput() const {
    190   return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
    191 }
    192 
    193 
    194 // Returns true if this is an output dependence.
    195 bool Dependence::isOutput() const {
    196   return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
    197 }
    198 
    199 
    200 // Returns true if this is an flow (aka true)  dependence.
    201 bool Dependence::isFlow() const {
    202   return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
    203 }
    204 
    205 
    206 // Returns true if this is an anti dependence.
    207 bool Dependence::isAnti() const {
    208   return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
    209 }
    210 
    211 
    212 // Returns true if a particular level is scalar; that is,
    213 // if no subscript in the source or destination mention the induction
    214 // variable associated with the loop at this level.
    215 // Leave this out of line, so it will serve as a virtual method anchor
    216 bool Dependence::isScalar(unsigned level) const {
    217   return false;
    218 }
    219 
    220 
    221 //===----------------------------------------------------------------------===//
    222 // FullDependence methods
    223 
    224 FullDependence::FullDependence(Instruction *Source,
    225                                Instruction *Destination,
    226                                bool PossiblyLoopIndependent,
    227                                unsigned CommonLevels) :
    228   Dependence(Source, Destination),
    229   Levels(CommonLevels),
    230   LoopIndependent(PossiblyLoopIndependent) {
    231   Consistent = true;
    232   DV = CommonLevels ? new DVEntry[CommonLevels] : NULL;
    233 }
    234 
    235 // The rest are simple getters that hide the implementation.
    236 
    237 // getDirection - Returns the direction associated with a particular level.
    238 unsigned FullDependence::getDirection(unsigned Level) const {
    239   assert(0 < Level && Level <= Levels && "Level out of range");
    240   return DV[Level - 1].Direction;
    241 }
    242 
    243 
    244 // Returns the distance (or NULL) associated with a particular level.
    245 const SCEV *FullDependence::getDistance(unsigned Level) const {
    246   assert(0 < Level && Level <= Levels && "Level out of range");
    247   return DV[Level - 1].Distance;
    248 }
    249 
    250 
    251 // Returns true if a particular level is scalar; that is,
    252 // if no subscript in the source or destination mention the induction
    253 // variable associated with the loop at this level.
    254 bool FullDependence::isScalar(unsigned Level) const {
    255   assert(0 < Level && Level <= Levels && "Level out of range");
    256   return DV[Level - 1].Scalar;
    257 }
    258 
    259 
    260 // Returns true if peeling the first iteration from this loop
    261 // will break this dependence.
    262 bool FullDependence::isPeelFirst(unsigned Level) const {
    263   assert(0 < Level && Level <= Levels && "Level out of range");
    264   return DV[Level - 1].PeelFirst;
    265 }
    266 
    267 
    268 // Returns true if peeling the last iteration from this loop
    269 // will break this dependence.
    270 bool FullDependence::isPeelLast(unsigned Level) const {
    271   assert(0 < Level && Level <= Levels && "Level out of range");
    272   return DV[Level - 1].PeelLast;
    273 }
    274 
    275 
    276 // Returns true if splitting this loop will break the dependence.
    277 bool FullDependence::isSplitable(unsigned Level) const {
    278   assert(0 < Level && Level <= Levels && "Level out of range");
    279   return DV[Level - 1].Splitable;
    280 }
    281 
    282 
    283 //===----------------------------------------------------------------------===//
    284 // DependenceAnalysis::Constraint methods
    285 
    286 // If constraint is a point <X, Y>, returns X.
    287 // Otherwise assert.
    288 const SCEV *DependenceAnalysis::Constraint::getX() const {
    289   assert(Kind == Point && "Kind should be Point");
    290   return A;
    291 }
    292 
    293 
    294 // If constraint is a point <X, Y>, returns Y.
    295 // Otherwise assert.
    296 const SCEV *DependenceAnalysis::Constraint::getY() const {
    297   assert(Kind == Point && "Kind should be Point");
    298   return B;
    299 }
    300 
    301 
    302 // If constraint is a line AX + BY = C, returns A.
    303 // Otherwise assert.
    304 const SCEV *DependenceAnalysis::Constraint::getA() const {
    305   assert((Kind == Line || Kind == Distance) &&
    306          "Kind should be Line (or Distance)");
    307   return A;
    308 }
    309 
    310 
    311 // If constraint is a line AX + BY = C, returns B.
    312 // Otherwise assert.
    313 const SCEV *DependenceAnalysis::Constraint::getB() const {
    314   assert((Kind == Line || Kind == Distance) &&
    315          "Kind should be Line (or Distance)");
    316   return B;
    317 }
    318 
    319 
    320 // If constraint is a line AX + BY = C, returns C.
    321 // Otherwise assert.
    322 const SCEV *DependenceAnalysis::Constraint::getC() const {
    323   assert((Kind == Line || Kind == Distance) &&
    324          "Kind should be Line (or Distance)");
    325   return C;
    326 }
    327 
    328 
    329 // If constraint is a distance, returns D.
    330 // Otherwise assert.
    331 const SCEV *DependenceAnalysis::Constraint::getD() const {
    332   assert(Kind == Distance && "Kind should be Distance");
    333   return SE->getNegativeSCEV(C);
    334 }
    335 
    336 
    337 // Returns the loop associated with this constraint.
    338 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const {
    339   assert((Kind == Distance || Kind == Line || Kind == Point) &&
    340          "Kind should be Distance, Line, or Point");
    341   return AssociatedLoop;
    342 }
    343 
    344 
    345 void DependenceAnalysis::Constraint::setPoint(const SCEV *X,
    346                                               const SCEV *Y,
    347                                               const Loop *CurLoop) {
    348   Kind = Point;
    349   A = X;
    350   B = Y;
    351   AssociatedLoop = CurLoop;
    352 }
    353 
    354 
    355 void DependenceAnalysis::Constraint::setLine(const SCEV *AA,
    356                                              const SCEV *BB,
    357                                              const SCEV *CC,
    358                                              const Loop *CurLoop) {
    359   Kind = Line;
    360   A = AA;
    361   B = BB;
    362   C = CC;
    363   AssociatedLoop = CurLoop;
    364 }
    365 
    366 
    367 void DependenceAnalysis::Constraint::setDistance(const SCEV *D,
    368                                                  const Loop *CurLoop) {
    369   Kind = Distance;
    370   A = SE->getConstant(D->getType(), 1);
    371   B = SE->getNegativeSCEV(A);
    372   C = SE->getNegativeSCEV(D);
    373   AssociatedLoop = CurLoop;
    374 }
    375 
    376 
    377 void DependenceAnalysis::Constraint::setEmpty() {
    378   Kind = Empty;
    379 }
    380 
    381 
    382 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) {
    383   SE = NewSE;
    384   Kind = Any;
    385 }
    386 
    387 
    388 // For debugging purposes. Dumps the constraint out to OS.
    389 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const {
    390   if (isEmpty())
    391     OS << " Empty\n";
    392   else if (isAny())
    393     OS << " Any\n";
    394   else if (isPoint())
    395     OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
    396   else if (isDistance())
    397     OS << " Distance is " << *getD() <<
    398       " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
    399   else if (isLine())
    400     OS << " Line is " << *getA() << "*X + " <<
    401       *getB() << "*Y = " << *getC() << "\n";
    402   else
    403     llvm_unreachable("unknown constraint type in Constraint::dump");
    404 }
    405 
    406 
    407 // Updates X with the intersection
    408 // of the Constraints X and Y. Returns true if X has changed.
    409 // Corresponds to Figure 4 from the paper
    410 //
    411 //            Practical Dependence Testing
    412 //            Goff, Kennedy, Tseng
    413 //            PLDI 1991
    414 bool DependenceAnalysis::intersectConstraints(Constraint *X,
    415                                               const Constraint *Y) {
    416   ++DeltaApplications;
    417   DEBUG(dbgs() << "\tintersect constraints\n");
    418   DEBUG(dbgs() << "\t    X ="; X->dump(dbgs()));
    419   DEBUG(dbgs() << "\t    Y ="; Y->dump(dbgs()));
    420   assert(!Y->isPoint() && "Y must not be a Point");
    421   if (X->isAny()) {
    422     if (Y->isAny())
    423       return false;
    424     *X = *Y;
    425     return true;
    426   }
    427   if (X->isEmpty())
    428     return false;
    429   if (Y->isEmpty()) {
    430     X->setEmpty();
    431     return true;
    432   }
    433 
    434   if (X->isDistance() && Y->isDistance()) {
    435     DEBUG(dbgs() << "\t    intersect 2 distances\n");
    436     if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
    437       return false;
    438     if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
    439       X->setEmpty();
    440       ++DeltaSuccesses;
    441       return true;
    442     }
    443     // Hmmm, interesting situation.
    444     // I guess if either is constant, keep it and ignore the other.
    445     if (isa<SCEVConstant>(Y->getD())) {
    446       *X = *Y;
    447       return true;
    448     }
    449     return false;
    450   }
    451 
    452   // At this point, the pseudo-code in Figure 4 of the paper
    453   // checks if (X->isPoint() && Y->isPoint()).
    454   // This case can't occur in our implementation,
    455   // since a Point can only arise as the result of intersecting
    456   // two Line constraints, and the right-hand value, Y, is never
    457   // the result of an intersection.
    458   assert(!(X->isPoint() && Y->isPoint()) &&
    459          "We shouldn't ever see X->isPoint() && Y->isPoint()");
    460 
    461   if (X->isLine() && Y->isLine()) {
    462     DEBUG(dbgs() << "\t    intersect 2 lines\n");
    463     const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
    464     const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
    465     if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
    466       // slopes are equal, so lines are parallel
    467       DEBUG(dbgs() << "\t\tsame slope\n");
    468       Prod1 = SE->getMulExpr(X->getC(), Y->getB());
    469       Prod2 = SE->getMulExpr(X->getB(), Y->getC());
    470       if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
    471         return false;
    472       if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
    473         X->setEmpty();
    474         ++DeltaSuccesses;
    475         return true;
    476       }
    477       return false;
    478     }
    479     if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
    480       // slopes differ, so lines intersect
    481       DEBUG(dbgs() << "\t\tdifferent slopes\n");
    482       const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
    483       const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
    484       const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
    485       const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
    486       const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
    487       const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
    488       const SCEVConstant *C1A2_C2A1 =
    489         dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
    490       const SCEVConstant *C1B2_C2B1 =
    491         dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
    492       const SCEVConstant *A1B2_A2B1 =
    493         dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
    494       const SCEVConstant *A2B1_A1B2 =
    495         dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
    496       if (!C1B2_C2B1 || !C1A2_C2A1 ||
    497           !A1B2_A2B1 || !A2B1_A1B2)
    498         return false;
    499       APInt Xtop = C1B2_C2B1->getValue()->getValue();
    500       APInt Xbot = A1B2_A2B1->getValue()->getValue();
    501       APInt Ytop = C1A2_C2A1->getValue()->getValue();
    502       APInt Ybot = A2B1_A1B2->getValue()->getValue();
    503       DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
    504       DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
    505       DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
    506       DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
    507       APInt Xq = Xtop; // these need to be initialized, even
    508       APInt Xr = Xtop; // though they're just going to be overwritten
    509       APInt::sdivrem(Xtop, Xbot, Xq, Xr);
    510       APInt Yq = Ytop;
    511       APInt Yr = Ytop;
    512       APInt::sdivrem(Ytop, Ybot, Yq, Yr);
    513       if (Xr != 0 || Yr != 0) {
    514         X->setEmpty();
    515         ++DeltaSuccesses;
    516         return true;
    517       }
    518       DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
    519       if (Xq.slt(0) || Yq.slt(0)) {
    520         X->setEmpty();
    521         ++DeltaSuccesses;
    522         return true;
    523       }
    524       if (const SCEVConstant *CUB =
    525           collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
    526         APInt UpperBound = CUB->getValue()->getValue();
    527         DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
    528         if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
    529           X->setEmpty();
    530           ++DeltaSuccesses;
    531           return true;
    532         }
    533       }
    534       X->setPoint(SE->getConstant(Xq),
    535                   SE->getConstant(Yq),
    536                   X->getAssociatedLoop());
    537       ++DeltaSuccesses;
    538       return true;
    539     }
    540     return false;
    541   }
    542 
    543   // if (X->isLine() && Y->isPoint()) This case can't occur.
    544   assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
    545 
    546   if (X->isPoint() && Y->isLine()) {
    547     DEBUG(dbgs() << "\t    intersect Point and Line\n");
    548     const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
    549     const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
    550     const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
    551     if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
    552       return false;
    553     if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
    554       X->setEmpty();
    555       ++DeltaSuccesses;
    556       return true;
    557     }
    558     return false;
    559   }
    560 
    561   llvm_unreachable("shouldn't reach the end of Constraint intersection");
    562   return false;
    563 }
    564 
    565 
    566 //===----------------------------------------------------------------------===//
    567 // DependenceAnalysis methods
    568 
    569 // For debugging purposes. Dumps a dependence to OS.
    570 void Dependence::dump(raw_ostream &OS) const {
    571   bool Splitable = false;
    572   if (isConfused())
    573     OS << "confused";
    574   else {
    575     if (isConsistent())
    576       OS << "consistent ";
    577     if (isFlow())
    578       OS << "flow";
    579     else if (isOutput())
    580       OS << "output";
    581     else if (isAnti())
    582       OS << "anti";
    583     else if (isInput())
    584       OS << "input";
    585     unsigned Levels = getLevels();
    586     OS << " [";
    587     for (unsigned II = 1; II <= Levels; ++II) {
    588       if (isSplitable(II))
    589         Splitable = true;
    590       if (isPeelFirst(II))
    591         OS << 'p';
    592       const SCEV *Distance = getDistance(II);
    593       if (Distance)
    594         OS << *Distance;
    595       else if (isScalar(II))
    596         OS << "S";
    597       else {
    598         unsigned Direction = getDirection(II);
    599         if (Direction == DVEntry::ALL)
    600           OS << "*";
    601         else {
    602           if (Direction & DVEntry::LT)
    603             OS << "<";
    604           if (Direction & DVEntry::EQ)
    605             OS << "=";
    606           if (Direction & DVEntry::GT)
    607             OS << ">";
    608         }
    609       }
    610       if (isPeelLast(II))
    611         OS << 'p';
    612       if (II < Levels)
    613         OS << " ";
    614     }
    615     if (isLoopIndependent())
    616       OS << "|<";
    617     OS << "]";
    618     if (Splitable)
    619       OS << " splitable";
    620   }
    621   OS << "!\n";
    622 }
    623 
    624 
    625 
    626 static
    627 AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
    628                                                   const Value *A,
    629                                                   const Value *B) {
    630   const Value *AObj = GetUnderlyingObject(A);
    631   const Value *BObj = GetUnderlyingObject(B);
    632   return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
    633                    BObj, AA->getTypeStoreSize(BObj->getType()));
    634 }
    635 
    636 
    637 // Returns true if the load or store can be analyzed. Atomic and volatile
    638 // operations have properties which this analysis does not understand.
    639 static
    640 bool isLoadOrStore(const Instruction *I) {
    641   if (const LoadInst *LI = dyn_cast<LoadInst>(I))
    642     return LI->isUnordered();
    643   else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
    644     return SI->isUnordered();
    645   return false;
    646 }
    647 
    648 
    649 static
    650 Value *getPointerOperand(Instruction *I) {
    651   if (LoadInst *LI = dyn_cast<LoadInst>(I))
    652     return LI->getPointerOperand();
    653   if (StoreInst *SI = dyn_cast<StoreInst>(I))
    654     return SI->getPointerOperand();
    655   llvm_unreachable("Value is not load or store instruction");
    656   return 0;
    657 }
    658 
    659 
    660 // Examines the loop nesting of the Src and Dst
    661 // instructions and establishes their shared loops. Sets the variables
    662 // CommonLevels, SrcLevels, and MaxLevels.
    663 // The source and destination instructions needn't be contained in the same
    664 // loop. The routine establishNestingLevels finds the level of most deeply
    665 // nested loop that contains them both, CommonLevels. An instruction that's
    666 // not contained in a loop is at level = 0. MaxLevels is equal to the level
    667 // of the source plus the level of the destination, minus CommonLevels.
    668 // This lets us allocate vectors MaxLevels in length, with room for every
    669 // distinct loop referenced in both the source and destination subscripts.
    670 // The variable SrcLevels is the nesting depth of the source instruction.
    671 // It's used to help calculate distinct loops referenced by the destination.
    672 // Here's the map from loops to levels:
    673 //            0 - unused
    674 //            1 - outermost common loop
    675 //          ... - other common loops
    676 // CommonLevels - innermost common loop
    677 //          ... - loops containing Src but not Dst
    678 //    SrcLevels - innermost loop containing Src but not Dst
    679 //          ... - loops containing Dst but not Src
    680 //    MaxLevels - innermost loops containing Dst but not Src
    681 // Consider the follow code fragment:
    682 //   for (a = ...) {
    683 //     for (b = ...) {
    684 //       for (c = ...) {
    685 //         for (d = ...) {
    686 //           A[] = ...;
    687 //         }
    688 //       }
    689 //       for (e = ...) {
    690 //         for (f = ...) {
    691 //           for (g = ...) {
    692 //             ... = A[];
    693 //           }
    694 //         }
    695 //       }
    696 //     }
    697 //   }
    698 // If we're looking at the possibility of a dependence between the store
    699 // to A (the Src) and the load from A (the Dst), we'll note that they
    700 // have 2 loops in common, so CommonLevels will equal 2 and the direction
    701 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
    702 // A map from loop names to loop numbers would look like
    703 //     a - 1
    704 //     b - 2 = CommonLevels
    705 //     c - 3
    706 //     d - 4 = SrcLevels
    707 //     e - 5
    708 //     f - 6
    709 //     g - 7 = MaxLevels
    710 void DependenceAnalysis::establishNestingLevels(const Instruction *Src,
    711                                                 const Instruction *Dst) {
    712   const BasicBlock *SrcBlock = Src->getParent();
    713   const BasicBlock *DstBlock = Dst->getParent();
    714   unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
    715   unsigned DstLevel = LI->getLoopDepth(DstBlock);
    716   const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
    717   const Loop *DstLoop = LI->getLoopFor(DstBlock);
    718   SrcLevels = SrcLevel;
    719   MaxLevels = SrcLevel + DstLevel;
    720   while (SrcLevel > DstLevel) {
    721     SrcLoop = SrcLoop->getParentLoop();
    722     SrcLevel--;
    723   }
    724   while (DstLevel > SrcLevel) {
    725     DstLoop = DstLoop->getParentLoop();
    726     DstLevel--;
    727   }
    728   while (SrcLoop != DstLoop) {
    729     SrcLoop = SrcLoop->getParentLoop();
    730     DstLoop = DstLoop->getParentLoop();
    731     SrcLevel--;
    732   }
    733   CommonLevels = SrcLevel;
    734   MaxLevels -= CommonLevels;
    735 }
    736 
    737 
    738 // Given one of the loops containing the source, return
    739 // its level index in our numbering scheme.
    740 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const {
    741   return SrcLoop->getLoopDepth();
    742 }
    743 
    744 
    745 // Given one of the loops containing the destination,
    746 // return its level index in our numbering scheme.
    747 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const {
    748   unsigned D = DstLoop->getLoopDepth();
    749   if (D > CommonLevels)
    750     return D - CommonLevels + SrcLevels;
    751   else
    752     return D;
    753 }
    754 
    755 
    756 // Returns true if Expression is loop invariant in LoopNest.
    757 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression,
    758                                          const Loop *LoopNest) const {
    759   if (!LoopNest)
    760     return true;
    761   return SE->isLoopInvariant(Expression, LoopNest) &&
    762     isLoopInvariant(Expression, LoopNest->getParentLoop());
    763 }
    764 
    765 
    766 
    767 // Finds the set of loops from the LoopNest that
    768 // have a level <= CommonLevels and are referred to by the SCEV Expression.
    769 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression,
    770                                             const Loop *LoopNest,
    771                                             SmallBitVector &Loops) const {
    772   while (LoopNest) {
    773     unsigned Level = LoopNest->getLoopDepth();
    774     if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
    775       Loops.set(Level);
    776     LoopNest = LoopNest->getParentLoop();
    777   }
    778 }
    779 
    780 
    781 // removeMatchingExtensions - Examines a subscript pair.
    782 // If the source and destination are identically sign (or zero)
    783 // extended, it strips off the extension in an effect to simplify
    784 // the actual analysis.
    785 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) {
    786   const SCEV *Src = Pair->Src;
    787   const SCEV *Dst = Pair->Dst;
    788   if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
    789       (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
    790     const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
    791     const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
    792     if (SrcCast->getType() == DstCast->getType()) {
    793       Pair->Src = SrcCast->getOperand();
    794       Pair->Dst = DstCast->getOperand();
    795     }
    796   }
    797 }
    798 
    799 
    800 // Examine the scev and return true iff it's linear.
    801 // Collect any loops mentioned in the set of "Loops".
    802 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src,
    803                                            const Loop *LoopNest,
    804                                            SmallBitVector &Loops) {
    805   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src);
    806   if (!AddRec)
    807     return isLoopInvariant(Src, LoopNest);
    808   const SCEV *Start = AddRec->getStart();
    809   const SCEV *Step = AddRec->getStepRecurrence(*SE);
    810   if (!isLoopInvariant(Step, LoopNest))
    811     return false;
    812   Loops.set(mapSrcLoop(AddRec->getLoop()));
    813   return checkSrcSubscript(Start, LoopNest, Loops);
    814 }
    815 
    816 
    817 
    818 // Examine the scev and return true iff it's linear.
    819 // Collect any loops mentioned in the set of "Loops".
    820 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst,
    821                                            const Loop *LoopNest,
    822                                            SmallBitVector &Loops) {
    823   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst);
    824   if (!AddRec)
    825     return isLoopInvariant(Dst, LoopNest);
    826   const SCEV *Start = AddRec->getStart();
    827   const SCEV *Step = AddRec->getStepRecurrence(*SE);
    828   if (!isLoopInvariant(Step, LoopNest))
    829     return false;
    830   Loops.set(mapDstLoop(AddRec->getLoop()));
    831   return checkDstSubscript(Start, LoopNest, Loops);
    832 }
    833 
    834 
    835 // Examines the subscript pair (the Src and Dst SCEVs)
    836 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
    837 // Collects the associated loops in a set.
    838 DependenceAnalysis::Subscript::ClassificationKind
    839 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
    840                                  const SCEV *Dst, const Loop *DstLoopNest,
    841                                  SmallBitVector &Loops) {
    842   SmallBitVector SrcLoops(MaxLevels + 1);
    843   SmallBitVector DstLoops(MaxLevels + 1);
    844   if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
    845     return Subscript::NonLinear;
    846   if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
    847     return Subscript::NonLinear;
    848   Loops = SrcLoops;
    849   Loops |= DstLoops;
    850   unsigned N = Loops.count();
    851   if (N == 0)
    852     return Subscript::ZIV;
    853   if (N == 1)
    854     return Subscript::SIV;
    855   if (N == 2 && (SrcLoops.count() == 0 ||
    856                  DstLoops.count() == 0 ||
    857                  (SrcLoops.count() == 1 && DstLoops.count() == 1)))
    858     return Subscript::RDIV;
    859   return Subscript::MIV;
    860 }
    861 
    862 
    863 // A wrapper around SCEV::isKnownPredicate.
    864 // Looks for cases where we're interested in comparing for equality.
    865 // If both X and Y have been identically sign or zero extended,
    866 // it strips off the (confusing) extensions before invoking
    867 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
    868 // will be similarly updated.
    869 //
    870 // If SCEV::isKnownPredicate can't prove the predicate,
    871 // we try simple subtraction, which seems to help in some cases
    872 // involving symbolics.
    873 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred,
    874                                           const SCEV *X,
    875                                           const SCEV *Y) const {
    876   if (Pred == CmpInst::ICMP_EQ ||
    877       Pred == CmpInst::ICMP_NE) {
    878     if ((isa<SCEVSignExtendExpr>(X) &&
    879          isa<SCEVSignExtendExpr>(Y)) ||
    880         (isa<SCEVZeroExtendExpr>(X) &&
    881          isa<SCEVZeroExtendExpr>(Y))) {
    882       const SCEVCastExpr *CX = cast<SCEVCastExpr>(X);
    883       const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y);
    884       const SCEV *Xop = CX->getOperand();
    885       const SCEV *Yop = CY->getOperand();
    886       if (Xop->getType() == Yop->getType()) {
    887         X = Xop;
    888         Y = Yop;
    889       }
    890     }
    891   }
    892   if (SE->isKnownPredicate(Pred, X, Y))
    893     return true;
    894   // If SE->isKnownPredicate can't prove the condition,
    895   // we try the brute-force approach of subtracting
    896   // and testing the difference.
    897   // By testing with SE->isKnownPredicate first, we avoid
    898   // the possibility of overflow when the arguments are constants.
    899   const SCEV *Delta = SE->getMinusSCEV(X, Y);
    900   switch (Pred) {
    901   case CmpInst::ICMP_EQ:
    902     return Delta->isZero();
    903   case CmpInst::ICMP_NE:
    904     return SE->isKnownNonZero(Delta);
    905   case CmpInst::ICMP_SGE:
    906     return SE->isKnownNonNegative(Delta);
    907   case CmpInst::ICMP_SLE:
    908     return SE->isKnownNonPositive(Delta);
    909   case CmpInst::ICMP_SGT:
    910     return SE->isKnownPositive(Delta);
    911   case CmpInst::ICMP_SLT:
    912     return SE->isKnownNegative(Delta);
    913   default:
    914     llvm_unreachable("unexpected predicate in isKnownPredicate");
    915   }
    916 }
    917 
    918 
    919 // All subscripts are all the same type.
    920 // Loop bound may be smaller (e.g., a char).
    921 // Should zero extend loop bound, since it's always >= 0.
    922 // This routine collects upper bound and extends if needed.
    923 // Return null if no bound available.
    924 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L,
    925                                                   Type *T) const {
    926   if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
    927     const SCEV *UB = SE->getBackedgeTakenCount(L);
    928     return SE->getNoopOrZeroExtend(UB, T);
    929   }
    930   return NULL;
    931 }
    932 
    933 
    934 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
    935 // If the cast fails, returns NULL.
    936 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L,
    937                                                                   Type *T
    938                                                                   ) const {
    939   if (const SCEV *UB = collectUpperBound(L, T))
    940     return dyn_cast<SCEVConstant>(UB);
    941   return NULL;
    942 }
    943 
    944 
    945 // testZIV -
    946 // When we have a pair of subscripts of the form [c1] and [c2],
    947 // where c1 and c2 are both loop invariant, we attack it using
    948 // the ZIV test. Basically, we test by comparing the two values,
    949 // but there are actually three possible results:
    950 // 1) the values are equal, so there's a dependence
    951 // 2) the values are different, so there's no dependence
    952 // 3) the values might be equal, so we have to assume a dependence.
    953 //
    954 // Return true if dependence disproved.
    955 bool DependenceAnalysis::testZIV(const SCEV *Src,
    956                                  const SCEV *Dst,
    957                                  FullDependence &Result) const {
    958   DEBUG(dbgs() << "    src = " << *Src << "\n");
    959   DEBUG(dbgs() << "    dst = " << *Dst << "\n");
    960   ++ZIVapplications;
    961   if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
    962     DEBUG(dbgs() << "    provably dependent\n");
    963     return false; // provably dependent
    964   }
    965   if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
    966     DEBUG(dbgs() << "    provably independent\n");
    967     ++ZIVindependence;
    968     return true; // provably independent
    969   }
    970   DEBUG(dbgs() << "    possibly dependent\n");
    971   Result.Consistent = false;
    972   return false; // possibly dependent
    973 }
    974 
    975 
    976 // strongSIVtest -
    977 // From the paper, Practical Dependence Testing, Section 4.2.1
    978 //
    979 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
    980 // where i is an induction variable, c1 and c2 are loop invariant,
    981 //  and a is a constant, we can solve it exactly using the Strong SIV test.
    982 //
    983 // Can prove independence. Failing that, can compute distance (and direction).
    984 // In the presence of symbolic terms, we can sometimes make progress.
    985 //
    986 // If there's a dependence,
    987 //
    988 //    c1 + a*i = c2 + a*i'
    989 //
    990 // The dependence distance is
    991 //
    992 //    d = i' - i = (c1 - c2)/a
    993 //
    994 // A dependence only exists if d is an integer and abs(d) <= U, where U is the
    995 // loop's upper bound. If a dependence exists, the dependence direction is
    996 // defined as
    997 //
    998 //                { < if d > 0
    999 //    direction = { = if d = 0
   1000 //                { > if d < 0
   1001 //
   1002 // Return true if dependence disproved.
   1003 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff,
   1004                                        const SCEV *SrcConst,
   1005                                        const SCEV *DstConst,
   1006                                        const Loop *CurLoop,
   1007                                        unsigned Level,
   1008                                        FullDependence &Result,
   1009                                        Constraint &NewConstraint) const {
   1010   DEBUG(dbgs() << "\tStrong SIV test\n");
   1011   DEBUG(dbgs() << "\t    Coeff = " << *Coeff);
   1012   DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
   1013   DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst);
   1014   DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
   1015   DEBUG(dbgs() << "\t    DstConst = " << *DstConst);
   1016   DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
   1017   ++StrongSIVapplications;
   1018   assert(0 < Level && Level <= CommonLevels && "level out of range");
   1019   Level--;
   1020 
   1021   const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
   1022   DEBUG(dbgs() << "\t    Delta = " << *Delta);
   1023   DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
   1024 
   1025   // check that |Delta| < iteration count
   1026   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
   1027     DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound);
   1028     DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
   1029     const SCEV *AbsDelta =
   1030       SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
   1031     const SCEV *AbsCoeff =
   1032       SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
   1033     const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
   1034     if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
   1035       // Distance greater than trip count - no dependence
   1036       ++StrongSIVindependence;
   1037       ++StrongSIVsuccesses;
   1038       return true;
   1039     }
   1040   }
   1041 
   1042   // Can we compute distance?
   1043   if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
   1044     APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue();
   1045     APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue();
   1046     APInt Distance  = ConstDelta; // these need to be initialized
   1047     APInt Remainder = ConstDelta;
   1048     APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
   1049     DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
   1050     DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
   1051     // Make sure Coeff divides Delta exactly
   1052     if (Remainder != 0) {
   1053       // Coeff doesn't divide Distance, no dependence
   1054       ++StrongSIVindependence;
   1055       ++StrongSIVsuccesses;
   1056       return true;
   1057     }
   1058     Result.DV[Level].Distance = SE->getConstant(Distance);
   1059     NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
   1060     if (Distance.sgt(0))
   1061       Result.DV[Level].Direction &= Dependence::DVEntry::LT;
   1062     else if (Distance.slt(0))
   1063       Result.DV[Level].Direction &= Dependence::DVEntry::GT;
   1064     else
   1065       Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
   1066     ++StrongSIVsuccesses;
   1067   }
   1068   else if (Delta->isZero()) {
   1069     // since 0/X == 0
   1070     Result.DV[Level].Distance = Delta;
   1071     NewConstraint.setDistance(Delta, CurLoop);
   1072     Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
   1073     ++StrongSIVsuccesses;
   1074   }
   1075   else {
   1076     if (Coeff->isOne()) {
   1077       DEBUG(dbgs() << "\t    Distance = " << *Delta << "\n");
   1078       Result.DV[Level].Distance = Delta; // since X/1 == X
   1079       NewConstraint.setDistance(Delta, CurLoop);
   1080     }
   1081     else {
   1082       Result.Consistent = false;
   1083       NewConstraint.setLine(Coeff,
   1084                             SE->getNegativeSCEV(Coeff),
   1085                             SE->getNegativeSCEV(Delta), CurLoop);
   1086     }
   1087 
   1088     // maybe we can get a useful direction
   1089     bool DeltaMaybeZero     = !SE->isKnownNonZero(Delta);
   1090     bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
   1091     bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
   1092     bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
   1093     bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
   1094     // The double negatives above are confusing.
   1095     // It helps to read !SE->isKnownNonZero(Delta)
   1096     // as "Delta might be Zero"
   1097     unsigned NewDirection = Dependence::DVEntry::NONE;
   1098     if ((DeltaMaybePositive && CoeffMaybePositive) ||
   1099         (DeltaMaybeNegative && CoeffMaybeNegative))
   1100       NewDirection = Dependence::DVEntry::LT;
   1101     if (DeltaMaybeZero)
   1102       NewDirection |= Dependence::DVEntry::EQ;
   1103     if ((DeltaMaybeNegative && CoeffMaybePositive) ||
   1104         (DeltaMaybePositive && CoeffMaybeNegative))
   1105       NewDirection |= Dependence::DVEntry::GT;
   1106     if (NewDirection < Result.DV[Level].Direction)
   1107       ++StrongSIVsuccesses;
   1108     Result.DV[Level].Direction &= NewDirection;
   1109   }
   1110   return false;
   1111 }
   1112 
   1113 
   1114 // weakCrossingSIVtest -
   1115 // From the paper, Practical Dependence Testing, Section 4.2.2
   1116 //
   1117 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
   1118 // where i is an induction variable, c1 and c2 are loop invariant,
   1119 // and a is a constant, we can solve it exactly using the
   1120 // Weak-Crossing SIV test.
   1121 //
   1122 // Given c1 + a*i = c2 - a*i', we can look for the intersection of
   1123 // the two lines, where i = i', yielding
   1124 //
   1125 //    c1 + a*i = c2 - a*i
   1126 //    2a*i = c2 - c1
   1127 //    i = (c2 - c1)/2a
   1128 //
   1129 // If i < 0, there is no dependence.
   1130 // If i > upperbound, there is no dependence.
   1131 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
   1132 // If i = upperbound, there's a dependence with distance = 0.
   1133 // If i is integral, there's a dependence (all directions).
   1134 // If the non-integer part = 1/2, there's a dependence (<> directions).
   1135 // Otherwise, there's no dependence.
   1136 //
   1137 // Can prove independence. Failing that,
   1138 // can sometimes refine the directions.
   1139 // Can determine iteration for splitting.
   1140 //
   1141 // Return true if dependence disproved.
   1142 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
   1143                                              const SCEV *SrcConst,
   1144                                              const SCEV *DstConst,
   1145                                              const Loop *CurLoop,
   1146                                              unsigned Level,
   1147                                              FullDependence &Result,
   1148                                              Constraint &NewConstraint,
   1149                                              const SCEV *&SplitIter) const {
   1150   DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
   1151   DEBUG(dbgs() << "\t    Coeff = " << *Coeff << "\n");
   1152   DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
   1153   DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
   1154   ++WeakCrossingSIVapplications;
   1155   assert(0 < Level && Level <= CommonLevels && "Level out of range");
   1156   Level--;
   1157   Result.Consistent = false;
   1158   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
   1159   DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   1160   NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
   1161   if (Delta->isZero()) {
   1162     Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
   1163     Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
   1164     ++WeakCrossingSIVsuccesses;
   1165     if (!Result.DV[Level].Direction) {
   1166       ++WeakCrossingSIVindependence;
   1167       return true;
   1168     }
   1169     Result.DV[Level].Distance = Delta; // = 0
   1170     return false;
   1171   }
   1172   const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
   1173   if (!ConstCoeff)
   1174     return false;
   1175 
   1176   Result.DV[Level].Splitable = true;
   1177   if (SE->isKnownNegative(ConstCoeff)) {
   1178     ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
   1179     assert(ConstCoeff &&
   1180            "dynamic cast of negative of ConstCoeff should yield constant");
   1181     Delta = SE->getNegativeSCEV(Delta);
   1182   }
   1183   assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
   1184 
   1185   // compute SplitIter for use by DependenceAnalysis::getSplitIteration()
   1186   SplitIter =
   1187     SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0),
   1188                                     Delta),
   1189                     SE->getMulExpr(SE->getConstant(Delta->getType(), 2),
   1190                                    ConstCoeff));
   1191   DEBUG(dbgs() << "\t    Split iter = " << *SplitIter << "\n");
   1192 
   1193   const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
   1194   if (!ConstDelta)
   1195     return false;
   1196 
   1197   // We're certain that ConstCoeff > 0; therefore,
   1198   // if Delta < 0, then no dependence.
   1199   DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   1200   DEBUG(dbgs() << "\t    ConstCoeff = " << *ConstCoeff << "\n");
   1201   if (SE->isKnownNegative(Delta)) {
   1202     // No dependence, Delta < 0
   1203     ++WeakCrossingSIVindependence;
   1204     ++WeakCrossingSIVsuccesses;
   1205     return true;
   1206   }
   1207 
   1208   // We're certain that Delta > 0 and ConstCoeff > 0.
   1209   // Check Delta/(2*ConstCoeff) against upper loop bound
   1210   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
   1211     DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
   1212     const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
   1213     const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
   1214                                     ConstantTwo);
   1215     DEBUG(dbgs() << "\t    ML = " << *ML << "\n");
   1216     if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
   1217       // Delta too big, no dependence
   1218       ++WeakCrossingSIVindependence;
   1219       ++WeakCrossingSIVsuccesses;
   1220       return true;
   1221     }
   1222     if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
   1223       // i = i' = UB
   1224       Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
   1225       Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
   1226       ++WeakCrossingSIVsuccesses;
   1227       if (!Result.DV[Level].Direction) {
   1228         ++WeakCrossingSIVindependence;
   1229         return true;
   1230       }
   1231       Result.DV[Level].Splitable = false;
   1232       Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0);
   1233       return false;
   1234     }
   1235   }
   1236 
   1237   // check that Coeff divides Delta
   1238   APInt APDelta = ConstDelta->getValue()->getValue();
   1239   APInt APCoeff = ConstCoeff->getValue()->getValue();
   1240   APInt Distance = APDelta; // these need to be initialzed
   1241   APInt Remainder = APDelta;
   1242   APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
   1243   DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
   1244   if (Remainder != 0) {
   1245     // Coeff doesn't divide Delta, no dependence
   1246     ++WeakCrossingSIVindependence;
   1247     ++WeakCrossingSIVsuccesses;
   1248     return true;
   1249   }
   1250   DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
   1251 
   1252   // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
   1253   APInt Two = APInt(Distance.getBitWidth(), 2, true);
   1254   Remainder = Distance.srem(Two);
   1255   DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
   1256   if (Remainder != 0) {
   1257     // Equal direction isn't possible
   1258     Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
   1259     ++WeakCrossingSIVsuccesses;
   1260   }
   1261   return false;
   1262 }
   1263 
   1264 
   1265 // Kirch's algorithm, from
   1266 //
   1267 //        Optimizing Supercompilers for Supercomputers
   1268 //        Michael Wolfe
   1269 //        MIT Press, 1989
   1270 //
   1271 // Program 2.1, page 29.
   1272 // Computes the GCD of AM and BM.
   1273 // Also finds a solution to the equation ax - by = gdc(a, b).
   1274 // Returns true iff the gcd divides Delta.
   1275 static
   1276 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta,
   1277              APInt &G, APInt &X, APInt &Y) {
   1278   APInt A0(Bits, 1, true), A1(Bits, 0, true);
   1279   APInt B0(Bits, 0, true), B1(Bits, 1, true);
   1280   APInt G0 = AM.abs();
   1281   APInt G1 = BM.abs();
   1282   APInt Q = G0; // these need to be initialized
   1283   APInt R = G0;
   1284   APInt::sdivrem(G0, G1, Q, R);
   1285   while (R != 0) {
   1286     APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
   1287     APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
   1288     G0 = G1; G1 = R;
   1289     APInt::sdivrem(G0, G1, Q, R);
   1290   }
   1291   G = G1;
   1292   DEBUG(dbgs() << "\t    GCD = " << G << "\n");
   1293   X = AM.slt(0) ? -A1 : A1;
   1294   Y = BM.slt(0) ? B1 : -B1;
   1295 
   1296   // make sure gcd divides Delta
   1297   R = Delta.srem(G);
   1298   if (R != 0)
   1299     return true; // gcd doesn't divide Delta, no dependence
   1300   Q = Delta.sdiv(G);
   1301   X *= Q;
   1302   Y *= Q;
   1303   return false;
   1304 }
   1305 
   1306 
   1307 static
   1308 APInt floorOfQuotient(APInt A, APInt B) {
   1309   APInt Q = A; // these need to be initialized
   1310   APInt R = A;
   1311   APInt::sdivrem(A, B, Q, R);
   1312   if (R == 0)
   1313     return Q;
   1314   if ((A.sgt(0) && B.sgt(0)) ||
   1315       (A.slt(0) && B.slt(0)))
   1316     return Q;
   1317   else
   1318     return Q - 1;
   1319 }
   1320 
   1321 
   1322 static
   1323 APInt ceilingOfQuotient(APInt A, APInt B) {
   1324   APInt Q = A; // these need to be initialized
   1325   APInt R = A;
   1326   APInt::sdivrem(A, B, Q, R);
   1327   if (R == 0)
   1328     return Q;
   1329   if ((A.sgt(0) && B.sgt(0)) ||
   1330       (A.slt(0) && B.slt(0)))
   1331     return Q + 1;
   1332   else
   1333     return Q;
   1334 }
   1335 
   1336 
   1337 static
   1338 APInt maxAPInt(APInt A, APInt B) {
   1339   return A.sgt(B) ? A : B;
   1340 }
   1341 
   1342 
   1343 static
   1344 APInt minAPInt(APInt A, APInt B) {
   1345   return A.slt(B) ? A : B;
   1346 }
   1347 
   1348 
   1349 // exactSIVtest -
   1350 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
   1351 // where i is an induction variable, c1 and c2 are loop invariant, and a1
   1352 // and a2 are constant, we can solve it exactly using an algorithm developed
   1353 // by Banerjee and Wolfe. See Section 2.5.3 in
   1354 //
   1355 //        Optimizing Supercompilers for Supercomputers
   1356 //        Michael Wolfe
   1357 //        MIT Press, 1989
   1358 //
   1359 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
   1360 // so use them if possible. They're also a bit better with symbolics and,
   1361 // in the case of the strong SIV test, can compute Distances.
   1362 //
   1363 // Return true if dependence disproved.
   1364 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
   1365                                       const SCEV *DstCoeff,
   1366                                       const SCEV *SrcConst,
   1367                                       const SCEV *DstConst,
   1368                                       const Loop *CurLoop,
   1369                                       unsigned Level,
   1370                                       FullDependence &Result,
   1371                                       Constraint &NewConstraint) const {
   1372   DEBUG(dbgs() << "\tExact SIV test\n");
   1373   DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
   1374   DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
   1375   DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
   1376   DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
   1377   ++ExactSIVapplications;
   1378   assert(0 < Level && Level <= CommonLevels && "Level out of range");
   1379   Level--;
   1380   Result.Consistent = false;
   1381   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
   1382   DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   1383   NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
   1384                         Delta, CurLoop);
   1385   const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
   1386   const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
   1387   const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
   1388   if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
   1389     return false;
   1390 
   1391   // find gcd
   1392   APInt G, X, Y;
   1393   APInt AM = ConstSrcCoeff->getValue()->getValue();
   1394   APInt BM = ConstDstCoeff->getValue()->getValue();
   1395   unsigned Bits = AM.getBitWidth();
   1396   if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
   1397     // gcd doesn't divide Delta, no dependence
   1398     ++ExactSIVindependence;
   1399     ++ExactSIVsuccesses;
   1400     return true;
   1401   }
   1402 
   1403   DEBUG(dbgs() << "\t    X = " << X << ", Y = " << Y << "\n");
   1404 
   1405   // since SCEV construction normalizes, LM = 0
   1406   APInt UM(Bits, 1, true);
   1407   bool UMvalid = false;
   1408   // UM is perhaps unavailable, let's check
   1409   if (const SCEVConstant *CUB =
   1410       collectConstantUpperBound(CurLoop, Delta->getType())) {
   1411     UM = CUB->getValue()->getValue();
   1412     DEBUG(dbgs() << "\t    UM = " << UM << "\n");
   1413     UMvalid = true;
   1414   }
   1415 
   1416   APInt TU(APInt::getSignedMaxValue(Bits));
   1417   APInt TL(APInt::getSignedMinValue(Bits));
   1418 
   1419   // test(BM/G, LM-X) and test(-BM/G, X-UM)
   1420   APInt TMUL = BM.sdiv(G);
   1421   if (TMUL.sgt(0)) {
   1422     TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
   1423     DEBUG(dbgs() << "\t    TL = " << TL << "\n");
   1424     if (UMvalid) {
   1425       TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
   1426       DEBUG(dbgs() << "\t    TU = " << TU << "\n");
   1427     }
   1428   }
   1429   else {
   1430     TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
   1431     DEBUG(dbgs() << "\t    TU = " << TU << "\n");
   1432     if (UMvalid) {
   1433       TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
   1434       DEBUG(dbgs() << "\t    TL = " << TL << "\n");
   1435     }
   1436   }
   1437 
   1438   // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
   1439   TMUL = AM.sdiv(G);
   1440   if (TMUL.sgt(0)) {
   1441     TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
   1442     DEBUG(dbgs() << "\t    TL = " << TL << "\n");
   1443     if (UMvalid) {
   1444       TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
   1445       DEBUG(dbgs() << "\t    TU = " << TU << "\n");
   1446     }
   1447   }
   1448   else {
   1449     TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
   1450     DEBUG(dbgs() << "\t    TU = " << TU << "\n");
   1451     if (UMvalid) {
   1452       TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
   1453       DEBUG(dbgs() << "\t    TL = " << TL << "\n");
   1454     }
   1455   }
   1456   if (TL.sgt(TU)) {
   1457     ++ExactSIVindependence;
   1458     ++ExactSIVsuccesses;
   1459     return true;
   1460   }
   1461 
   1462   // explore directions
   1463   unsigned NewDirection = Dependence::DVEntry::NONE;
   1464 
   1465   // less than
   1466   APInt SaveTU(TU); // save these
   1467   APInt SaveTL(TL);
   1468   DEBUG(dbgs() << "\t    exploring LT direction\n");
   1469   TMUL = AM - BM;
   1470   if (TMUL.sgt(0)) {
   1471     TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
   1472     DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
   1473   }
   1474   else {
   1475     TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
   1476     DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
   1477   }
   1478   if (TL.sle(TU)) {
   1479     NewDirection |= Dependence::DVEntry::LT;
   1480     ++ExactSIVsuccesses;
   1481   }
   1482 
   1483   // equal
   1484   TU = SaveTU; // restore
   1485   TL = SaveTL;
   1486   DEBUG(dbgs() << "\t    exploring EQ direction\n");
   1487   if (TMUL.sgt(0)) {
   1488     TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
   1489     DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
   1490   }
   1491   else {
   1492     TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
   1493     DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
   1494   }
   1495   TMUL = BM - AM;
   1496   if (TMUL.sgt(0)) {
   1497     TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
   1498     DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
   1499   }
   1500   else {
   1501     TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
   1502     DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
   1503   }
   1504   if (TL.sle(TU)) {
   1505     NewDirection |= Dependence::DVEntry::EQ;
   1506     ++ExactSIVsuccesses;
   1507   }
   1508 
   1509   // greater than
   1510   TU = SaveTU; // restore
   1511   TL = SaveTL;
   1512   DEBUG(dbgs() << "\t    exploring GT direction\n");
   1513   if (TMUL.sgt(0)) {
   1514     TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
   1515     DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
   1516   }
   1517   else {
   1518     TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
   1519     DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
   1520   }
   1521   if (TL.sle(TU)) {
   1522     NewDirection |= Dependence::DVEntry::GT;
   1523     ++ExactSIVsuccesses;
   1524   }
   1525 
   1526   // finished
   1527   Result.DV[Level].Direction &= NewDirection;
   1528   if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
   1529     ++ExactSIVindependence;
   1530   return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
   1531 }
   1532 
   1533 
   1534 
   1535 // Return true if the divisor evenly divides the dividend.
   1536 static
   1537 bool isRemainderZero(const SCEVConstant *Dividend,
   1538                      const SCEVConstant *Divisor) {
   1539   APInt ConstDividend = Dividend->getValue()->getValue();
   1540   APInt ConstDivisor = Divisor->getValue()->getValue();
   1541   return ConstDividend.srem(ConstDivisor) == 0;
   1542 }
   1543 
   1544 
   1545 // weakZeroSrcSIVtest -
   1546 // From the paper, Practical Dependence Testing, Section 4.2.2
   1547 //
   1548 // When we have a pair of subscripts of the form [c1] and [c2 + a*i],
   1549 // where i is an induction variable, c1 and c2 are loop invariant,
   1550 // and a is a constant, we can solve it exactly using the
   1551 // Weak-Zero SIV test.
   1552 //
   1553 // Given
   1554 //
   1555 //    c1 = c2 + a*i
   1556 //
   1557 // we get
   1558 //
   1559 //    (c1 - c2)/a = i
   1560 //
   1561 // If i is not an integer, there's no dependence.
   1562 // If i < 0 or > UB, there's no dependence.
   1563 // If i = 0, the direction is <= and peeling the
   1564 // 1st iteration will break the dependence.
   1565 // If i = UB, the direction is >= and peeling the
   1566 // last iteration will break the dependence.
   1567 // Otherwise, the direction is *.
   1568 //
   1569 // Can prove independence. Failing that, we can sometimes refine
   1570 // the directions. Can sometimes show that first or last
   1571 // iteration carries all the dependences (so worth peeling).
   1572 //
   1573 // (see also weakZeroDstSIVtest)
   1574 //
   1575 // Return true if dependence disproved.
   1576 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff,
   1577                                             const SCEV *SrcConst,
   1578                                             const SCEV *DstConst,
   1579                                             const Loop *CurLoop,
   1580                                             unsigned Level,
   1581                                             FullDependence &Result,
   1582                                             Constraint &NewConstraint) const {
   1583   // For the WeakSIV test, it's possible the loop isn't common to
   1584   // the Src and Dst loops. If it isn't, then there's no need to
   1585   // record a direction.
   1586   DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
   1587   DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << "\n");
   1588   DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
   1589   DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
   1590   ++WeakZeroSIVapplications;
   1591   assert(0 < Level && Level <= MaxLevels && "Level out of range");
   1592   Level--;
   1593   Result.Consistent = false;
   1594   const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
   1595   NewConstraint.setLine(SE->getConstant(Delta->getType(), 0),
   1596                         DstCoeff, Delta, CurLoop);
   1597   DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   1598   if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
   1599     if (Level < CommonLevels) {
   1600       Result.DV[Level].Direction &= Dependence::DVEntry::LE;
   1601       Result.DV[Level].PeelFirst = true;
   1602       ++WeakZeroSIVsuccesses;
   1603     }
   1604     return false; // dependences caused by first iteration
   1605   }
   1606   const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
   1607   if (!ConstCoeff)
   1608     return false;
   1609   const SCEV *AbsCoeff =
   1610     SE->isKnownNegative(ConstCoeff) ?
   1611     SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
   1612   const SCEV *NewDelta =
   1613     SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
   1614 
   1615   // check that Delta/SrcCoeff < iteration count
   1616   // really check NewDelta < count*AbsCoeff
   1617   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
   1618     DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
   1619     const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
   1620     if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
   1621       ++WeakZeroSIVindependence;
   1622       ++WeakZeroSIVsuccesses;
   1623       return true;
   1624     }
   1625     if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
   1626       // dependences caused by last iteration
   1627       if (Level < CommonLevels) {
   1628         Result.DV[Level].Direction &= Dependence::DVEntry::GE;
   1629         Result.DV[Level].PeelLast = true;
   1630         ++WeakZeroSIVsuccesses;
   1631       }
   1632       return false;
   1633     }
   1634   }
   1635 
   1636   // check that Delta/SrcCoeff >= 0
   1637   // really check that NewDelta >= 0
   1638   if (SE->isKnownNegative(NewDelta)) {
   1639     // No dependence, newDelta < 0
   1640     ++WeakZeroSIVindependence;
   1641     ++WeakZeroSIVsuccesses;
   1642     return true;
   1643   }
   1644 
   1645   // if SrcCoeff doesn't divide Delta, then no dependence
   1646   if (isa<SCEVConstant>(Delta) &&
   1647       !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
   1648     ++WeakZeroSIVindependence;
   1649     ++WeakZeroSIVsuccesses;
   1650     return true;
   1651   }
   1652   return false;
   1653 }
   1654 
   1655 
   1656 // weakZeroDstSIVtest -
   1657 // From the paper, Practical Dependence Testing, Section 4.2.2
   1658 //
   1659 // When we have a pair of subscripts of the form [c1 + a*i] and [c2],
   1660 // where i is an induction variable, c1 and c2 are loop invariant,
   1661 // and a is a constant, we can solve it exactly using the
   1662 // Weak-Zero SIV test.
   1663 //
   1664 // Given
   1665 //
   1666 //    c1 + a*i = c2
   1667 //
   1668 // we get
   1669 //
   1670 //    i = (c2 - c1)/a
   1671 //
   1672 // If i is not an integer, there's no dependence.
   1673 // If i < 0 or > UB, there's no dependence.
   1674 // If i = 0, the direction is <= and peeling the
   1675 // 1st iteration will break the dependence.
   1676 // If i = UB, the direction is >= and peeling the
   1677 // last iteration will break the dependence.
   1678 // Otherwise, the direction is *.
   1679 //
   1680 // Can prove independence. Failing that, we can sometimes refine
   1681 // the directions. Can sometimes show that first or last
   1682 // iteration carries all the dependences (so worth peeling).
   1683 //
   1684 // (see also weakZeroSrcSIVtest)
   1685 //
   1686 // Return true if dependence disproved.
   1687 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff,
   1688                                             const SCEV *SrcConst,
   1689                                             const SCEV *DstConst,
   1690                                             const Loop *CurLoop,
   1691                                             unsigned Level,
   1692                                             FullDependence &Result,
   1693                                             Constraint &NewConstraint) const {
   1694   // For the WeakSIV test, it's possible the loop isn't common to the
   1695   // Src and Dst loops. If it isn't, then there's no need to record a direction.
   1696   DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
   1697   DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << "\n");
   1698   DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
   1699   DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
   1700   ++WeakZeroSIVapplications;
   1701   assert(0 < Level && Level <= SrcLevels && "Level out of range");
   1702   Level--;
   1703   Result.Consistent = false;
   1704   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
   1705   NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0),
   1706                         Delta, CurLoop);
   1707   DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   1708   if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
   1709     if (Level < CommonLevels) {
   1710       Result.DV[Level].Direction &= Dependence::DVEntry::LE;
   1711       Result.DV[Level].PeelFirst = true;
   1712       ++WeakZeroSIVsuccesses;
   1713     }
   1714     return false; // dependences caused by first iteration
   1715   }
   1716   const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
   1717   if (!ConstCoeff)
   1718     return false;
   1719   const SCEV *AbsCoeff =
   1720     SE->isKnownNegative(ConstCoeff) ?
   1721     SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
   1722   const SCEV *NewDelta =
   1723     SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
   1724 
   1725   // check that Delta/SrcCoeff < iteration count
   1726   // really check NewDelta < count*AbsCoeff
   1727   if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
   1728     DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
   1729     const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
   1730     if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
   1731       ++WeakZeroSIVindependence;
   1732       ++WeakZeroSIVsuccesses;
   1733       return true;
   1734     }
   1735     if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
   1736       // dependences caused by last iteration
   1737       if (Level < CommonLevels) {
   1738         Result.DV[Level].Direction &= Dependence::DVEntry::GE;
   1739         Result.DV[Level].PeelLast = true;
   1740         ++WeakZeroSIVsuccesses;
   1741       }
   1742       return false;
   1743     }
   1744   }
   1745 
   1746   // check that Delta/SrcCoeff >= 0
   1747   // really check that NewDelta >= 0
   1748   if (SE->isKnownNegative(NewDelta)) {
   1749     // No dependence, newDelta < 0
   1750     ++WeakZeroSIVindependence;
   1751     ++WeakZeroSIVsuccesses;
   1752     return true;
   1753   }
   1754 
   1755   // if SrcCoeff doesn't divide Delta, then no dependence
   1756   if (isa<SCEVConstant>(Delta) &&
   1757       !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
   1758     ++WeakZeroSIVindependence;
   1759     ++WeakZeroSIVsuccesses;
   1760     return true;
   1761   }
   1762   return false;
   1763 }
   1764 
   1765 
   1766 // exactRDIVtest - Tests the RDIV subscript pair for dependence.
   1767 // Things of the form [c1 + a*i] and [c2 + b*j],
   1768 // where i and j are induction variable, c1 and c2 are loop invariant,
   1769 // and a and b are constants.
   1770 // Returns true if any possible dependence is disproved.
   1771 // Marks the result as inconsistent.
   1772 // Works in some cases that symbolicRDIVtest doesn't, and vice versa.
   1773 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
   1774                                        const SCEV *DstCoeff,
   1775                                        const SCEV *SrcConst,
   1776                                        const SCEV *DstConst,
   1777                                        const Loop *SrcLoop,
   1778                                        const Loop *DstLoop,
   1779                                        FullDependence &Result) const {
   1780   DEBUG(dbgs() << "\tExact RDIV test\n");
   1781   DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
   1782   DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
   1783   DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
   1784   DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
   1785   ++ExactRDIVapplications;
   1786   Result.Consistent = false;
   1787   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
   1788   DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   1789   const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
   1790   const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
   1791   const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
   1792   if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
   1793     return false;
   1794 
   1795   // find gcd
   1796   APInt G, X, Y;
   1797   APInt AM = ConstSrcCoeff->getValue()->getValue();
   1798   APInt BM = ConstDstCoeff->getValue()->getValue();
   1799   unsigned Bits = AM.getBitWidth();
   1800   if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
   1801     // gcd doesn't divide Delta, no dependence
   1802     ++ExactRDIVindependence;
   1803     return true;
   1804   }
   1805 
   1806   DEBUG(dbgs() << "\t    X = " << X << ", Y = " << Y << "\n");
   1807 
   1808   // since SCEV construction seems to normalize, LM = 0
   1809   APInt SrcUM(Bits, 1, true);
   1810   bool SrcUMvalid = false;
   1811   // SrcUM is perhaps unavailable, let's check
   1812   if (const SCEVConstant *UpperBound =
   1813       collectConstantUpperBound(SrcLoop, Delta->getType())) {
   1814     SrcUM = UpperBound->getValue()->getValue();
   1815     DEBUG(dbgs() << "\t    SrcUM = " << SrcUM << "\n");
   1816     SrcUMvalid = true;
   1817   }
   1818 
   1819   APInt DstUM(Bits, 1, true);
   1820   bool DstUMvalid = false;
   1821   // UM is perhaps unavailable, let's check
   1822   if (const SCEVConstant *UpperBound =
   1823       collectConstantUpperBound(DstLoop, Delta->getType())) {
   1824     DstUM = UpperBound->getValue()->getValue();
   1825     DEBUG(dbgs() << "\t    DstUM = " << DstUM << "\n");
   1826     DstUMvalid = true;
   1827   }
   1828 
   1829   APInt TU(APInt::getSignedMaxValue(Bits));
   1830   APInt TL(APInt::getSignedMinValue(Bits));
   1831 
   1832   // test(BM/G, LM-X) and test(-BM/G, X-UM)
   1833   APInt TMUL = BM.sdiv(G);
   1834   if (TMUL.sgt(0)) {
   1835     TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
   1836     DEBUG(dbgs() << "\t    TL = " << TL << "\n");
   1837     if (SrcUMvalid) {
   1838       TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
   1839       DEBUG(dbgs() << "\t    TU = " << TU << "\n");
   1840     }
   1841   }
   1842   else {
   1843     TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
   1844     DEBUG(dbgs() << "\t    TU = " << TU << "\n");
   1845     if (SrcUMvalid) {
   1846       TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
   1847       DEBUG(dbgs() << "\t    TL = " << TL << "\n");
   1848     }
   1849   }
   1850 
   1851   // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
   1852   TMUL = AM.sdiv(G);
   1853   if (TMUL.sgt(0)) {
   1854     TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
   1855     DEBUG(dbgs() << "\t    TL = " << TL << "\n");
   1856     if (DstUMvalid) {
   1857       TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
   1858       DEBUG(dbgs() << "\t    TU = " << TU << "\n");
   1859     }
   1860   }
   1861   else {
   1862     TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
   1863     DEBUG(dbgs() << "\t    TU = " << TU << "\n");
   1864     if (DstUMvalid) {
   1865       TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
   1866       DEBUG(dbgs() << "\t    TL = " << TL << "\n");
   1867     }
   1868   }
   1869   if (TL.sgt(TU))
   1870     ++ExactRDIVindependence;
   1871   return TL.sgt(TU);
   1872 }
   1873 
   1874 
   1875 // symbolicRDIVtest -
   1876 // In Section 4.5 of the Practical Dependence Testing paper,the authors
   1877 // introduce a special case of Banerjee's Inequalities (also called the
   1878 // Extreme-Value Test) that can handle some of the SIV and RDIV cases,
   1879 // particularly cases with symbolics. Since it's only able to disprove
   1880 // dependence (not compute distances or directions), we'll use it as a
   1881 // fall back for the other tests.
   1882 //
   1883 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
   1884 // where i and j are induction variables and c1 and c2 are loop invariants,
   1885 // we can use the symbolic tests to disprove some dependences, serving as a
   1886 // backup for the RDIV test. Note that i and j can be the same variable,
   1887 // letting this test serve as a backup for the various SIV tests.
   1888 //
   1889 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
   1890 //  0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
   1891 // loop bounds for the i and j loops, respectively. So, ...
   1892 //
   1893 // c1 + a1*i = c2 + a2*j
   1894 // a1*i - a2*j = c2 - c1
   1895 //
   1896 // To test for a dependence, we compute c2 - c1 and make sure it's in the
   1897 // range of the maximum and minimum possible values of a1*i - a2*j.
   1898 // Considering the signs of a1 and a2, we have 4 possible cases:
   1899 //
   1900 // 1) If a1 >= 0 and a2 >= 0, then
   1901 //        a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
   1902 //              -a2*N2 <= c2 - c1 <= a1*N1
   1903 //
   1904 // 2) If a1 >= 0 and a2 <= 0, then
   1905 //        a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
   1906 //                  0 <= c2 - c1 <= a1*N1 - a2*N2
   1907 //
   1908 // 3) If a1 <= 0 and a2 >= 0, then
   1909 //        a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
   1910 //        a1*N1 - a2*N2 <= c2 - c1 <= 0
   1911 //
   1912 // 4) If a1 <= 0 and a2 <= 0, then
   1913 //        a1*N1 - a2*0  <= c2 - c1 <= a1*0 - a2*N2
   1914 //        a1*N1         <= c2 - c1 <=       -a2*N2
   1915 //
   1916 // return true if dependence disproved
   1917 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1,
   1918                                           const SCEV *A2,
   1919                                           const SCEV *C1,
   1920                                           const SCEV *C2,
   1921                                           const Loop *Loop1,
   1922                                           const Loop *Loop2) const {
   1923   ++SymbolicRDIVapplications;
   1924   DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
   1925   DEBUG(dbgs() << "\t    A1 = " << *A1);
   1926   DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
   1927   DEBUG(dbgs() << "\t    A2 = " << *A2 << "\n");
   1928   DEBUG(dbgs() << "\t    C1 = " << *C1 << "\n");
   1929   DEBUG(dbgs() << "\t    C2 = " << *C2 << "\n");
   1930   const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
   1931   const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
   1932   DEBUG(if (N1) dbgs() << "\t    N1 = " << *N1 << "\n");
   1933   DEBUG(if (N2) dbgs() << "\t    N2 = " << *N2 << "\n");
   1934   const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
   1935   const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
   1936   DEBUG(dbgs() << "\t    C2 - C1 = " << *C2_C1 << "\n");
   1937   DEBUG(dbgs() << "\t    C1 - C2 = " << *C1_C2 << "\n");
   1938   if (SE->isKnownNonNegative(A1)) {
   1939     if (SE->isKnownNonNegative(A2)) {
   1940       // A1 >= 0 && A2 >= 0
   1941       if (N1) {
   1942         // make sure that c2 - c1 <= a1*N1
   1943         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
   1944         DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
   1945         if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
   1946           ++SymbolicRDIVindependence;
   1947           return true;
   1948         }
   1949       }
   1950       if (N2) {
   1951         // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
   1952         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
   1953         DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
   1954         if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
   1955           ++SymbolicRDIVindependence;
   1956           return true;
   1957         }
   1958       }
   1959     }
   1960     else if (SE->isKnownNonPositive(A2)) {
   1961       // a1 >= 0 && a2 <= 0
   1962       if (N1 && N2) {
   1963         // make sure that c2 - c1 <= a1*N1 - a2*N2
   1964         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
   1965         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
   1966         const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
   1967         DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
   1968         if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
   1969           ++SymbolicRDIVindependence;
   1970           return true;
   1971         }
   1972       }
   1973       // make sure that 0 <= c2 - c1
   1974       if (SE->isKnownNegative(C2_C1)) {
   1975         ++SymbolicRDIVindependence;
   1976         return true;
   1977       }
   1978     }
   1979   }
   1980   else if (SE->isKnownNonPositive(A1)) {
   1981     if (SE->isKnownNonNegative(A2)) {
   1982       // a1 <= 0 && a2 >= 0
   1983       if (N1 && N2) {
   1984         // make sure that a1*N1 - a2*N2 <= c2 - c1
   1985         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
   1986         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
   1987         const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
   1988         DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
   1989         if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
   1990           ++SymbolicRDIVindependence;
   1991           return true;
   1992         }
   1993       }
   1994       // make sure that c2 - c1 <= 0
   1995       if (SE->isKnownPositive(C2_C1)) {
   1996         ++SymbolicRDIVindependence;
   1997         return true;
   1998       }
   1999     }
   2000     else if (SE->isKnownNonPositive(A2)) {
   2001       // a1 <= 0 && a2 <= 0
   2002       if (N1) {
   2003         // make sure that a1*N1 <= c2 - c1
   2004         const SCEV *A1N1 = SE->getMulExpr(A1, N1);
   2005         DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
   2006         if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
   2007           ++SymbolicRDIVindependence;
   2008           return true;
   2009         }
   2010       }
   2011       if (N2) {
   2012         // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
   2013         const SCEV *A2N2 = SE->getMulExpr(A2, N2);
   2014         DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
   2015         if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
   2016           ++SymbolicRDIVindependence;
   2017           return true;
   2018         }
   2019       }
   2020     }
   2021   }
   2022   return false;
   2023 }
   2024 
   2025 
   2026 // testSIV -
   2027 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
   2028 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and
   2029 // a2 are constant, we attack it with an SIV test. While they can all be
   2030 // solved with the Exact SIV test, it's worthwhile to use simpler tests when
   2031 // they apply; they're cheaper and sometimes more precise.
   2032 //
   2033 // Return true if dependence disproved.
   2034 bool DependenceAnalysis::testSIV(const SCEV *Src,
   2035                                  const SCEV *Dst,
   2036                                  unsigned &Level,
   2037                                  FullDependence &Result,
   2038                                  Constraint &NewConstraint,
   2039                                  const SCEV *&SplitIter) const {
   2040   DEBUG(dbgs() << "    src = " << *Src << "\n");
   2041   DEBUG(dbgs() << "    dst = " << *Dst << "\n");
   2042   const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
   2043   const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
   2044   if (SrcAddRec && DstAddRec) {
   2045     const SCEV *SrcConst = SrcAddRec->getStart();
   2046     const SCEV *DstConst = DstAddRec->getStart();
   2047     const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
   2048     const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
   2049     const Loop *CurLoop = SrcAddRec->getLoop();
   2050     assert(CurLoop == DstAddRec->getLoop() &&
   2051            "both loops in SIV should be same");
   2052     Level = mapSrcLoop(CurLoop);
   2053     bool disproven;
   2054     if (SrcCoeff == DstCoeff)
   2055       disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
   2056                                 Level, Result, NewConstraint);
   2057     else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
   2058       disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
   2059                                       Level, Result, NewConstraint, SplitIter);
   2060     else
   2061       disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
   2062                                Level, Result, NewConstraint);
   2063     return disproven ||
   2064       gcdMIVtest(Src, Dst, Result) ||
   2065       symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
   2066   }
   2067   if (SrcAddRec) {
   2068     const SCEV *SrcConst = SrcAddRec->getStart();
   2069     const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
   2070     const SCEV *DstConst = Dst;
   2071     const Loop *CurLoop = SrcAddRec->getLoop();
   2072     Level = mapSrcLoop(CurLoop);
   2073     return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
   2074                               Level, Result, NewConstraint) ||
   2075       gcdMIVtest(Src, Dst, Result);
   2076   }
   2077   if (DstAddRec) {
   2078     const SCEV *DstConst = DstAddRec->getStart();
   2079     const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
   2080     const SCEV *SrcConst = Src;
   2081     const Loop *CurLoop = DstAddRec->getLoop();
   2082     Level = mapDstLoop(CurLoop);
   2083     return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
   2084                               CurLoop, Level, Result, NewConstraint) ||
   2085       gcdMIVtest(Src, Dst, Result);
   2086   }
   2087   llvm_unreachable("SIV test expected at least one AddRec");
   2088   return false;
   2089 }
   2090 
   2091 
   2092 // testRDIV -
   2093 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
   2094 // where i and j are induction variables, c1 and c2 are loop invariant,
   2095 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation
   2096 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
   2097 // It doesn't make sense to talk about distance or direction in this case,
   2098 // so there's no point in making special versions of the Strong SIV test or
   2099 // the Weak-crossing SIV test.
   2100 //
   2101 // With minor algebra, this test can also be used for things like
   2102 // [c1 + a1*i + a2*j][c2].
   2103 //
   2104 // Return true if dependence disproved.
   2105 bool DependenceAnalysis::testRDIV(const SCEV *Src,
   2106                                   const SCEV *Dst,
   2107                                   FullDependence &Result) const {
   2108   // we have 3 possible situations here:
   2109   //   1) [a*i + b] and [c*j + d]
   2110   //   2) [a*i + c*j + b] and [d]
   2111   //   3) [b] and [a*i + c*j + d]
   2112   // We need to find what we've got and get organized
   2113 
   2114   const SCEV *SrcConst, *DstConst;
   2115   const SCEV *SrcCoeff, *DstCoeff;
   2116   const Loop *SrcLoop, *DstLoop;
   2117 
   2118   DEBUG(dbgs() << "    src = " << *Src << "\n");
   2119   DEBUG(dbgs() << "    dst = " << *Dst << "\n");
   2120   const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
   2121   const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
   2122   if (SrcAddRec && DstAddRec) {
   2123     SrcConst = SrcAddRec->getStart();
   2124     SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
   2125     SrcLoop = SrcAddRec->getLoop();
   2126     DstConst = DstAddRec->getStart();
   2127     DstCoeff = DstAddRec->getStepRecurrence(*SE);
   2128     DstLoop = DstAddRec->getLoop();
   2129   }
   2130   else if (SrcAddRec) {
   2131     if (const SCEVAddRecExpr *tmpAddRec =
   2132         dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
   2133       SrcConst = tmpAddRec->getStart();
   2134       SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
   2135       SrcLoop = tmpAddRec->getLoop();
   2136       DstConst = Dst;
   2137       DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
   2138       DstLoop = SrcAddRec->getLoop();
   2139     }
   2140     else
   2141       llvm_unreachable("RDIV reached by surprising SCEVs");
   2142   }
   2143   else if (DstAddRec) {
   2144     if (const SCEVAddRecExpr *tmpAddRec =
   2145         dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
   2146       DstConst = tmpAddRec->getStart();
   2147       DstCoeff = tmpAddRec->getStepRecurrence(*SE);
   2148       DstLoop = tmpAddRec->getLoop();
   2149       SrcConst = Src;
   2150       SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
   2151       SrcLoop = DstAddRec->getLoop();
   2152     }
   2153     else
   2154       llvm_unreachable("RDIV reached by surprising SCEVs");
   2155   }
   2156   else
   2157     llvm_unreachable("RDIV expected at least one AddRec");
   2158   return exactRDIVtest(SrcCoeff, DstCoeff,
   2159                        SrcConst, DstConst,
   2160                        SrcLoop, DstLoop,
   2161                        Result) ||
   2162     gcdMIVtest(Src, Dst, Result) ||
   2163     symbolicRDIVtest(SrcCoeff, DstCoeff,
   2164                      SrcConst, DstConst,
   2165                      SrcLoop, DstLoop);
   2166 }
   2167 
   2168 
   2169 // Tests the single-subscript MIV pair (Src and Dst) for dependence.
   2170 // Return true if dependence disproved.
   2171 // Can sometimes refine direction vectors.
   2172 bool DependenceAnalysis::testMIV(const SCEV *Src,
   2173                                  const SCEV *Dst,
   2174                                  const SmallBitVector &Loops,
   2175                                  FullDependence &Result) const {
   2176   DEBUG(dbgs() << "    src = " << *Src << "\n");
   2177   DEBUG(dbgs() << "    dst = " << *Dst << "\n");
   2178   Result.Consistent = false;
   2179   return gcdMIVtest(Src, Dst, Result) ||
   2180     banerjeeMIVtest(Src, Dst, Loops, Result);
   2181 }
   2182 
   2183 
   2184 // Given a product, e.g., 10*X*Y, returns the first constant operand,
   2185 // in this case 10. If there is no constant part, returns NULL.
   2186 static
   2187 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) {
   2188   for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) {
   2189     if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op)))
   2190       return Constant;
   2191   }
   2192   return NULL;
   2193 }
   2194 
   2195 
   2196 //===----------------------------------------------------------------------===//
   2197 // gcdMIVtest -
   2198 // Tests an MIV subscript pair for dependence.
   2199 // Returns true if any possible dependence is disproved.
   2200 // Marks the result as inconsistent.
   2201 // Can sometimes disprove the equal direction for 1 or more loops,
   2202 // as discussed in Michael Wolfe's book,
   2203 // High Performance Compilers for Parallel Computing, page 235.
   2204 //
   2205 // We spend some effort (code!) to handle cases like
   2206 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
   2207 // but M and N are just loop-invariant variables.
   2208 // This should help us handle linearized subscripts;
   2209 // also makes this test a useful backup to the various SIV tests.
   2210 //
   2211 // It occurs to me that the presence of loop-invariant variables
   2212 // changes the nature of the test from "greatest common divisor"
   2213 // to "a common divisor".
   2214 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
   2215                                     const SCEV *Dst,
   2216                                     FullDependence &Result) const {
   2217   DEBUG(dbgs() << "starting gcd\n");
   2218   ++GCDapplications;
   2219   unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
   2220   APInt RunningGCD = APInt::getNullValue(BitWidth);
   2221 
   2222   // Examine Src coefficients.
   2223   // Compute running GCD and record source constant.
   2224   // Because we're looking for the constant at the end of the chain,
   2225   // we can't quit the loop just because the GCD == 1.
   2226   const SCEV *Coefficients = Src;
   2227   while (const SCEVAddRecExpr *AddRec =
   2228          dyn_cast<SCEVAddRecExpr>(Coefficients)) {
   2229     const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
   2230     const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
   2231     if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
   2232       // If the coefficient is the product of a constant and other stuff,
   2233       // we can use the constant in the GCD computation.
   2234       Constant = getConstantPart(Product);
   2235     if (!Constant)
   2236       return false;
   2237     APInt ConstCoeff = Constant->getValue()->getValue();
   2238     RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
   2239     Coefficients = AddRec->getStart();
   2240   }
   2241   const SCEV *SrcConst = Coefficients;
   2242 
   2243   // Examine Dst coefficients.
   2244   // Compute running GCD and record destination constant.
   2245   // Because we're looking for the constant at the end of the chain,
   2246   // we can't quit the loop just because the GCD == 1.
   2247   Coefficients = Dst;
   2248   while (const SCEVAddRecExpr *AddRec =
   2249          dyn_cast<SCEVAddRecExpr>(Coefficients)) {
   2250     const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
   2251     const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
   2252     if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
   2253       // If the coefficient is the product of a constant and other stuff,
   2254       // we can use the constant in the GCD computation.
   2255       Constant = getConstantPart(Product);
   2256     if (!Constant)
   2257       return false;
   2258     APInt ConstCoeff = Constant->getValue()->getValue();
   2259     RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
   2260     Coefficients = AddRec->getStart();
   2261   }
   2262   const SCEV *DstConst = Coefficients;
   2263 
   2264   APInt ExtraGCD = APInt::getNullValue(BitWidth);
   2265   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
   2266   DEBUG(dbgs() << "    Delta = " << *Delta << "\n");
   2267   const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
   2268   if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
   2269     // If Delta is a sum of products, we may be able to make further progress.
   2270     for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) {
   2271       const SCEV *Operand = Sum->getOperand(Op);
   2272       if (isa<SCEVConstant>(Operand)) {
   2273         assert(!Constant && "Surprised to find multiple constants");
   2274         Constant = cast<SCEVConstant>(Operand);
   2275       }
   2276       else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
   2277         // Search for constant operand to participate in GCD;
   2278         // If none found; return false.
   2279         const SCEVConstant *ConstOp = getConstantPart(Product);
   2280         if (!ConstOp)
   2281           return false;
   2282         APInt ConstOpValue = ConstOp->getValue()->getValue();
   2283         ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
   2284                                                    ConstOpValue.abs());
   2285       }
   2286       else
   2287         return false;
   2288     }
   2289   }
   2290   if (!Constant)
   2291     return false;
   2292   APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue();
   2293   DEBUG(dbgs() << "    ConstDelta = " << ConstDelta << "\n");
   2294   if (ConstDelta == 0)
   2295     return false;
   2296   RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
   2297   DEBUG(dbgs() << "    RunningGCD = " << RunningGCD << "\n");
   2298   APInt Remainder = ConstDelta.srem(RunningGCD);
   2299   if (Remainder != 0) {
   2300     ++GCDindependence;
   2301     return true;
   2302   }
   2303 
   2304   // Try to disprove equal directions.
   2305   // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
   2306   // the code above can't disprove the dependence because the GCD = 1.
   2307   // So we consider what happen if i = i' and what happens if j = j'.
   2308   // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
   2309   // which is infeasible, so we can disallow the = direction for the i level.
   2310   // Setting j = j' doesn't help matters, so we end up with a direction vector
   2311   // of [<>, *]
   2312   //
   2313   // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
   2314   // we need to remember that the constant part is 5 and the RunningGCD should
   2315   // be initialized to ExtraGCD = 30.
   2316   DEBUG(dbgs() << "    ExtraGCD = " << ExtraGCD << '\n');
   2317 
   2318   bool Improved = false;
   2319   Coefficients = Src;
   2320   while (const SCEVAddRecExpr *AddRec =
   2321          dyn_cast<SCEVAddRecExpr>(Coefficients)) {
   2322     Coefficients = AddRec->getStart();
   2323     const Loop *CurLoop = AddRec->getLoop();
   2324     RunningGCD = ExtraGCD;
   2325     const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
   2326     const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
   2327     const SCEV *Inner = Src;
   2328     while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
   2329       AddRec = cast<SCEVAddRecExpr>(Inner);
   2330       const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
   2331       if (CurLoop == AddRec->getLoop())
   2332         ; // SrcCoeff == Coeff
   2333       else {
   2334         if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
   2335           // If the coefficient is the product of a constant and other stuff,
   2336           // we can use the constant in the GCD computation.
   2337           Constant = getConstantPart(Product);
   2338         else
   2339           Constant = cast<SCEVConstant>(Coeff);
   2340         APInt ConstCoeff = Constant->getValue()->getValue();
   2341         RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
   2342       }
   2343       Inner = AddRec->getStart();
   2344     }
   2345     Inner = Dst;
   2346     while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
   2347       AddRec = cast<SCEVAddRecExpr>(Inner);
   2348       const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
   2349       if (CurLoop == AddRec->getLoop())
   2350         DstCoeff = Coeff;
   2351       else {
   2352         if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
   2353           // If the coefficient is the product of a constant and other stuff,
   2354           // we can use the constant in the GCD computation.
   2355           Constant = getConstantPart(Product);
   2356         else
   2357           Constant = cast<SCEVConstant>(Coeff);
   2358         APInt ConstCoeff = Constant->getValue()->getValue();
   2359         RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
   2360       }
   2361       Inner = AddRec->getStart();
   2362     }
   2363     Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
   2364     if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta))
   2365       // If the coefficient is the product of a constant and other stuff,
   2366       // we can use the constant in the GCD computation.
   2367       Constant = getConstantPart(Product);
   2368     else if (isa<SCEVConstant>(Delta))
   2369       Constant = cast<SCEVConstant>(Delta);
   2370     else {
   2371       // The difference of the two coefficients might not be a product
   2372       // or constant, in which case we give up on this direction.
   2373       continue;
   2374     }
   2375     APInt ConstCoeff = Constant->getValue()->getValue();
   2376     RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
   2377     DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
   2378     if (RunningGCD != 0) {
   2379       Remainder = ConstDelta.srem(RunningGCD);
   2380       DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
   2381       if (Remainder != 0) {
   2382         unsigned Level = mapSrcLoop(CurLoop);
   2383         Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
   2384         Improved = true;
   2385       }
   2386     }
   2387   }
   2388   if (Improved)
   2389     ++GCDsuccesses;
   2390   DEBUG(dbgs() << "all done\n");
   2391   return false;
   2392 }
   2393 
   2394 
   2395 //===----------------------------------------------------------------------===//
   2396 // banerjeeMIVtest -
   2397 // Use Banerjee's Inequalities to test an MIV subscript pair.
   2398 // (Wolfe, in the race-car book, calls this the Extreme Value Test.)
   2399 // Generally follows the discussion in Section 2.5.2 of
   2400 //
   2401 //    Optimizing Supercompilers for Supercomputers
   2402 //    Michael Wolfe
   2403 //
   2404 // The inequalities given on page 25 are simplified in that loops are
   2405 // normalized so that the lower bound is always 0 and the stride is always 1.
   2406 // For example, Wolfe gives
   2407 //
   2408 //     LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
   2409 //
   2410 // where A_k is the coefficient of the kth index in the source subscript,
   2411 // B_k is the coefficient of the kth index in the destination subscript,
   2412 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
   2413 // index, and N_k is the stride of the kth index. Since all loops are normalized
   2414 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
   2415 // equation to
   2416 //
   2417 //     LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
   2418 //            = (A^-_k - B_k)^- (U_k - 1)  - B_k
   2419 //
   2420 // Similar simplifications are possible for the other equations.
   2421 //
   2422 // When we can't determine the number of iterations for a loop,
   2423 // we use NULL as an indicator for the worst case, infinity.
   2424 // When computing the upper bound, NULL denotes +inf;
   2425 // for the lower bound, NULL denotes -inf.
   2426 //
   2427 // Return true if dependence disproved.
   2428 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src,
   2429                                          const SCEV *Dst,
   2430                                          const SmallBitVector &Loops,
   2431                                          FullDependence &Result) const {
   2432   DEBUG(dbgs() << "starting Banerjee\n");
   2433   ++BanerjeeApplications;
   2434   DEBUG(dbgs() << "    Src = " << *Src << '\n');
   2435   const SCEV *A0;
   2436   CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
   2437   DEBUG(dbgs() << "    Dst = " << *Dst << '\n');
   2438   const SCEV *B0;
   2439   CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
   2440   BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
   2441   const SCEV *Delta = SE->getMinusSCEV(B0, A0);
   2442   DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
   2443 
   2444   // Compute bounds for all the * directions.
   2445   DEBUG(dbgs() << "\tBounds[*]\n");
   2446   for (unsigned K = 1; K <= MaxLevels; ++K) {
   2447     Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
   2448     Bound[K].Direction = Dependence::DVEntry::ALL;
   2449     Bound[K].DirSet = Dependence::DVEntry::NONE;
   2450     findBoundsALL(A, B, Bound, K);
   2451 #ifndef NDEBUG
   2452     DEBUG(dbgs() << "\t    " << K << '\t');
   2453     if (Bound[K].Lower[Dependence::DVEntry::ALL])
   2454       DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
   2455     else
   2456       DEBUG(dbgs() << "-inf\t");
   2457     if (Bound[K].Upper[Dependence::DVEntry::ALL])
   2458       DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
   2459     else
   2460       DEBUG(dbgs() << "+inf\n");
   2461 #endif
   2462   }
   2463 
   2464   // Test the *, *, *, ... case.
   2465   bool Disproved = false;
   2466   if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
   2467     // Explore the direction vector hierarchy.
   2468     unsigned DepthExpanded = 0;
   2469     unsigned NewDeps = exploreDirections(1, A, B, Bound,
   2470                                          Loops, DepthExpanded, Delta);
   2471     if (NewDeps > 0) {
   2472       bool Improved = false;
   2473       for (unsigned K = 1; K <= CommonLevels; ++K) {
   2474         if (Loops[K]) {
   2475           unsigned Old = Result.DV[K - 1].Direction;
   2476           Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
   2477           Improved |= Old != Result.DV[K - 1].Direction;
   2478           if (!Result.DV[K - 1].Direction) {
   2479             Improved = false;
   2480             Disproved = true;
   2481             break;
   2482           }
   2483         }
   2484       }
   2485       if (Improved)
   2486         ++BanerjeeSuccesses;
   2487     }
   2488     else {
   2489       ++BanerjeeIndependence;
   2490       Disproved = true;
   2491     }
   2492   }
   2493   else {
   2494     ++BanerjeeIndependence;
   2495     Disproved = true;
   2496   }
   2497   delete [] Bound;
   2498   delete [] A;
   2499   delete [] B;
   2500   return Disproved;
   2501 }
   2502 
   2503 
   2504 // Hierarchically expands the direction vector
   2505 // search space, combining the directions of discovered dependences
   2506 // in the DirSet field of Bound. Returns the number of distinct
   2507 // dependences discovered. If the dependence is disproved,
   2508 // it will return 0.
   2509 unsigned DependenceAnalysis::exploreDirections(unsigned Level,
   2510                                                CoefficientInfo *A,
   2511                                                CoefficientInfo *B,
   2512                                                BoundInfo *Bound,
   2513                                                const SmallBitVector &Loops,
   2514                                                unsigned &DepthExpanded,
   2515                                                const SCEV *Delta) const {
   2516   if (Level > CommonLevels) {
   2517     // record result
   2518     DEBUG(dbgs() << "\t[");
   2519     for (unsigned K = 1; K <= CommonLevels; ++K) {
   2520       if (Loops[K]) {
   2521         Bound[K].DirSet |= Bound[K].Direction;
   2522 #ifndef NDEBUG
   2523         switch (Bound[K].Direction) {
   2524         case Dependence::DVEntry::LT:
   2525           DEBUG(dbgs() << " <");
   2526           break;
   2527         case Dependence::DVEntry::EQ:
   2528           DEBUG(dbgs() << " =");
   2529           break;
   2530         case Dependence::DVEntry::GT:
   2531           DEBUG(dbgs() << " >");
   2532           break;
   2533         case Dependence::DVEntry::ALL:
   2534           DEBUG(dbgs() << " *");
   2535           break;
   2536         default:
   2537           llvm_unreachable("unexpected Bound[K].Direction");
   2538         }
   2539 #endif
   2540       }
   2541     }
   2542     DEBUG(dbgs() << " ]\n");
   2543     return 1;
   2544   }
   2545   if (Loops[Level]) {
   2546     if (Level > DepthExpanded) {
   2547       DepthExpanded = Level;
   2548       // compute bounds for <, =, > at current level
   2549       findBoundsLT(A, B, Bound, Level);
   2550       findBoundsGT(A, B, Bound, Level);
   2551       findBoundsEQ(A, B, Bound, Level);
   2552 #ifndef NDEBUG
   2553       DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
   2554       DEBUG(dbgs() << "\t    <\t");
   2555       if (Bound[Level].Lower[Dependence::DVEntry::LT])
   2556         DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t');
   2557       else
   2558         DEBUG(dbgs() << "-inf\t");
   2559       if (Bound[Level].Upper[Dependence::DVEntry::LT])
   2560         DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n');
   2561       else
   2562         DEBUG(dbgs() << "+inf\n");
   2563       DEBUG(dbgs() << "\t    =\t");
   2564       if (Bound[Level].Lower[Dependence::DVEntry::EQ])
   2565         DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t');
   2566       else
   2567         DEBUG(dbgs() << "-inf\t");
   2568       if (Bound[Level].Upper[Dependence::DVEntry::EQ])
   2569         DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n');
   2570       else
   2571         DEBUG(dbgs() << "+inf\n");
   2572       DEBUG(dbgs() << "\t    >\t");
   2573       if (Bound[Level].Lower[Dependence::DVEntry::GT])
   2574         DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t');
   2575       else
   2576         DEBUG(dbgs() << "-inf\t");
   2577       if (Bound[Level].Upper[Dependence::DVEntry::GT])
   2578         DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n');
   2579       else
   2580         DEBUG(dbgs() << "+inf\n");
   2581 #endif
   2582     }
   2583 
   2584     unsigned NewDeps = 0;
   2585 
   2586     // test bounds for <, *, *, ...
   2587     if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
   2588       NewDeps += exploreDirections(Level + 1, A, B, Bound,
   2589                                    Loops, DepthExpanded, Delta);
   2590 
   2591     // Test bounds for =, *, *, ...
   2592     if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
   2593       NewDeps += exploreDirections(Level + 1, A, B, Bound,
   2594                                    Loops, DepthExpanded, Delta);
   2595 
   2596     // test bounds for >, *, *, ...
   2597     if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
   2598       NewDeps += exploreDirections(Level + 1, A, B, Bound,
   2599                                    Loops, DepthExpanded, Delta);
   2600 
   2601     Bound[Level].Direction = Dependence::DVEntry::ALL;
   2602     return NewDeps;
   2603   }
   2604   else
   2605     return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
   2606 }
   2607 
   2608 
   2609 // Returns true iff the current bounds are plausible.
   2610 bool DependenceAnalysis::testBounds(unsigned char DirKind,
   2611                                     unsigned Level,
   2612                                     BoundInfo *Bound,
   2613                                     const SCEV *Delta) const {
   2614   Bound[Level].Direction = DirKind;
   2615   if (const SCEV *LowerBound = getLowerBound(Bound))
   2616     if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
   2617       return false;
   2618   if (const SCEV *UpperBound = getUpperBound(Bound))
   2619     if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
   2620       return false;
   2621   return true;
   2622 }
   2623 
   2624 
   2625 // Computes the upper and lower bounds for level K
   2626 // using the * direction. Records them in Bound.
   2627 // Wolfe gives the equations
   2628 //
   2629 //    LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
   2630 //    UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
   2631 //
   2632 // Since we normalize loops, we can simplify these equations to
   2633 //
   2634 //    LB^*_k = (A^-_k - B^+_k)U_k
   2635 //    UB^*_k = (A^+_k - B^-_k)U_k
   2636 //
   2637 // We must be careful to handle the case where the upper bound is unknown.
   2638 // Note that the lower bound is always <= 0
   2639 // and the upper bound is always >= 0.
   2640 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A,
   2641                                        CoefficientInfo *B,
   2642                                        BoundInfo *Bound,
   2643                                        unsigned K) const {
   2644   Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity.
   2645   Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity.
   2646   if (Bound[K].Iterations) {
   2647     Bound[K].Lower[Dependence::DVEntry::ALL] =
   2648       SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
   2649                      Bound[K].Iterations);
   2650     Bound[K].Upper[Dependence::DVEntry::ALL] =
   2651       SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
   2652                      Bound[K].Iterations);
   2653   }
   2654   else {
   2655     // If the difference is 0, we won't need to know the number of iterations.
   2656     if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
   2657       Bound[K].Lower[Dependence::DVEntry::ALL] =
   2658         SE->getConstant(A[K].Coeff->getType(), 0);
   2659     if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
   2660       Bound[K].Upper[Dependence::DVEntry::ALL] =
   2661         SE->getConstant(A[K].Coeff->getType(), 0);
   2662   }
   2663 }
   2664 
   2665 
   2666 // Computes the upper and lower bounds for level K
   2667 // using the = direction. Records them in Bound.
   2668 // Wolfe gives the equations
   2669 //
   2670 //    LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
   2671 //    UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
   2672 //
   2673 // Since we normalize loops, we can simplify these equations to
   2674 //
   2675 //    LB^=_k = (A_k - B_k)^- U_k
   2676 //    UB^=_k = (A_k - B_k)^+ U_k
   2677 //
   2678 // We must be careful to handle the case where the upper bound is unknown.
   2679 // Note that the lower bound is always <= 0
   2680 // and the upper bound is always >= 0.
   2681 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A,
   2682                                       CoefficientInfo *B,
   2683                                       BoundInfo *Bound,
   2684                                       unsigned K) const {
   2685   Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity.
   2686   Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity.
   2687   if (Bound[K].Iterations) {
   2688     const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
   2689     const SCEV *NegativePart = getNegativePart(Delta);
   2690     Bound[K].Lower[Dependence::DVEntry::EQ] =
   2691       SE->getMulExpr(NegativePart, Bound[K].Iterations);
   2692     const SCEV *PositivePart = getPositivePart(Delta);
   2693     Bound[K].Upper[Dependence::DVEntry::EQ] =
   2694       SE->getMulExpr(PositivePart, Bound[K].Iterations);
   2695   }
   2696   else {
   2697     // If the positive/negative part of the difference is 0,
   2698     // we won't need to know the number of iterations.
   2699     const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
   2700     const SCEV *NegativePart = getNegativePart(Delta);
   2701     if (NegativePart->isZero())
   2702       Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
   2703     const SCEV *PositivePart = getPositivePart(Delta);
   2704     if (PositivePart->isZero())
   2705       Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
   2706   }
   2707 }
   2708 
   2709 
   2710 // Computes the upper and lower bounds for level K
   2711 // using the < direction. Records them in Bound.
   2712 // Wolfe gives the equations
   2713 //
   2714 //    LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
   2715 //    UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
   2716 //
   2717 // Since we normalize loops, we can simplify these equations to
   2718 //
   2719 //    LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
   2720 //    UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
   2721 //
   2722 // We must be careful to handle the case where the upper bound is unknown.
   2723 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A,
   2724                                       CoefficientInfo *B,
   2725                                       BoundInfo *Bound,
   2726                                       unsigned K) const {
   2727   Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity.
   2728   Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity.
   2729   if (Bound[K].Iterations) {
   2730     const SCEV *Iter_1 =
   2731       SE->getMinusSCEV(Bound[K].Iterations,
   2732                        SE->getConstant(Bound[K].Iterations->getType(), 1));
   2733     const SCEV *NegPart =
   2734       getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
   2735     Bound[K].Lower[Dependence::DVEntry::LT] =
   2736       SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
   2737     const SCEV *PosPart =
   2738       getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
   2739     Bound[K].Upper[Dependence::DVEntry::LT] =
   2740       SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
   2741   }
   2742   else {
   2743     // If the positive/negative part of the difference is 0,
   2744     // we won't need to know the number of iterations.
   2745     const SCEV *NegPart =
   2746       getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
   2747     if (NegPart->isZero())
   2748       Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
   2749     const SCEV *PosPart =
   2750       getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
   2751     if (PosPart->isZero())
   2752       Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
   2753   }
   2754 }
   2755 
   2756 
   2757 // Computes the upper and lower bounds for level K
   2758 // using the > direction. Records them in Bound.
   2759 // Wolfe gives the equations
   2760 //
   2761 //    LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
   2762 //    UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
   2763 //
   2764 // Since we normalize loops, we can simplify these equations to
   2765 //
   2766 //    LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
   2767 //    UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
   2768 //
   2769 // We must be careful to handle the case where the upper bound is unknown.
   2770 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
   2771                                       CoefficientInfo *B,
   2772                                       BoundInfo *Bound,
   2773                                       unsigned K) const {
   2774   Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity.
   2775   Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity.
   2776   if (Bound[K].Iterations) {
   2777     const SCEV *Iter_1 =
   2778       SE->getMinusSCEV(Bound[K].Iterations,
   2779                        SE->getConstant(Bound[K].Iterations->getType(), 1));
   2780     const SCEV *NegPart =
   2781       getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
   2782     Bound[K].Lower[Dependence::DVEntry::GT] =
   2783       SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
   2784     const SCEV *PosPart =
   2785       getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
   2786     Bound[K].Upper[Dependence::DVEntry::GT] =
   2787       SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
   2788   }
   2789   else {
   2790     // If the positive/negative part of the difference is 0,
   2791     // we won't need to know the number of iterations.
   2792     const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
   2793     if (NegPart->isZero())
   2794       Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
   2795     const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
   2796     if (PosPart->isZero())
   2797       Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
   2798   }
   2799 }
   2800 
   2801 
   2802 // X^+ = max(X, 0)
   2803 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const {
   2804   return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0));
   2805 }
   2806 
   2807 
   2808 // X^- = min(X, 0)
   2809 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const {
   2810   return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0));
   2811 }
   2812 
   2813 
   2814 // Walks through the subscript,
   2815 // collecting each coefficient, the associated loop bounds,
   2816 // and recording its positive and negative parts for later use.
   2817 DependenceAnalysis::CoefficientInfo *
   2818 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript,
   2819                                      bool SrcFlag,
   2820                                      const SCEV *&Constant) const {
   2821   const SCEV *Zero = SE->getConstant(Subscript->getType(), 0);
   2822   CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
   2823   for (unsigned K = 1; K <= MaxLevels; ++K) {
   2824     CI[K].Coeff = Zero;
   2825     CI[K].PosPart = Zero;
   2826     CI[K].NegPart = Zero;
   2827     CI[K].Iterations = NULL;
   2828   }
   2829   while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
   2830     const Loop *L = AddRec->getLoop();
   2831     unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
   2832     CI[K].Coeff = AddRec->getStepRecurrence(*SE);
   2833     CI[K].PosPart = getPositivePart(CI[K].Coeff);
   2834     CI[K].NegPart = getNegativePart(CI[K].Coeff);
   2835     CI[K].Iterations = collectUpperBound(L, Subscript->getType());
   2836     Subscript = AddRec->getStart();
   2837   }
   2838   Constant = Subscript;
   2839 #ifndef NDEBUG
   2840   DEBUG(dbgs() << "\tCoefficient Info\n");
   2841   for (unsigned K = 1; K <= MaxLevels; ++K) {
   2842     DEBUG(dbgs() << "\t    " << K << "\t" << *CI[K].Coeff);
   2843     DEBUG(dbgs() << "\tPos Part = ");
   2844     DEBUG(dbgs() << *CI[K].PosPart);
   2845     DEBUG(dbgs() << "\tNeg Part = ");
   2846     DEBUG(dbgs() << *CI[K].NegPart);
   2847     DEBUG(dbgs() << "\tUpper Bound = ");
   2848     if (CI[K].Iterations)
   2849       DEBUG(dbgs() << *CI[K].Iterations);
   2850     else
   2851       DEBUG(dbgs() << "+inf");
   2852     DEBUG(dbgs() << '\n');
   2853   }
   2854   DEBUG(dbgs() << "\t    Constant = " << *Subscript << '\n');
   2855 #endif
   2856   return CI;
   2857 }
   2858 
   2859 
   2860 // Looks through all the bounds info and
   2861 // computes the lower bound given the current direction settings
   2862 // at each level. If the lower bound for any level is -inf,
   2863 // the result is -inf.
   2864 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const {
   2865   const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
   2866   for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
   2867     if (Bound[K].Lower[Bound[K].Direction])
   2868       Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
   2869     else
   2870       Sum = NULL;
   2871   }
   2872   return Sum;
   2873 }
   2874 
   2875 
   2876 // Looks through all the bounds info and
   2877 // computes the upper bound given the current direction settings
   2878 // at each level. If the upper bound at any level is +inf,
   2879 // the result is +inf.
   2880 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const {
   2881   const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
   2882   for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
   2883     if (Bound[K].Upper[Bound[K].Direction])
   2884       Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
   2885     else
   2886       Sum = NULL;
   2887   }
   2888   return Sum;
   2889 }
   2890 
   2891 
   2892 //===----------------------------------------------------------------------===//
   2893 // Constraint manipulation for Delta test.
   2894 
   2895 // Given a linear SCEV,
   2896 // return the coefficient (the step)
   2897 // corresponding to the specified loop.
   2898 // If there isn't one, return 0.
   2899 // For example, given a*i + b*j + c*k, zeroing the coefficient
   2900 // corresponding to the j loop would yield b.
   2901 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,
   2902                                                 const Loop *TargetLoop)  const {
   2903   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
   2904   if (!AddRec)
   2905     return SE->getConstant(Expr->getType(), 0);
   2906   if (AddRec->getLoop() == TargetLoop)
   2907     return AddRec->getStepRecurrence(*SE);
   2908   return findCoefficient(AddRec->getStart(), TargetLoop);
   2909 }
   2910 
   2911 
   2912 // Given a linear SCEV,
   2913 // return the SCEV given by zeroing out the coefficient
   2914 // corresponding to the specified loop.
   2915 // For example, given a*i + b*j + c*k, zeroing the coefficient
   2916 // corresponding to the j loop would yield a*i + c*k.
   2917 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr,
   2918                                                 const Loop *TargetLoop)  const {
   2919   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
   2920   if (!AddRec)
   2921     return Expr; // ignore
   2922   if (AddRec->getLoop() == TargetLoop)
   2923     return AddRec->getStart();
   2924   return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
   2925                            AddRec->getStepRecurrence(*SE),
   2926                            AddRec->getLoop(),
   2927                            AddRec->getNoWrapFlags());
   2928 }
   2929 
   2930 
   2931 // Given a linear SCEV Expr,
   2932 // return the SCEV given by adding some Value to the
   2933 // coefficient corresponding to the specified TargetLoop.
   2934 // For example, given a*i + b*j + c*k, adding 1 to the coefficient
   2935 // corresponding to the j loop would yield a*i + (b+1)*j + c*k.
   2936 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr,
   2937                                                  const Loop *TargetLoop,
   2938                                                  const SCEV *Value)  const {
   2939   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
   2940   if (!AddRec) // create a new addRec
   2941     return SE->getAddRecExpr(Expr,
   2942                              Value,
   2943                              TargetLoop,
   2944                              SCEV::FlagAnyWrap); // Worst case, with no info.
   2945   if (AddRec->getLoop() == TargetLoop) {
   2946     const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
   2947     if (Sum->isZero())
   2948       return AddRec->getStart();
   2949     return SE->getAddRecExpr(AddRec->getStart(),
   2950                              Sum,
   2951                              AddRec->getLoop(),
   2952                              AddRec->getNoWrapFlags());
   2953   }
   2954   if (SE->isLoopInvariant(AddRec, TargetLoop))
   2955     return SE->getAddRecExpr(AddRec,
   2956 			     Value,
   2957 			     TargetLoop,
   2958 			     SCEV::FlagAnyWrap);
   2959   return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(),
   2960                                             TargetLoop, Value),
   2961                            AddRec->getStepRecurrence(*SE),
   2962                            AddRec->getLoop(),
   2963                            AddRec->getNoWrapFlags());
   2964 }
   2965 
   2966 
   2967 // Review the constraints, looking for opportunities
   2968 // to simplify a subscript pair (Src and Dst).
   2969 // Return true if some simplification occurs.
   2970 // If the simplification isn't exact (that is, if it is conservative
   2971 // in terms of dependence), set consistent to false.
   2972 // Corresponds to Figure 5 from the paper
   2973 //
   2974 //            Practical Dependence Testing
   2975 //            Goff, Kennedy, Tseng
   2976 //            PLDI 1991
   2977 bool DependenceAnalysis::propagate(const SCEV *&Src,
   2978                                    const SCEV *&Dst,
   2979                                    SmallBitVector &Loops,
   2980                                    SmallVectorImpl<Constraint> &Constraints,
   2981                                    bool &Consistent) {
   2982   bool Result = false;
   2983   for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) {
   2984     DEBUG(dbgs() << "\t    Constraint[" << LI << "] is");
   2985     DEBUG(Constraints[LI].dump(dbgs()));
   2986     if (Constraints[LI].isDistance())
   2987       Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
   2988     else if (Constraints[LI].isLine())
   2989       Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
   2990     else if (Constraints[LI].isPoint())
   2991       Result |= propagatePoint(Src, Dst, Constraints[LI]);
   2992   }
   2993   return Result;
   2994 }
   2995 
   2996 
   2997 // Attempt to propagate a distance
   2998 // constraint into a subscript pair (Src and Dst).
   2999 // Return true if some simplification occurs.
   3000 // If the simplification isn't exact (that is, if it is conservative
   3001 // in terms of dependence), set consistent to false.
   3002 bool DependenceAnalysis::propagateDistance(const SCEV *&Src,
   3003                                            const SCEV *&Dst,
   3004                                            Constraint &CurConstraint,
   3005                                            bool &Consistent) {
   3006   const Loop *CurLoop = CurConstraint.getAssociatedLoop();
   3007   DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
   3008   const SCEV *A_K = findCoefficient(Src, CurLoop);
   3009   if (A_K->isZero())
   3010     return false;
   3011   const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
   3012   Src = SE->getMinusSCEV(Src, DA_K);
   3013   Src = zeroCoefficient(Src, CurLoop);
   3014   DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
   3015   DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
   3016   Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
   3017   DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
   3018   if (!findCoefficient(Dst, CurLoop)->isZero())
   3019     Consistent = false;
   3020   return true;
   3021 }
   3022 
   3023 
   3024 // Attempt to propagate a line
   3025 // constraint into a subscript pair (Src and Dst).
   3026 // Return true if some simplification occurs.
   3027 // If the simplification isn't exact (that is, if it is conservative
   3028 // in terms of dependence), set consistent to false.
   3029 bool DependenceAnalysis::propagateLine(const SCEV *&Src,
   3030                                        const SCEV *&Dst,
   3031                                        Constraint &CurConstraint,
   3032                                        bool &Consistent) {
   3033   const Loop *CurLoop = CurConstraint.getAssociatedLoop();
   3034   const SCEV *A = CurConstraint.getA();
   3035   const SCEV *B = CurConstraint.getB();
   3036   const SCEV *C = CurConstraint.getC();
   3037   DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n");
   3038   DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
   3039   DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
   3040   if (A->isZero()) {
   3041     const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
   3042     const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
   3043     if (!Bconst || !Cconst) return false;
   3044     APInt Beta = Bconst->getValue()->getValue();
   3045     APInt Charlie = Cconst->getValue()->getValue();
   3046     APInt CdivB = Charlie.sdiv(Beta);
   3047     assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
   3048     const SCEV *AP_K = findCoefficient(Dst, CurLoop);
   3049     //    Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
   3050     Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
   3051     Dst = zeroCoefficient(Dst, CurLoop);
   3052     if (!findCoefficient(Src, CurLoop)->isZero())
   3053       Consistent = false;
   3054   }
   3055   else if (B->isZero()) {
   3056     const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
   3057     const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
   3058     if (!Aconst || !Cconst) return false;
   3059     APInt Alpha = Aconst->getValue()->getValue();
   3060     APInt Charlie = Cconst->getValue()->getValue();
   3061     APInt CdivA = Charlie.sdiv(Alpha);
   3062     assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
   3063     const SCEV *A_K = findCoefficient(Src, CurLoop);
   3064     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
   3065     Src = zeroCoefficient(Src, CurLoop);
   3066     if (!findCoefficient(Dst, CurLoop)->isZero())
   3067       Consistent = false;
   3068   }
   3069   else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
   3070     const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
   3071     const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
   3072     if (!Aconst || !Cconst) return false;
   3073     APInt Alpha = Aconst->getValue()->getValue();
   3074     APInt Charlie = Cconst->getValue()->getValue();
   3075     APInt CdivA = Charlie.sdiv(Alpha);
   3076     assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
   3077     const SCEV *A_K = findCoefficient(Src, CurLoop);
   3078     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
   3079     Src = zeroCoefficient(Src, CurLoop);
   3080     Dst = addToCoefficient(Dst, CurLoop, A_K);
   3081     if (!findCoefficient(Dst, CurLoop)->isZero())
   3082       Consistent = false;
   3083   }
   3084   else {
   3085     // paper is incorrect here, or perhaps just misleading
   3086     const SCEV *A_K = findCoefficient(Src, CurLoop);
   3087     Src = SE->getMulExpr(Src, A);
   3088     Dst = SE->getMulExpr(Dst, A);
   3089     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
   3090     Src = zeroCoefficient(Src, CurLoop);
   3091     Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
   3092     if (!findCoefficient(Dst, CurLoop)->isZero())
   3093       Consistent = false;
   3094   }
   3095   DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
   3096   DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
   3097   return true;
   3098 }
   3099 
   3100 
   3101 // Attempt to propagate a point
   3102 // constraint into a subscript pair (Src and Dst).
   3103 // Return true if some simplification occurs.
   3104 bool DependenceAnalysis::propagatePoint(const SCEV *&Src,
   3105                                         const SCEV *&Dst,
   3106                                         Constraint &CurConstraint) {
   3107   const Loop *CurLoop = CurConstraint.getAssociatedLoop();
   3108   const SCEV *A_K = findCoefficient(Src, CurLoop);
   3109   const SCEV *AP_K = findCoefficient(Dst, CurLoop);
   3110   const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
   3111   const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
   3112   DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
   3113   Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
   3114   Src = zeroCoefficient(Src, CurLoop);
   3115   DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
   3116   DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
   3117   Dst = zeroCoefficient(Dst, CurLoop);
   3118   DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
   3119   return true;
   3120 }
   3121 
   3122 
   3123 // Update direction vector entry based on the current constraint.
   3124 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
   3125                                          const Constraint &CurConstraint
   3126                                          ) const {
   3127   DEBUG(dbgs() << "\tUpdate direction, constraint =");
   3128   DEBUG(CurConstraint.dump(dbgs()));
   3129   if (CurConstraint.isAny())
   3130     ; // use defaults
   3131   else if (CurConstraint.isDistance()) {
   3132     // this one is consistent, the others aren't
   3133     Level.Scalar = false;
   3134     Level.Distance = CurConstraint.getD();
   3135     unsigned NewDirection = Dependence::DVEntry::NONE;
   3136     if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
   3137       NewDirection = Dependence::DVEntry::EQ;
   3138     if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
   3139       NewDirection |= Dependence::DVEntry::LT;
   3140     if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
   3141       NewDirection |= Dependence::DVEntry::GT;
   3142     Level.Direction &= NewDirection;
   3143   }
   3144   else if (CurConstraint.isLine()) {
   3145     Level.Scalar = false;
   3146     Level.Distance = NULL;
   3147     // direction should be accurate
   3148   }
   3149   else if (CurConstraint.isPoint()) {
   3150     Level.Scalar = false;
   3151     Level.Distance = NULL;
   3152     unsigned NewDirection = Dependence::DVEntry::NONE;
   3153     if (!isKnownPredicate(CmpInst::ICMP_NE,
   3154                           CurConstraint.getY(),
   3155                           CurConstraint.getX()))
   3156       // if X may be = Y
   3157       NewDirection |= Dependence::DVEntry::EQ;
   3158     if (!isKnownPredicate(CmpInst::ICMP_SLE,
   3159                           CurConstraint.getY(),
   3160                           CurConstraint.getX()))
   3161       // if Y may be > X
   3162       NewDirection |= Dependence::DVEntry::LT;
   3163     if (!isKnownPredicate(CmpInst::ICMP_SGE,
   3164                           CurConstraint.getY(),
   3165                           CurConstraint.getX()))
   3166       // if Y may be < X
   3167       NewDirection |= Dependence::DVEntry::GT;
   3168     Level.Direction &= NewDirection;
   3169   }
   3170   else
   3171     llvm_unreachable("constraint has unexpected kind");
   3172 }
   3173 
   3174 
   3175 //===----------------------------------------------------------------------===//
   3176 
   3177 #ifndef NDEBUG
   3178 // For debugging purposes, dump a small bit vector to dbgs().
   3179 static void dumpSmallBitVector(SmallBitVector &BV) {
   3180   dbgs() << "{";
   3181   for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) {
   3182     dbgs() << VI;
   3183     if (BV.find_next(VI) >= 0)
   3184       dbgs() << ' ';
   3185   }
   3186   dbgs() << "}\n";
   3187 }
   3188 #endif
   3189 
   3190 
   3191 // depends -
   3192 // Returns NULL if there is no dependence.
   3193 // Otherwise, return a Dependence with as many details as possible.
   3194 // Corresponds to Section 3.1 in the paper
   3195 //
   3196 //            Practical Dependence Testing
   3197 //            Goff, Kennedy, Tseng
   3198 //            PLDI 1991
   3199 //
   3200 // Care is required to keep the routine below, getSplitIteration(),
   3201 // up to date with respect to this routine.
   3202 Dependence *DependenceAnalysis::depends(Instruction *Src,
   3203                                         Instruction *Dst,
   3204                                         bool PossiblyLoopIndependent) {
   3205   if (Src == Dst)
   3206     PossiblyLoopIndependent = false;
   3207 
   3208   if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
   3209       (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
   3210     // if both instructions don't reference memory, there's no dependence
   3211     return NULL;
   3212 
   3213   if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
   3214     // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
   3215     DEBUG(dbgs() << "can only handle simple loads and stores\n");
   3216     return new Dependence(Src, Dst);
   3217   }
   3218 
   3219   Value *SrcPtr = getPointerOperand(Src);
   3220   Value *DstPtr = getPointerOperand(Dst);
   3221 
   3222   switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
   3223   case AliasAnalysis::MayAlias:
   3224   case AliasAnalysis::PartialAlias:
   3225     // cannot analyse objects if we don't understand their aliasing.
   3226     DEBUG(dbgs() << "can't analyze may or partial alias\n");
   3227     return new Dependence(Src, Dst);
   3228   case AliasAnalysis::NoAlias:
   3229     // If the objects noalias, they are distinct, accesses are independent.
   3230     DEBUG(dbgs() << "no alias\n");
   3231     return NULL;
   3232   case AliasAnalysis::MustAlias:
   3233     break; // The underlying objects alias; test accesses for dependence.
   3234   }
   3235 
   3236   // establish loop nesting levels
   3237   establishNestingLevels(Src, Dst);
   3238   DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
   3239   DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
   3240 
   3241   FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
   3242   ++TotalArrayPairs;
   3243 
   3244   // See if there are GEPs we can use.
   3245   bool UsefulGEP = false;
   3246   GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
   3247   GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
   3248   if (SrcGEP && DstGEP &&
   3249       SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
   3250     const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
   3251     const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
   3252     DEBUG(dbgs() << "    SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
   3253     DEBUG(dbgs() << "    DstPtrSCEV = " << *DstPtrSCEV << "\n");
   3254 
   3255     UsefulGEP =
   3256       isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
   3257       isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
   3258   }
   3259   unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
   3260   SmallVector<Subscript, 4> Pair(Pairs);
   3261   if (UsefulGEP) {
   3262     DEBUG(dbgs() << "    using GEPs\n");
   3263     unsigned P = 0;
   3264     for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
   3265            SrcEnd = SrcGEP->idx_end(),
   3266            DstIdx = DstGEP->idx_begin();
   3267          SrcIdx != SrcEnd;
   3268          ++SrcIdx, ++DstIdx, ++P) {
   3269       Pair[P].Src = SE->getSCEV(*SrcIdx);
   3270       Pair[P].Dst = SE->getSCEV(*DstIdx);
   3271     }
   3272   }
   3273   else {
   3274     DEBUG(dbgs() << "    ignoring GEPs\n");
   3275     const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
   3276     const SCEV *DstSCEV = SE->getSCEV(DstPtr);
   3277     DEBUG(dbgs() << "    SrcSCEV = " << *SrcSCEV << "\n");
   3278     DEBUG(dbgs() << "    DstSCEV = " << *DstSCEV << "\n");
   3279     Pair[0].Src = SrcSCEV;
   3280     Pair[0].Dst = DstSCEV;
   3281   }
   3282 
   3283   for (unsigned P = 0; P < Pairs; ++P) {
   3284     Pair[P].Loops.resize(MaxLevels + 1);
   3285     Pair[P].GroupLoops.resize(MaxLevels + 1);
   3286     Pair[P].Group.resize(Pairs);
   3287     removeMatchingExtensions(&Pair[P]);
   3288     Pair[P].Classification =
   3289       classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
   3290                    Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
   3291                    Pair[P].Loops);
   3292     Pair[P].GroupLoops = Pair[P].Loops;
   3293     Pair[P].Group.set(P);
   3294     DEBUG(dbgs() << "    subscript " << P << "\n");
   3295     DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
   3296     DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
   3297     DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
   3298     DEBUG(dbgs() << "\tloops = ");
   3299     DEBUG(dumpSmallBitVector(Pair[P].Loops));
   3300   }
   3301 
   3302   SmallBitVector Separable(Pairs);
   3303   SmallBitVector Coupled(Pairs);
   3304 
   3305   // Partition subscripts into separable and minimally-coupled groups
   3306   // Algorithm in paper is algorithmically better;
   3307   // this may be faster in practice. Check someday.
   3308   //
   3309   // Here's an example of how it works. Consider this code:
   3310   //
   3311   //   for (i = ...) {
   3312   //     for (j = ...) {
   3313   //       for (k = ...) {
   3314   //         for (l = ...) {
   3315   //           for (m = ...) {
   3316   //             A[i][j][k][m] = ...;
   3317   //             ... = A[0][j][l][i + j];
   3318   //           }
   3319   //         }
   3320   //       }
   3321   //     }
   3322   //   }
   3323   //
   3324   // There are 4 subscripts here:
   3325   //    0 [i] and [0]
   3326   //    1 [j] and [j]
   3327   //    2 [k] and [l]
   3328   //    3 [m] and [i + j]
   3329   //
   3330   // We've already classified each subscript pair as ZIV, SIV, etc.,
   3331   // and collected all the loops mentioned by pair P in Pair[P].Loops.
   3332   // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
   3333   // and set Pair[P].Group = {P}.
   3334   //
   3335   //      Src Dst    Classification Loops  GroupLoops Group
   3336   //    0 [i] [0]         SIV       {1}      {1}        {0}
   3337   //    1 [j] [j]         SIV       {2}      {2}        {1}
   3338   //    2 [k] [l]         RDIV      {3,4}    {3,4}      {2}
   3339   //    3 [m] [i + j]     MIV       {1,2,5}  {1,2,5}    {3}
   3340   //
   3341   // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
   3342   // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
   3343   //
   3344   // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
   3345   // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
   3346   // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
   3347   // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
   3348   // to either Separable or Coupled).
   3349   //
   3350   // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
   3351   // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty,
   3352   // so Pair[3].Group = {0, 1, 3} and Done = false.
   3353   //
   3354   // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
   3355   // Since Done remains true, we add 2 to the set of Separable pairs.
   3356   //
   3357   // Finally, we consider 3. There's nothing to compare it with,
   3358   // so Done remains true and we add it to the Coupled set.
   3359   // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
   3360   //
   3361   // In the end, we've got 1 separable subscript and 1 coupled group.
   3362   for (unsigned SI = 0; SI < Pairs; ++SI) {
   3363     if (Pair[SI].Classification == Subscript::NonLinear) {
   3364       // ignore these, but collect loops for later
   3365       ++NonlinearSubscriptPairs;
   3366       collectCommonLoops(Pair[SI].Src,
   3367                          LI->getLoopFor(Src->getParent()),
   3368                          Pair[SI].Loops);
   3369       collectCommonLoops(Pair[SI].Dst,
   3370                          LI->getLoopFor(Dst->getParent()),
   3371                          Pair[SI].Loops);
   3372       Result.Consistent = false;
   3373     }
   3374     else if (Pair[SI].Classification == Subscript::ZIV) {
   3375       // always separable
   3376       Separable.set(SI);
   3377     }
   3378     else {
   3379       // SIV, RDIV, or MIV, so check for coupled group
   3380       bool Done = true;
   3381       for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
   3382         SmallBitVector Intersection = Pair[SI].GroupLoops;
   3383         Intersection &= Pair[SJ].GroupLoops;
   3384         if (Intersection.any()) {
   3385           // accumulate set of all the loops in group
   3386           Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
   3387           // accumulate set of all subscripts in group
   3388           Pair[SJ].Group |= Pair[SI].Group;
   3389           Done = false;
   3390         }
   3391       }
   3392       if (Done) {
   3393         if (Pair[SI].Group.count() == 1) {
   3394           Separable.set(SI);
   3395           ++SeparableSubscriptPairs;
   3396         }
   3397         else {
   3398           Coupled.set(SI);
   3399           ++CoupledSubscriptPairs;
   3400         }
   3401       }
   3402     }
   3403   }
   3404 
   3405   DEBUG(dbgs() << "    Separable = ");
   3406   DEBUG(dumpSmallBitVector(Separable));
   3407   DEBUG(dbgs() << "    Coupled = ");
   3408   DEBUG(dumpSmallBitVector(Coupled));
   3409 
   3410   Constraint NewConstraint;
   3411   NewConstraint.setAny(SE);
   3412 
   3413   // test separable subscripts
   3414   for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
   3415     DEBUG(dbgs() << "testing subscript " << SI);
   3416     switch (Pair[SI].Classification) {
   3417     case Subscript::ZIV:
   3418       DEBUG(dbgs() << ", ZIV\n");
   3419       if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
   3420         return NULL;
   3421       break;
   3422     case Subscript::SIV: {
   3423       DEBUG(dbgs() << ", SIV\n");
   3424       unsigned Level;
   3425       const SCEV *SplitIter = NULL;
   3426       if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
   3427                   Result, NewConstraint, SplitIter))
   3428         return NULL;
   3429       break;
   3430     }
   3431     case Subscript::RDIV:
   3432       DEBUG(dbgs() << ", RDIV\n");
   3433       if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
   3434         return NULL;
   3435       break;
   3436     case Subscript::MIV:
   3437       DEBUG(dbgs() << ", MIV\n");
   3438       if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
   3439         return NULL;
   3440       break;
   3441     default:
   3442       llvm_unreachable("subscript has unexpected classification");
   3443     }
   3444   }
   3445 
   3446   if (Coupled.count()) {
   3447     // test coupled subscript groups
   3448     DEBUG(dbgs() << "starting on coupled subscripts\n");
   3449     DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
   3450     SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
   3451     for (unsigned II = 0; II <= MaxLevels; ++II)
   3452       Constraints[II].setAny(SE);
   3453     for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
   3454       DEBUG(dbgs() << "testing subscript group " << SI << " { ");
   3455       SmallBitVector Group(Pair[SI].Group);
   3456       SmallBitVector Sivs(Pairs);
   3457       SmallBitVector Mivs(Pairs);
   3458       SmallBitVector ConstrainedLevels(MaxLevels + 1);
   3459       for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
   3460         DEBUG(dbgs() << SJ << " ");
   3461         if (Pair[SJ].Classification == Subscript::SIV)
   3462           Sivs.set(SJ);
   3463         else
   3464           Mivs.set(SJ);
   3465       }
   3466       DEBUG(dbgs() << "}\n");
   3467       while (Sivs.any()) {
   3468         bool Changed = false;
   3469         for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
   3470           DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
   3471           // SJ is an SIV subscript that's part of the current coupled group
   3472           unsigned Level;
   3473           const SCEV *SplitIter = NULL;
   3474           DEBUG(dbgs() << "SIV\n");
   3475           if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
   3476                       Result, NewConstraint, SplitIter))
   3477             return NULL;
   3478           ConstrainedLevels.set(Level);
   3479           if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
   3480             if (Constraints[Level].isEmpty()) {
   3481               ++DeltaIndependence;
   3482               return NULL;
   3483             }
   3484             Changed = true;
   3485           }
   3486           Sivs.reset(SJ);
   3487         }
   3488         if (Changed) {
   3489           // propagate, possibly creating new SIVs and ZIVs
   3490           DEBUG(dbgs() << "    propagating\n");
   3491           DEBUG(dbgs() << "\tMivs = ");
   3492           DEBUG(dumpSmallBitVector(Mivs));
   3493           for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
   3494             // SJ is an MIV subscript that's part of the current coupled group
   3495             DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
   3496             if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
   3497                           Constraints, Result.Consistent)) {
   3498               DEBUG(dbgs() << "\t    Changed\n");
   3499               ++DeltaPropagations;
   3500               Pair[SJ].Classification =
   3501                 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
   3502                              Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
   3503                              Pair[SJ].Loops);
   3504               switch (Pair[SJ].Classification) {
   3505               case Subscript::ZIV:
   3506                 DEBUG(dbgs() << "ZIV\n");
   3507                 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
   3508                   return NULL;
   3509                 Mivs.reset(SJ);
   3510                 break;
   3511               case Subscript::SIV:
   3512                 Sivs.set(SJ);
   3513                 Mivs.reset(SJ);
   3514                 break;
   3515               case Subscript::RDIV:
   3516               case Subscript::MIV:
   3517                 break;
   3518               default:
   3519                 llvm_unreachable("bad subscript classification");
   3520               }
   3521             }
   3522           }
   3523         }
   3524       }
   3525 
   3526       // test & propagate remaining RDIVs
   3527       for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
   3528         if (Pair[SJ].Classification == Subscript::RDIV) {
   3529           DEBUG(dbgs() << "RDIV test\n");
   3530           if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
   3531             return NULL;
   3532           // I don't yet understand how to propagate RDIV results
   3533           Mivs.reset(SJ);
   3534         }
   3535       }
   3536 
   3537       // test remaining MIVs
   3538       // This code is temporary.
   3539       // Better to somehow test all remaining subscripts simultaneously.
   3540       for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
   3541         if (Pair[SJ].Classification == Subscript::MIV) {
   3542           DEBUG(dbgs() << "MIV test\n");
   3543           if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
   3544             return NULL;
   3545         }
   3546         else
   3547           llvm_unreachable("expected only MIV subscripts at this point");
   3548       }
   3549 
   3550       // update Result.DV from constraint vector
   3551       DEBUG(dbgs() << "    updating\n");
   3552       for (int SJ = ConstrainedLevels.find_first();
   3553            SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
   3554         updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
   3555         if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
   3556           return NULL;
   3557       }
   3558     }
   3559   }
   3560 
   3561   // Make sure the Scalar flags are set correctly.
   3562   SmallBitVector CompleteLoops(MaxLevels + 1);
   3563   for (unsigned SI = 0; SI < Pairs; ++SI)
   3564     CompleteLoops |= Pair[SI].Loops;
   3565   for (unsigned II = 1; II <= CommonLevels; ++II)
   3566     if (CompleteLoops[II])
   3567       Result.DV[II - 1].Scalar = false;
   3568 
   3569   if (PossiblyLoopIndependent) {
   3570     // Make sure the LoopIndependent flag is set correctly.
   3571     // All directions must include equal, otherwise no
   3572     // loop-independent dependence is possible.
   3573     for (unsigned II = 1; II <= CommonLevels; ++II) {
   3574       if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
   3575         Result.LoopIndependent = false;
   3576         break;
   3577       }
   3578     }
   3579   }
   3580   else {
   3581     // On the other hand, if all directions are equal and there's no
   3582     // loop-independent dependence possible, then no dependence exists.
   3583     bool AllEqual = true;
   3584     for (unsigned II = 1; II <= CommonLevels; ++II) {
   3585       if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
   3586         AllEqual = false;
   3587         break;
   3588       }
   3589     }
   3590     if (AllEqual)
   3591       return NULL;
   3592   }
   3593 
   3594   FullDependence *Final = new FullDependence(Result);
   3595   Result.DV = NULL;
   3596   return Final;
   3597 }
   3598 
   3599 
   3600 
   3601 //===----------------------------------------------------------------------===//
   3602 // getSplitIteration -
   3603 // Rather than spend rarely-used space recording the splitting iteration
   3604 // during the Weak-Crossing SIV test, we re-compute it on demand.
   3605 // The re-computation is basically a repeat of the entire dependence test,
   3606 // though simplified since we know that the dependence exists.
   3607 // It's tedious, since we must go through all propagations, etc.
   3608 //
   3609 // Care is required to keep this code up to date with respect to the routine
   3610 // above, depends().
   3611 //
   3612 // Generally, the dependence analyzer will be used to build
   3613 // a dependence graph for a function (basically a map from instructions
   3614 // to dependences). Looking for cycles in the graph shows us loops
   3615 // that cannot be trivially vectorized/parallelized.
   3616 //
   3617 // We can try to improve the situation by examining all the dependences
   3618 // that make up the cycle, looking for ones we can break.
   3619 // Sometimes, peeling the first or last iteration of a loop will break
   3620 // dependences, and we've got flags for those possibilities.
   3621 // Sometimes, splitting a loop at some other iteration will do the trick,
   3622 // and we've got a flag for that case. Rather than waste the space to
   3623 // record the exact iteration (since we rarely know), we provide
   3624 // a method that calculates the iteration. It's a drag that it must work
   3625 // from scratch, but wonderful in that it's possible.
   3626 //
   3627 // Here's an example:
   3628 //
   3629 //    for (i = 0; i < 10; i++)
   3630 //        A[i] = ...
   3631 //        ... = A[11 - i]
   3632 //
   3633 // There's a loop-carried flow dependence from the store to the load,
   3634 // found by the weak-crossing SIV test. The dependence will have a flag,
   3635 // indicating that the dependence can be broken by splitting the loop.
   3636 // Calling getSplitIteration will return 5.
   3637 // Splitting the loop breaks the dependence, like so:
   3638 //
   3639 //    for (i = 0; i <= 5; i++)
   3640 //        A[i] = ...
   3641 //        ... = A[11 - i]
   3642 //    for (i = 6; i < 10; i++)
   3643 //        A[i] = ...
   3644 //        ... = A[11 - i]
   3645 //
   3646 // breaks the dependence and allows us to vectorize/parallelize
   3647 // both loops.
   3648 const  SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
   3649                                                    unsigned SplitLevel) {
   3650   assert(Dep && "expected a pointer to a Dependence");
   3651   assert(Dep->isSplitable(SplitLevel) &&
   3652          "Dep should be splitable at SplitLevel");
   3653   Instruction *Src = Dep->getSrc();
   3654   Instruction *Dst = Dep->getDst();
   3655   assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
   3656   assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
   3657   assert(isLoadOrStore(Src));
   3658   assert(isLoadOrStore(Dst));
   3659   Value *SrcPtr = getPointerOperand(Src);
   3660   Value *DstPtr = getPointerOperand(Dst);
   3661   assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
   3662          AliasAnalysis::MustAlias);
   3663 
   3664   // establish loop nesting levels
   3665   establishNestingLevels(Src, Dst);
   3666 
   3667   FullDependence Result(Src, Dst, false, CommonLevels);
   3668 
   3669   // See if there are GEPs we can use.
   3670   bool UsefulGEP = false;
   3671   GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
   3672   GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
   3673   if (SrcGEP && DstGEP &&
   3674       SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
   3675     const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
   3676     const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
   3677     UsefulGEP =
   3678       isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
   3679       isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
   3680   }
   3681   unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
   3682   SmallVector<Subscript, 4> Pair(Pairs);
   3683   if (UsefulGEP) {
   3684     unsigned P = 0;
   3685     for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
   3686            SrcEnd = SrcGEP->idx_end(),
   3687            DstIdx = DstGEP->idx_begin();
   3688          SrcIdx != SrcEnd;
   3689          ++SrcIdx, ++DstIdx, ++P) {
   3690       Pair[P].Src = SE->getSCEV(*SrcIdx);
   3691       Pair[P].Dst = SE->getSCEV(*DstIdx);
   3692     }
   3693   }
   3694   else {
   3695     const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
   3696     const SCEV *DstSCEV = SE->getSCEV(DstPtr);
   3697     Pair[0].Src = SrcSCEV;
   3698     Pair[0].Dst = DstSCEV;
   3699   }
   3700 
   3701   for (unsigned P = 0; P < Pairs; ++P) {
   3702     Pair[P].Loops.resize(MaxLevels + 1);
   3703     Pair[P].GroupLoops.resize(MaxLevels + 1);
   3704     Pair[P].Group.resize(Pairs);
   3705     removeMatchingExtensions(&Pair[P]);
   3706     Pair[P].Classification =
   3707       classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
   3708                    Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
   3709                    Pair[P].Loops);
   3710     Pair[P].GroupLoops = Pair[P].Loops;
   3711     Pair[P].Group.set(P);
   3712   }
   3713 
   3714   SmallBitVector Separable(Pairs);
   3715   SmallBitVector Coupled(Pairs);
   3716 
   3717   // partition subscripts into separable and minimally-coupled groups
   3718   for (unsigned SI = 0; SI < Pairs; ++SI) {
   3719     if (Pair[SI].Classification == Subscript::NonLinear) {
   3720       // ignore these, but collect loops for later
   3721       collectCommonLoops(Pair[SI].Src,
   3722                          LI->getLoopFor(Src->getParent()),
   3723                          Pair[SI].Loops);
   3724       collectCommonLoops(Pair[SI].Dst,
   3725                          LI->getLoopFor(Dst->getParent()),
   3726                          Pair[SI].Loops);
   3727       Result.Consistent = false;
   3728     }
   3729     else if (Pair[SI].Classification == Subscript::ZIV)
   3730       Separable.set(SI);
   3731     else {
   3732       // SIV, RDIV, or MIV, so check for coupled group
   3733       bool Done = true;
   3734       for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
   3735         SmallBitVector Intersection = Pair[SI].GroupLoops;
   3736         Intersection &= Pair[SJ].GroupLoops;
   3737         if (Intersection.any()) {
   3738           // accumulate set of all the loops in group
   3739           Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
   3740           // accumulate set of all subscripts in group
   3741           Pair[SJ].Group |= Pair[SI].Group;
   3742           Done = false;
   3743         }
   3744       }
   3745       if (Done) {
   3746         if (Pair[SI].Group.count() == 1)
   3747           Separable.set(SI);
   3748         else
   3749           Coupled.set(SI);
   3750       }
   3751     }
   3752   }
   3753 
   3754   Constraint NewConstraint;
   3755   NewConstraint.setAny(SE);
   3756 
   3757   // test separable subscripts
   3758   for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
   3759     switch (Pair[SI].Classification) {
   3760     case Subscript::SIV: {
   3761       unsigned Level;
   3762       const SCEV *SplitIter = NULL;
   3763       (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
   3764                      Result, NewConstraint, SplitIter);
   3765       if (Level == SplitLevel) {
   3766         assert(SplitIter != NULL);
   3767         return SplitIter;
   3768       }
   3769       break;
   3770     }
   3771     case Subscript::ZIV:
   3772     case Subscript::RDIV:
   3773     case Subscript::MIV:
   3774       break;
   3775     default:
   3776       llvm_unreachable("subscript has unexpected classification");
   3777     }
   3778   }
   3779 
   3780   if (Coupled.count()) {
   3781     // test coupled subscript groups
   3782     SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
   3783     for (unsigned II = 0; II <= MaxLevels; ++II)
   3784       Constraints[II].setAny(SE);
   3785     for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
   3786       SmallBitVector Group(Pair[SI].Group);
   3787       SmallBitVector Sivs(Pairs);
   3788       SmallBitVector Mivs(Pairs);
   3789       SmallBitVector ConstrainedLevels(MaxLevels + 1);
   3790       for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
   3791         if (Pair[SJ].Classification == Subscript::SIV)
   3792           Sivs.set(SJ);
   3793         else
   3794           Mivs.set(SJ);
   3795       }
   3796       while (Sivs.any()) {
   3797         bool Changed = false;
   3798         for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
   3799           // SJ is an SIV subscript that's part of the current coupled group
   3800           unsigned Level;
   3801           const SCEV *SplitIter = NULL;
   3802           (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
   3803                          Result, NewConstraint, SplitIter);
   3804           if (Level == SplitLevel && SplitIter)
   3805             return SplitIter;
   3806           ConstrainedLevels.set(Level);
   3807           if (intersectConstraints(&Constraints[Level], &NewConstraint))
   3808             Changed = true;
   3809           Sivs.reset(SJ);
   3810         }
   3811         if (Changed) {
   3812           // propagate, possibly creating new SIVs and ZIVs
   3813           for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
   3814             // SJ is an MIV subscript that's part of the current coupled group
   3815             if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
   3816                           Pair[SJ].Loops, Constraints, Result.Consistent)) {
   3817               Pair[SJ].Classification =
   3818                 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
   3819                              Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
   3820                              Pair[SJ].Loops);
   3821               switch (Pair[SJ].Classification) {
   3822               case Subscript::ZIV:
   3823                 Mivs.reset(SJ);
   3824                 break;
   3825               case Subscript::SIV:
   3826                 Sivs.set(SJ);
   3827                 Mivs.reset(SJ);
   3828                 break;
   3829               case Subscript::RDIV:
   3830               case Subscript::MIV:
   3831                 break;
   3832               default:
   3833                 llvm_unreachable("bad subscript classification");
   3834               }
   3835             }
   3836           }
   3837         }
   3838       }
   3839     }
   3840   }
   3841   llvm_unreachable("somehow reached end of routine");
   3842   return NULL;
   3843 }
   3844