1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to 28 // differentiate subscripts. Since Clang linearizes subscripts 29 // for most arrays, we give up some precision (though the existing MIV tests 30 // will help). We trust that the GEP instruction will eventually be extended. 31 // In the meantime, we should explore Maslov's ideas about delinearization. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #define DEBUG_TYPE "da" 55 56 #include "llvm/Analysis/DependenceAnalysis.h" 57 #include "llvm/ADT/Statistic.h" 58 #include "llvm/Analysis/AliasAnalysis.h" 59 #include "llvm/Analysis/LoopInfo.h" 60 #include "llvm/Analysis/ScalarEvolution.h" 61 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 62 #include "llvm/Analysis/ValueTracking.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/Support/Debug.h" 65 #include "llvm/Support/ErrorHandling.h" 66 #include "llvm/Support/InstIterator.h" 67 #include "llvm/Support/raw_ostream.h" 68 69 using namespace llvm; 70 71 //===----------------------------------------------------------------------===// 72 // statistics 73 74 STATISTIC(TotalArrayPairs, "Array pairs tested"); 75 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 76 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 77 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 78 STATISTIC(ZIVapplications, "ZIV applications"); 79 STATISTIC(ZIVindependence, "ZIV independence"); 80 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 81 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 82 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 83 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 84 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 85 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 86 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 87 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 88 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 89 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 90 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 91 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 92 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 93 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 94 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 95 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 96 STATISTIC(DeltaApplications, "Delta applications"); 97 STATISTIC(DeltaSuccesses, "Delta successes"); 98 STATISTIC(DeltaIndependence, "Delta independence"); 99 STATISTIC(DeltaPropagations, "Delta propagations"); 100 STATISTIC(GCDapplications, "GCD applications"); 101 STATISTIC(GCDsuccesses, "GCD successes"); 102 STATISTIC(GCDindependence, "GCD independence"); 103 STATISTIC(BanerjeeApplications, "Banerjee applications"); 104 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 105 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 106 107 //===----------------------------------------------------------------------===// 108 // basics 109 110 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", 111 "Dependence Analysis", true, true) 112 INITIALIZE_PASS_DEPENDENCY(LoopInfo) 113 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 114 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 115 INITIALIZE_PASS_END(DependenceAnalysis, "da", 116 "Dependence Analysis", true, true) 117 118 char DependenceAnalysis::ID = 0; 119 120 121 FunctionPass *llvm::createDependenceAnalysisPass() { 122 return new DependenceAnalysis(); 123 } 124 125 126 bool DependenceAnalysis::runOnFunction(Function &F) { 127 this->F = &F; 128 AA = &getAnalysis<AliasAnalysis>(); 129 SE = &getAnalysis<ScalarEvolution>(); 130 LI = &getAnalysis<LoopInfo>(); 131 return false; 132 } 133 134 135 void DependenceAnalysis::releaseMemory() { 136 } 137 138 139 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 140 AU.setPreservesAll(); 141 AU.addRequiredTransitive<AliasAnalysis>(); 142 AU.addRequiredTransitive<ScalarEvolution>(); 143 AU.addRequiredTransitive<LoopInfo>(); 144 } 145 146 147 // Used to test the dependence analyzer. 148 // Looks through the function, noting loads and stores. 149 // Calls depends() on every possible pair and prints out the result. 150 // Ignores all other instructions. 151 static 152 void dumpExampleDependence(raw_ostream &OS, Function *F, 153 DependenceAnalysis *DA) { 154 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); 155 SrcI != SrcE; ++SrcI) { 156 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 157 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 158 DstI != DstE; ++DstI) { 159 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 160 OS << "da analyze - "; 161 if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) { 162 D->dump(OS); 163 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 164 if (D->isSplitable(Level)) { 165 OS << "da analyze - split level = " << Level; 166 OS << ", iteration = " << *DA->getSplitIteration(D, Level); 167 OS << "!\n"; 168 } 169 } 170 delete D; 171 } 172 else 173 OS << "none!\n"; 174 } 175 } 176 } 177 } 178 } 179 180 181 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { 182 dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); 183 } 184 185 //===----------------------------------------------------------------------===// 186 // Dependence methods 187 188 // Returns true if this is an input dependence. 189 bool Dependence::isInput() const { 190 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 191 } 192 193 194 // Returns true if this is an output dependence. 195 bool Dependence::isOutput() const { 196 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 197 } 198 199 200 // Returns true if this is an flow (aka true) dependence. 201 bool Dependence::isFlow() const { 202 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 203 } 204 205 206 // Returns true if this is an anti dependence. 207 bool Dependence::isAnti() const { 208 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 209 } 210 211 212 // Returns true if a particular level is scalar; that is, 213 // if no subscript in the source or destination mention the induction 214 // variable associated with the loop at this level. 215 // Leave this out of line, so it will serve as a virtual method anchor 216 bool Dependence::isScalar(unsigned level) const { 217 return false; 218 } 219 220 221 //===----------------------------------------------------------------------===// 222 // FullDependence methods 223 224 FullDependence::FullDependence(Instruction *Source, 225 Instruction *Destination, 226 bool PossiblyLoopIndependent, 227 unsigned CommonLevels) : 228 Dependence(Source, Destination), 229 Levels(CommonLevels), 230 LoopIndependent(PossiblyLoopIndependent) { 231 Consistent = true; 232 DV = CommonLevels ? new DVEntry[CommonLevels] : NULL; 233 } 234 235 // The rest are simple getters that hide the implementation. 236 237 // getDirection - Returns the direction associated with a particular level. 238 unsigned FullDependence::getDirection(unsigned Level) const { 239 assert(0 < Level && Level <= Levels && "Level out of range"); 240 return DV[Level - 1].Direction; 241 } 242 243 244 // Returns the distance (or NULL) associated with a particular level. 245 const SCEV *FullDependence::getDistance(unsigned Level) const { 246 assert(0 < Level && Level <= Levels && "Level out of range"); 247 return DV[Level - 1].Distance; 248 } 249 250 251 // Returns true if a particular level is scalar; that is, 252 // if no subscript in the source or destination mention the induction 253 // variable associated with the loop at this level. 254 bool FullDependence::isScalar(unsigned Level) const { 255 assert(0 < Level && Level <= Levels && "Level out of range"); 256 return DV[Level - 1].Scalar; 257 } 258 259 260 // Returns true if peeling the first iteration from this loop 261 // will break this dependence. 262 bool FullDependence::isPeelFirst(unsigned Level) const { 263 assert(0 < Level && Level <= Levels && "Level out of range"); 264 return DV[Level - 1].PeelFirst; 265 } 266 267 268 // Returns true if peeling the last iteration from this loop 269 // will break this dependence. 270 bool FullDependence::isPeelLast(unsigned Level) const { 271 assert(0 < Level && Level <= Levels && "Level out of range"); 272 return DV[Level - 1].PeelLast; 273 } 274 275 276 // Returns true if splitting this loop will break the dependence. 277 bool FullDependence::isSplitable(unsigned Level) const { 278 assert(0 < Level && Level <= Levels && "Level out of range"); 279 return DV[Level - 1].Splitable; 280 } 281 282 283 //===----------------------------------------------------------------------===// 284 // DependenceAnalysis::Constraint methods 285 286 // If constraint is a point <X, Y>, returns X. 287 // Otherwise assert. 288 const SCEV *DependenceAnalysis::Constraint::getX() const { 289 assert(Kind == Point && "Kind should be Point"); 290 return A; 291 } 292 293 294 // If constraint is a point <X, Y>, returns Y. 295 // Otherwise assert. 296 const SCEV *DependenceAnalysis::Constraint::getY() const { 297 assert(Kind == Point && "Kind should be Point"); 298 return B; 299 } 300 301 302 // If constraint is a line AX + BY = C, returns A. 303 // Otherwise assert. 304 const SCEV *DependenceAnalysis::Constraint::getA() const { 305 assert((Kind == Line || Kind == Distance) && 306 "Kind should be Line (or Distance)"); 307 return A; 308 } 309 310 311 // If constraint is a line AX + BY = C, returns B. 312 // Otherwise assert. 313 const SCEV *DependenceAnalysis::Constraint::getB() const { 314 assert((Kind == Line || Kind == Distance) && 315 "Kind should be Line (or Distance)"); 316 return B; 317 } 318 319 320 // If constraint is a line AX + BY = C, returns C. 321 // Otherwise assert. 322 const SCEV *DependenceAnalysis::Constraint::getC() const { 323 assert((Kind == Line || Kind == Distance) && 324 "Kind should be Line (or Distance)"); 325 return C; 326 } 327 328 329 // If constraint is a distance, returns D. 330 // Otherwise assert. 331 const SCEV *DependenceAnalysis::Constraint::getD() const { 332 assert(Kind == Distance && "Kind should be Distance"); 333 return SE->getNegativeSCEV(C); 334 } 335 336 337 // Returns the loop associated with this constraint. 338 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { 339 assert((Kind == Distance || Kind == Line || Kind == Point) && 340 "Kind should be Distance, Line, or Point"); 341 return AssociatedLoop; 342 } 343 344 345 void DependenceAnalysis::Constraint::setPoint(const SCEV *X, 346 const SCEV *Y, 347 const Loop *CurLoop) { 348 Kind = Point; 349 A = X; 350 B = Y; 351 AssociatedLoop = CurLoop; 352 } 353 354 355 void DependenceAnalysis::Constraint::setLine(const SCEV *AA, 356 const SCEV *BB, 357 const SCEV *CC, 358 const Loop *CurLoop) { 359 Kind = Line; 360 A = AA; 361 B = BB; 362 C = CC; 363 AssociatedLoop = CurLoop; 364 } 365 366 367 void DependenceAnalysis::Constraint::setDistance(const SCEV *D, 368 const Loop *CurLoop) { 369 Kind = Distance; 370 A = SE->getConstant(D->getType(), 1); 371 B = SE->getNegativeSCEV(A); 372 C = SE->getNegativeSCEV(D); 373 AssociatedLoop = CurLoop; 374 } 375 376 377 void DependenceAnalysis::Constraint::setEmpty() { 378 Kind = Empty; 379 } 380 381 382 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { 383 SE = NewSE; 384 Kind = Any; 385 } 386 387 388 // For debugging purposes. Dumps the constraint out to OS. 389 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { 390 if (isEmpty()) 391 OS << " Empty\n"; 392 else if (isAny()) 393 OS << " Any\n"; 394 else if (isPoint()) 395 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 396 else if (isDistance()) 397 OS << " Distance is " << *getD() << 398 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 399 else if (isLine()) 400 OS << " Line is " << *getA() << "*X + " << 401 *getB() << "*Y = " << *getC() << "\n"; 402 else 403 llvm_unreachable("unknown constraint type in Constraint::dump"); 404 } 405 406 407 // Updates X with the intersection 408 // of the Constraints X and Y. Returns true if X has changed. 409 // Corresponds to Figure 4 from the paper 410 // 411 // Practical Dependence Testing 412 // Goff, Kennedy, Tseng 413 // PLDI 1991 414 bool DependenceAnalysis::intersectConstraints(Constraint *X, 415 const Constraint *Y) { 416 ++DeltaApplications; 417 DEBUG(dbgs() << "\tintersect constraints\n"); 418 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 419 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 420 assert(!Y->isPoint() && "Y must not be a Point"); 421 if (X->isAny()) { 422 if (Y->isAny()) 423 return false; 424 *X = *Y; 425 return true; 426 } 427 if (X->isEmpty()) 428 return false; 429 if (Y->isEmpty()) { 430 X->setEmpty(); 431 return true; 432 } 433 434 if (X->isDistance() && Y->isDistance()) { 435 DEBUG(dbgs() << "\t intersect 2 distances\n"); 436 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 437 return false; 438 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 439 X->setEmpty(); 440 ++DeltaSuccesses; 441 return true; 442 } 443 // Hmmm, interesting situation. 444 // I guess if either is constant, keep it and ignore the other. 445 if (isa<SCEVConstant>(Y->getD())) { 446 *X = *Y; 447 return true; 448 } 449 return false; 450 } 451 452 // At this point, the pseudo-code in Figure 4 of the paper 453 // checks if (X->isPoint() && Y->isPoint()). 454 // This case can't occur in our implementation, 455 // since a Point can only arise as the result of intersecting 456 // two Line constraints, and the right-hand value, Y, is never 457 // the result of an intersection. 458 assert(!(X->isPoint() && Y->isPoint()) && 459 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 460 461 if (X->isLine() && Y->isLine()) { 462 DEBUG(dbgs() << "\t intersect 2 lines\n"); 463 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 464 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 465 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 466 // slopes are equal, so lines are parallel 467 DEBUG(dbgs() << "\t\tsame slope\n"); 468 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 469 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 470 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 471 return false; 472 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 473 X->setEmpty(); 474 ++DeltaSuccesses; 475 return true; 476 } 477 return false; 478 } 479 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 480 // slopes differ, so lines intersect 481 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 482 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 483 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 484 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 485 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 486 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 487 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 488 const SCEVConstant *C1A2_C2A1 = 489 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 490 const SCEVConstant *C1B2_C2B1 = 491 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 492 const SCEVConstant *A1B2_A2B1 = 493 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 494 const SCEVConstant *A2B1_A1B2 = 495 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 496 if (!C1B2_C2B1 || !C1A2_C2A1 || 497 !A1B2_A2B1 || !A2B1_A1B2) 498 return false; 499 APInt Xtop = C1B2_C2B1->getValue()->getValue(); 500 APInt Xbot = A1B2_A2B1->getValue()->getValue(); 501 APInt Ytop = C1A2_C2A1->getValue()->getValue(); 502 APInt Ybot = A2B1_A1B2->getValue()->getValue(); 503 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 504 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 505 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 506 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 507 APInt Xq = Xtop; // these need to be initialized, even 508 APInt Xr = Xtop; // though they're just going to be overwritten 509 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 510 APInt Yq = Ytop; 511 APInt Yr = Ytop; 512 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 513 if (Xr != 0 || Yr != 0) { 514 X->setEmpty(); 515 ++DeltaSuccesses; 516 return true; 517 } 518 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 519 if (Xq.slt(0) || Yq.slt(0)) { 520 X->setEmpty(); 521 ++DeltaSuccesses; 522 return true; 523 } 524 if (const SCEVConstant *CUB = 525 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 526 APInt UpperBound = CUB->getValue()->getValue(); 527 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 528 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 529 X->setEmpty(); 530 ++DeltaSuccesses; 531 return true; 532 } 533 } 534 X->setPoint(SE->getConstant(Xq), 535 SE->getConstant(Yq), 536 X->getAssociatedLoop()); 537 ++DeltaSuccesses; 538 return true; 539 } 540 return false; 541 } 542 543 // if (X->isLine() && Y->isPoint()) This case can't occur. 544 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 545 546 if (X->isPoint() && Y->isLine()) { 547 DEBUG(dbgs() << "\t intersect Point and Line\n"); 548 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 549 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 550 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 551 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 552 return false; 553 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 554 X->setEmpty(); 555 ++DeltaSuccesses; 556 return true; 557 } 558 return false; 559 } 560 561 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 562 return false; 563 } 564 565 566 //===----------------------------------------------------------------------===// 567 // DependenceAnalysis methods 568 569 // For debugging purposes. Dumps a dependence to OS. 570 void Dependence::dump(raw_ostream &OS) const { 571 bool Splitable = false; 572 if (isConfused()) 573 OS << "confused"; 574 else { 575 if (isConsistent()) 576 OS << "consistent "; 577 if (isFlow()) 578 OS << "flow"; 579 else if (isOutput()) 580 OS << "output"; 581 else if (isAnti()) 582 OS << "anti"; 583 else if (isInput()) 584 OS << "input"; 585 unsigned Levels = getLevels(); 586 OS << " ["; 587 for (unsigned II = 1; II <= Levels; ++II) { 588 if (isSplitable(II)) 589 Splitable = true; 590 if (isPeelFirst(II)) 591 OS << 'p'; 592 const SCEV *Distance = getDistance(II); 593 if (Distance) 594 OS << *Distance; 595 else if (isScalar(II)) 596 OS << "S"; 597 else { 598 unsigned Direction = getDirection(II); 599 if (Direction == DVEntry::ALL) 600 OS << "*"; 601 else { 602 if (Direction & DVEntry::LT) 603 OS << "<"; 604 if (Direction & DVEntry::EQ) 605 OS << "="; 606 if (Direction & DVEntry::GT) 607 OS << ">"; 608 } 609 } 610 if (isPeelLast(II)) 611 OS << 'p'; 612 if (II < Levels) 613 OS << " "; 614 } 615 if (isLoopIndependent()) 616 OS << "|<"; 617 OS << "]"; 618 if (Splitable) 619 OS << " splitable"; 620 } 621 OS << "!\n"; 622 } 623 624 625 626 static 627 AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 628 const Value *A, 629 const Value *B) { 630 const Value *AObj = GetUnderlyingObject(A); 631 const Value *BObj = GetUnderlyingObject(B); 632 return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), 633 BObj, AA->getTypeStoreSize(BObj->getType())); 634 } 635 636 637 // Returns true if the load or store can be analyzed. Atomic and volatile 638 // operations have properties which this analysis does not understand. 639 static 640 bool isLoadOrStore(const Instruction *I) { 641 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 642 return LI->isUnordered(); 643 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 644 return SI->isUnordered(); 645 return false; 646 } 647 648 649 static 650 Value *getPointerOperand(Instruction *I) { 651 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 652 return LI->getPointerOperand(); 653 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 654 return SI->getPointerOperand(); 655 llvm_unreachable("Value is not load or store instruction"); 656 return 0; 657 } 658 659 660 // Examines the loop nesting of the Src and Dst 661 // instructions and establishes their shared loops. Sets the variables 662 // CommonLevels, SrcLevels, and MaxLevels. 663 // The source and destination instructions needn't be contained in the same 664 // loop. The routine establishNestingLevels finds the level of most deeply 665 // nested loop that contains them both, CommonLevels. An instruction that's 666 // not contained in a loop is at level = 0. MaxLevels is equal to the level 667 // of the source plus the level of the destination, minus CommonLevels. 668 // This lets us allocate vectors MaxLevels in length, with room for every 669 // distinct loop referenced in both the source and destination subscripts. 670 // The variable SrcLevels is the nesting depth of the source instruction. 671 // It's used to help calculate distinct loops referenced by the destination. 672 // Here's the map from loops to levels: 673 // 0 - unused 674 // 1 - outermost common loop 675 // ... - other common loops 676 // CommonLevels - innermost common loop 677 // ... - loops containing Src but not Dst 678 // SrcLevels - innermost loop containing Src but not Dst 679 // ... - loops containing Dst but not Src 680 // MaxLevels - innermost loops containing Dst but not Src 681 // Consider the follow code fragment: 682 // for (a = ...) { 683 // for (b = ...) { 684 // for (c = ...) { 685 // for (d = ...) { 686 // A[] = ...; 687 // } 688 // } 689 // for (e = ...) { 690 // for (f = ...) { 691 // for (g = ...) { 692 // ... = A[]; 693 // } 694 // } 695 // } 696 // } 697 // } 698 // If we're looking at the possibility of a dependence between the store 699 // to A (the Src) and the load from A (the Dst), we'll note that they 700 // have 2 loops in common, so CommonLevels will equal 2 and the direction 701 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 702 // A map from loop names to loop numbers would look like 703 // a - 1 704 // b - 2 = CommonLevels 705 // c - 3 706 // d - 4 = SrcLevels 707 // e - 5 708 // f - 6 709 // g - 7 = MaxLevels 710 void DependenceAnalysis::establishNestingLevels(const Instruction *Src, 711 const Instruction *Dst) { 712 const BasicBlock *SrcBlock = Src->getParent(); 713 const BasicBlock *DstBlock = Dst->getParent(); 714 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 715 unsigned DstLevel = LI->getLoopDepth(DstBlock); 716 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 717 const Loop *DstLoop = LI->getLoopFor(DstBlock); 718 SrcLevels = SrcLevel; 719 MaxLevels = SrcLevel + DstLevel; 720 while (SrcLevel > DstLevel) { 721 SrcLoop = SrcLoop->getParentLoop(); 722 SrcLevel--; 723 } 724 while (DstLevel > SrcLevel) { 725 DstLoop = DstLoop->getParentLoop(); 726 DstLevel--; 727 } 728 while (SrcLoop != DstLoop) { 729 SrcLoop = SrcLoop->getParentLoop(); 730 DstLoop = DstLoop->getParentLoop(); 731 SrcLevel--; 732 } 733 CommonLevels = SrcLevel; 734 MaxLevels -= CommonLevels; 735 } 736 737 738 // Given one of the loops containing the source, return 739 // its level index in our numbering scheme. 740 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { 741 return SrcLoop->getLoopDepth(); 742 } 743 744 745 // Given one of the loops containing the destination, 746 // return its level index in our numbering scheme. 747 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { 748 unsigned D = DstLoop->getLoopDepth(); 749 if (D > CommonLevels) 750 return D - CommonLevels + SrcLevels; 751 else 752 return D; 753 } 754 755 756 // Returns true if Expression is loop invariant in LoopNest. 757 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, 758 const Loop *LoopNest) const { 759 if (!LoopNest) 760 return true; 761 return SE->isLoopInvariant(Expression, LoopNest) && 762 isLoopInvariant(Expression, LoopNest->getParentLoop()); 763 } 764 765 766 767 // Finds the set of loops from the LoopNest that 768 // have a level <= CommonLevels and are referred to by the SCEV Expression. 769 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, 770 const Loop *LoopNest, 771 SmallBitVector &Loops) const { 772 while (LoopNest) { 773 unsigned Level = LoopNest->getLoopDepth(); 774 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 775 Loops.set(Level); 776 LoopNest = LoopNest->getParentLoop(); 777 } 778 } 779 780 781 // removeMatchingExtensions - Examines a subscript pair. 782 // If the source and destination are identically sign (or zero) 783 // extended, it strips off the extension in an effect to simplify 784 // the actual analysis. 785 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { 786 const SCEV *Src = Pair->Src; 787 const SCEV *Dst = Pair->Dst; 788 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 789 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 790 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 791 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 792 if (SrcCast->getType() == DstCast->getType()) { 793 Pair->Src = SrcCast->getOperand(); 794 Pair->Dst = DstCast->getOperand(); 795 } 796 } 797 } 798 799 800 // Examine the scev and return true iff it's linear. 801 // Collect any loops mentioned in the set of "Loops". 802 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, 803 const Loop *LoopNest, 804 SmallBitVector &Loops) { 805 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 806 if (!AddRec) 807 return isLoopInvariant(Src, LoopNest); 808 const SCEV *Start = AddRec->getStart(); 809 const SCEV *Step = AddRec->getStepRecurrence(*SE); 810 if (!isLoopInvariant(Step, LoopNest)) 811 return false; 812 Loops.set(mapSrcLoop(AddRec->getLoop())); 813 return checkSrcSubscript(Start, LoopNest, Loops); 814 } 815 816 817 818 // Examine the scev and return true iff it's linear. 819 // Collect any loops mentioned in the set of "Loops". 820 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, 821 const Loop *LoopNest, 822 SmallBitVector &Loops) { 823 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 824 if (!AddRec) 825 return isLoopInvariant(Dst, LoopNest); 826 const SCEV *Start = AddRec->getStart(); 827 const SCEV *Step = AddRec->getStepRecurrence(*SE); 828 if (!isLoopInvariant(Step, LoopNest)) 829 return false; 830 Loops.set(mapDstLoop(AddRec->getLoop())); 831 return checkDstSubscript(Start, LoopNest, Loops); 832 } 833 834 835 // Examines the subscript pair (the Src and Dst SCEVs) 836 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 837 // Collects the associated loops in a set. 838 DependenceAnalysis::Subscript::ClassificationKind 839 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 840 const SCEV *Dst, const Loop *DstLoopNest, 841 SmallBitVector &Loops) { 842 SmallBitVector SrcLoops(MaxLevels + 1); 843 SmallBitVector DstLoops(MaxLevels + 1); 844 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 845 return Subscript::NonLinear; 846 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 847 return Subscript::NonLinear; 848 Loops = SrcLoops; 849 Loops |= DstLoops; 850 unsigned N = Loops.count(); 851 if (N == 0) 852 return Subscript::ZIV; 853 if (N == 1) 854 return Subscript::SIV; 855 if (N == 2 && (SrcLoops.count() == 0 || 856 DstLoops.count() == 0 || 857 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 858 return Subscript::RDIV; 859 return Subscript::MIV; 860 } 861 862 863 // A wrapper around SCEV::isKnownPredicate. 864 // Looks for cases where we're interested in comparing for equality. 865 // If both X and Y have been identically sign or zero extended, 866 // it strips off the (confusing) extensions before invoking 867 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 868 // will be similarly updated. 869 // 870 // If SCEV::isKnownPredicate can't prove the predicate, 871 // we try simple subtraction, which seems to help in some cases 872 // involving symbolics. 873 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, 874 const SCEV *X, 875 const SCEV *Y) const { 876 if (Pred == CmpInst::ICMP_EQ || 877 Pred == CmpInst::ICMP_NE) { 878 if ((isa<SCEVSignExtendExpr>(X) && 879 isa<SCEVSignExtendExpr>(Y)) || 880 (isa<SCEVZeroExtendExpr>(X) && 881 isa<SCEVZeroExtendExpr>(Y))) { 882 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 883 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 884 const SCEV *Xop = CX->getOperand(); 885 const SCEV *Yop = CY->getOperand(); 886 if (Xop->getType() == Yop->getType()) { 887 X = Xop; 888 Y = Yop; 889 } 890 } 891 } 892 if (SE->isKnownPredicate(Pred, X, Y)) 893 return true; 894 // If SE->isKnownPredicate can't prove the condition, 895 // we try the brute-force approach of subtracting 896 // and testing the difference. 897 // By testing with SE->isKnownPredicate first, we avoid 898 // the possibility of overflow when the arguments are constants. 899 const SCEV *Delta = SE->getMinusSCEV(X, Y); 900 switch (Pred) { 901 case CmpInst::ICMP_EQ: 902 return Delta->isZero(); 903 case CmpInst::ICMP_NE: 904 return SE->isKnownNonZero(Delta); 905 case CmpInst::ICMP_SGE: 906 return SE->isKnownNonNegative(Delta); 907 case CmpInst::ICMP_SLE: 908 return SE->isKnownNonPositive(Delta); 909 case CmpInst::ICMP_SGT: 910 return SE->isKnownPositive(Delta); 911 case CmpInst::ICMP_SLT: 912 return SE->isKnownNegative(Delta); 913 default: 914 llvm_unreachable("unexpected predicate in isKnownPredicate"); 915 } 916 } 917 918 919 // All subscripts are all the same type. 920 // Loop bound may be smaller (e.g., a char). 921 // Should zero extend loop bound, since it's always >= 0. 922 // This routine collects upper bound and extends if needed. 923 // Return null if no bound available. 924 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, 925 Type *T) const { 926 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 927 const SCEV *UB = SE->getBackedgeTakenCount(L); 928 return SE->getNoopOrZeroExtend(UB, T); 929 } 930 return NULL; 931 } 932 933 934 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 935 // If the cast fails, returns NULL. 936 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, 937 Type *T 938 ) const { 939 if (const SCEV *UB = collectUpperBound(L, T)) 940 return dyn_cast<SCEVConstant>(UB); 941 return NULL; 942 } 943 944 945 // testZIV - 946 // When we have a pair of subscripts of the form [c1] and [c2], 947 // where c1 and c2 are both loop invariant, we attack it using 948 // the ZIV test. Basically, we test by comparing the two values, 949 // but there are actually three possible results: 950 // 1) the values are equal, so there's a dependence 951 // 2) the values are different, so there's no dependence 952 // 3) the values might be equal, so we have to assume a dependence. 953 // 954 // Return true if dependence disproved. 955 bool DependenceAnalysis::testZIV(const SCEV *Src, 956 const SCEV *Dst, 957 FullDependence &Result) const { 958 DEBUG(dbgs() << " src = " << *Src << "\n"); 959 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 960 ++ZIVapplications; 961 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 962 DEBUG(dbgs() << " provably dependent\n"); 963 return false; // provably dependent 964 } 965 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 966 DEBUG(dbgs() << " provably independent\n"); 967 ++ZIVindependence; 968 return true; // provably independent 969 } 970 DEBUG(dbgs() << " possibly dependent\n"); 971 Result.Consistent = false; 972 return false; // possibly dependent 973 } 974 975 976 // strongSIVtest - 977 // From the paper, Practical Dependence Testing, Section 4.2.1 978 // 979 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 980 // where i is an induction variable, c1 and c2 are loop invariant, 981 // and a is a constant, we can solve it exactly using the Strong SIV test. 982 // 983 // Can prove independence. Failing that, can compute distance (and direction). 984 // In the presence of symbolic terms, we can sometimes make progress. 985 // 986 // If there's a dependence, 987 // 988 // c1 + a*i = c2 + a*i' 989 // 990 // The dependence distance is 991 // 992 // d = i' - i = (c1 - c2)/a 993 // 994 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 995 // loop's upper bound. If a dependence exists, the dependence direction is 996 // defined as 997 // 998 // { < if d > 0 999 // direction = { = if d = 0 1000 // { > if d < 0 1001 // 1002 // Return true if dependence disproved. 1003 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, 1004 const SCEV *SrcConst, 1005 const SCEV *DstConst, 1006 const Loop *CurLoop, 1007 unsigned Level, 1008 FullDependence &Result, 1009 Constraint &NewConstraint) const { 1010 DEBUG(dbgs() << "\tStrong SIV test\n"); 1011 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1012 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1013 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1014 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1015 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1016 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1017 ++StrongSIVapplications; 1018 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1019 Level--; 1020 1021 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1022 DEBUG(dbgs() << "\t Delta = " << *Delta); 1023 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1024 1025 // check that |Delta| < iteration count 1026 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1027 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1028 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1029 const SCEV *AbsDelta = 1030 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1031 const SCEV *AbsCoeff = 1032 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1033 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1034 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1035 // Distance greater than trip count - no dependence 1036 ++StrongSIVindependence; 1037 ++StrongSIVsuccesses; 1038 return true; 1039 } 1040 } 1041 1042 // Can we compute distance? 1043 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1044 APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); 1045 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); 1046 APInt Distance = ConstDelta; // these need to be initialized 1047 APInt Remainder = ConstDelta; 1048 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1049 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1050 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1051 // Make sure Coeff divides Delta exactly 1052 if (Remainder != 0) { 1053 // Coeff doesn't divide Distance, no dependence 1054 ++StrongSIVindependence; 1055 ++StrongSIVsuccesses; 1056 return true; 1057 } 1058 Result.DV[Level].Distance = SE->getConstant(Distance); 1059 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1060 if (Distance.sgt(0)) 1061 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1062 else if (Distance.slt(0)) 1063 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1064 else 1065 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1066 ++StrongSIVsuccesses; 1067 } 1068 else if (Delta->isZero()) { 1069 // since 0/X == 0 1070 Result.DV[Level].Distance = Delta; 1071 NewConstraint.setDistance(Delta, CurLoop); 1072 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1073 ++StrongSIVsuccesses; 1074 } 1075 else { 1076 if (Coeff->isOne()) { 1077 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1078 Result.DV[Level].Distance = Delta; // since X/1 == X 1079 NewConstraint.setDistance(Delta, CurLoop); 1080 } 1081 else { 1082 Result.Consistent = false; 1083 NewConstraint.setLine(Coeff, 1084 SE->getNegativeSCEV(Coeff), 1085 SE->getNegativeSCEV(Delta), CurLoop); 1086 } 1087 1088 // maybe we can get a useful direction 1089 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1090 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1091 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1092 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1093 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1094 // The double negatives above are confusing. 1095 // It helps to read !SE->isKnownNonZero(Delta) 1096 // as "Delta might be Zero" 1097 unsigned NewDirection = Dependence::DVEntry::NONE; 1098 if ((DeltaMaybePositive && CoeffMaybePositive) || 1099 (DeltaMaybeNegative && CoeffMaybeNegative)) 1100 NewDirection = Dependence::DVEntry::LT; 1101 if (DeltaMaybeZero) 1102 NewDirection |= Dependence::DVEntry::EQ; 1103 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1104 (DeltaMaybePositive && CoeffMaybeNegative)) 1105 NewDirection |= Dependence::DVEntry::GT; 1106 if (NewDirection < Result.DV[Level].Direction) 1107 ++StrongSIVsuccesses; 1108 Result.DV[Level].Direction &= NewDirection; 1109 } 1110 return false; 1111 } 1112 1113 1114 // weakCrossingSIVtest - 1115 // From the paper, Practical Dependence Testing, Section 4.2.2 1116 // 1117 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1118 // where i is an induction variable, c1 and c2 are loop invariant, 1119 // and a is a constant, we can solve it exactly using the 1120 // Weak-Crossing SIV test. 1121 // 1122 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1123 // the two lines, where i = i', yielding 1124 // 1125 // c1 + a*i = c2 - a*i 1126 // 2a*i = c2 - c1 1127 // i = (c2 - c1)/2a 1128 // 1129 // If i < 0, there is no dependence. 1130 // If i > upperbound, there is no dependence. 1131 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1132 // If i = upperbound, there's a dependence with distance = 0. 1133 // If i is integral, there's a dependence (all directions). 1134 // If the non-integer part = 1/2, there's a dependence (<> directions). 1135 // Otherwise, there's no dependence. 1136 // 1137 // Can prove independence. Failing that, 1138 // can sometimes refine the directions. 1139 // Can determine iteration for splitting. 1140 // 1141 // Return true if dependence disproved. 1142 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, 1143 const SCEV *SrcConst, 1144 const SCEV *DstConst, 1145 const Loop *CurLoop, 1146 unsigned Level, 1147 FullDependence &Result, 1148 Constraint &NewConstraint, 1149 const SCEV *&SplitIter) const { 1150 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1151 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1152 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1153 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1154 ++WeakCrossingSIVapplications; 1155 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1156 Level--; 1157 Result.Consistent = false; 1158 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1159 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1160 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1161 if (Delta->isZero()) { 1162 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1163 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1164 ++WeakCrossingSIVsuccesses; 1165 if (!Result.DV[Level].Direction) { 1166 ++WeakCrossingSIVindependence; 1167 return true; 1168 } 1169 Result.DV[Level].Distance = Delta; // = 0 1170 return false; 1171 } 1172 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1173 if (!ConstCoeff) 1174 return false; 1175 1176 Result.DV[Level].Splitable = true; 1177 if (SE->isKnownNegative(ConstCoeff)) { 1178 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1179 assert(ConstCoeff && 1180 "dynamic cast of negative of ConstCoeff should yield constant"); 1181 Delta = SE->getNegativeSCEV(Delta); 1182 } 1183 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1184 1185 // compute SplitIter for use by DependenceAnalysis::getSplitIteration() 1186 SplitIter = 1187 SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), 1188 Delta), 1189 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), 1190 ConstCoeff)); 1191 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1192 1193 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1194 if (!ConstDelta) 1195 return false; 1196 1197 // We're certain that ConstCoeff > 0; therefore, 1198 // if Delta < 0, then no dependence. 1199 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1200 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1201 if (SE->isKnownNegative(Delta)) { 1202 // No dependence, Delta < 0 1203 ++WeakCrossingSIVindependence; 1204 ++WeakCrossingSIVsuccesses; 1205 return true; 1206 } 1207 1208 // We're certain that Delta > 0 and ConstCoeff > 0. 1209 // Check Delta/(2*ConstCoeff) against upper loop bound 1210 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1211 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1212 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1213 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1214 ConstantTwo); 1215 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1216 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1217 // Delta too big, no dependence 1218 ++WeakCrossingSIVindependence; 1219 ++WeakCrossingSIVsuccesses; 1220 return true; 1221 } 1222 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1223 // i = i' = UB 1224 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1225 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1226 ++WeakCrossingSIVsuccesses; 1227 if (!Result.DV[Level].Direction) { 1228 ++WeakCrossingSIVindependence; 1229 return true; 1230 } 1231 Result.DV[Level].Splitable = false; 1232 Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); 1233 return false; 1234 } 1235 } 1236 1237 // check that Coeff divides Delta 1238 APInt APDelta = ConstDelta->getValue()->getValue(); 1239 APInt APCoeff = ConstCoeff->getValue()->getValue(); 1240 APInt Distance = APDelta; // these need to be initialzed 1241 APInt Remainder = APDelta; 1242 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1243 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1244 if (Remainder != 0) { 1245 // Coeff doesn't divide Delta, no dependence 1246 ++WeakCrossingSIVindependence; 1247 ++WeakCrossingSIVsuccesses; 1248 return true; 1249 } 1250 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1251 1252 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1253 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1254 Remainder = Distance.srem(Two); 1255 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1256 if (Remainder != 0) { 1257 // Equal direction isn't possible 1258 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1259 ++WeakCrossingSIVsuccesses; 1260 } 1261 return false; 1262 } 1263 1264 1265 // Kirch's algorithm, from 1266 // 1267 // Optimizing Supercompilers for Supercomputers 1268 // Michael Wolfe 1269 // MIT Press, 1989 1270 // 1271 // Program 2.1, page 29. 1272 // Computes the GCD of AM and BM. 1273 // Also finds a solution to the equation ax - by = gdc(a, b). 1274 // Returns true iff the gcd divides Delta. 1275 static 1276 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1277 APInt &G, APInt &X, APInt &Y) { 1278 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1279 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1280 APInt G0 = AM.abs(); 1281 APInt G1 = BM.abs(); 1282 APInt Q = G0; // these need to be initialized 1283 APInt R = G0; 1284 APInt::sdivrem(G0, G1, Q, R); 1285 while (R != 0) { 1286 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1287 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1288 G0 = G1; G1 = R; 1289 APInt::sdivrem(G0, G1, Q, R); 1290 } 1291 G = G1; 1292 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1293 X = AM.slt(0) ? -A1 : A1; 1294 Y = BM.slt(0) ? B1 : -B1; 1295 1296 // make sure gcd divides Delta 1297 R = Delta.srem(G); 1298 if (R != 0) 1299 return true; // gcd doesn't divide Delta, no dependence 1300 Q = Delta.sdiv(G); 1301 X *= Q; 1302 Y *= Q; 1303 return false; 1304 } 1305 1306 1307 static 1308 APInt floorOfQuotient(APInt A, APInt B) { 1309 APInt Q = A; // these need to be initialized 1310 APInt R = A; 1311 APInt::sdivrem(A, B, Q, R); 1312 if (R == 0) 1313 return Q; 1314 if ((A.sgt(0) && B.sgt(0)) || 1315 (A.slt(0) && B.slt(0))) 1316 return Q; 1317 else 1318 return Q - 1; 1319 } 1320 1321 1322 static 1323 APInt ceilingOfQuotient(APInt A, APInt B) { 1324 APInt Q = A; // these need to be initialized 1325 APInt R = A; 1326 APInt::sdivrem(A, B, Q, R); 1327 if (R == 0) 1328 return Q; 1329 if ((A.sgt(0) && B.sgt(0)) || 1330 (A.slt(0) && B.slt(0))) 1331 return Q + 1; 1332 else 1333 return Q; 1334 } 1335 1336 1337 static 1338 APInt maxAPInt(APInt A, APInt B) { 1339 return A.sgt(B) ? A : B; 1340 } 1341 1342 1343 static 1344 APInt minAPInt(APInt A, APInt B) { 1345 return A.slt(B) ? A : B; 1346 } 1347 1348 1349 // exactSIVtest - 1350 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1351 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1352 // and a2 are constant, we can solve it exactly using an algorithm developed 1353 // by Banerjee and Wolfe. See Section 2.5.3 in 1354 // 1355 // Optimizing Supercompilers for Supercomputers 1356 // Michael Wolfe 1357 // MIT Press, 1989 1358 // 1359 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1360 // so use them if possible. They're also a bit better with symbolics and, 1361 // in the case of the strong SIV test, can compute Distances. 1362 // 1363 // Return true if dependence disproved. 1364 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, 1365 const SCEV *DstCoeff, 1366 const SCEV *SrcConst, 1367 const SCEV *DstConst, 1368 const Loop *CurLoop, 1369 unsigned Level, 1370 FullDependence &Result, 1371 Constraint &NewConstraint) const { 1372 DEBUG(dbgs() << "\tExact SIV test\n"); 1373 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1374 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1375 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1376 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1377 ++ExactSIVapplications; 1378 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1379 Level--; 1380 Result.Consistent = false; 1381 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1382 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1383 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1384 Delta, CurLoop); 1385 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1386 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1387 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1388 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1389 return false; 1390 1391 // find gcd 1392 APInt G, X, Y; 1393 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1394 APInt BM = ConstDstCoeff->getValue()->getValue(); 1395 unsigned Bits = AM.getBitWidth(); 1396 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1397 // gcd doesn't divide Delta, no dependence 1398 ++ExactSIVindependence; 1399 ++ExactSIVsuccesses; 1400 return true; 1401 } 1402 1403 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1404 1405 // since SCEV construction normalizes, LM = 0 1406 APInt UM(Bits, 1, true); 1407 bool UMvalid = false; 1408 // UM is perhaps unavailable, let's check 1409 if (const SCEVConstant *CUB = 1410 collectConstantUpperBound(CurLoop, Delta->getType())) { 1411 UM = CUB->getValue()->getValue(); 1412 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1413 UMvalid = true; 1414 } 1415 1416 APInt TU(APInt::getSignedMaxValue(Bits)); 1417 APInt TL(APInt::getSignedMinValue(Bits)); 1418 1419 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1420 APInt TMUL = BM.sdiv(G); 1421 if (TMUL.sgt(0)) { 1422 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1423 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1424 if (UMvalid) { 1425 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1426 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1427 } 1428 } 1429 else { 1430 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1431 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1432 if (UMvalid) { 1433 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1434 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1435 } 1436 } 1437 1438 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1439 TMUL = AM.sdiv(G); 1440 if (TMUL.sgt(0)) { 1441 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1442 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1443 if (UMvalid) { 1444 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1445 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1446 } 1447 } 1448 else { 1449 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1450 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1451 if (UMvalid) { 1452 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1453 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1454 } 1455 } 1456 if (TL.sgt(TU)) { 1457 ++ExactSIVindependence; 1458 ++ExactSIVsuccesses; 1459 return true; 1460 } 1461 1462 // explore directions 1463 unsigned NewDirection = Dependence::DVEntry::NONE; 1464 1465 // less than 1466 APInt SaveTU(TU); // save these 1467 APInt SaveTL(TL); 1468 DEBUG(dbgs() << "\t exploring LT direction\n"); 1469 TMUL = AM - BM; 1470 if (TMUL.sgt(0)) { 1471 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1472 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1473 } 1474 else { 1475 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1476 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1477 } 1478 if (TL.sle(TU)) { 1479 NewDirection |= Dependence::DVEntry::LT; 1480 ++ExactSIVsuccesses; 1481 } 1482 1483 // equal 1484 TU = SaveTU; // restore 1485 TL = SaveTL; 1486 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1487 if (TMUL.sgt(0)) { 1488 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1489 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1490 } 1491 else { 1492 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1493 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1494 } 1495 TMUL = BM - AM; 1496 if (TMUL.sgt(0)) { 1497 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1498 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1499 } 1500 else { 1501 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1502 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1503 } 1504 if (TL.sle(TU)) { 1505 NewDirection |= Dependence::DVEntry::EQ; 1506 ++ExactSIVsuccesses; 1507 } 1508 1509 // greater than 1510 TU = SaveTU; // restore 1511 TL = SaveTL; 1512 DEBUG(dbgs() << "\t exploring GT direction\n"); 1513 if (TMUL.sgt(0)) { 1514 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1515 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1516 } 1517 else { 1518 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1519 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1520 } 1521 if (TL.sle(TU)) { 1522 NewDirection |= Dependence::DVEntry::GT; 1523 ++ExactSIVsuccesses; 1524 } 1525 1526 // finished 1527 Result.DV[Level].Direction &= NewDirection; 1528 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1529 ++ExactSIVindependence; 1530 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1531 } 1532 1533 1534 1535 // Return true if the divisor evenly divides the dividend. 1536 static 1537 bool isRemainderZero(const SCEVConstant *Dividend, 1538 const SCEVConstant *Divisor) { 1539 APInt ConstDividend = Dividend->getValue()->getValue(); 1540 APInt ConstDivisor = Divisor->getValue()->getValue(); 1541 return ConstDividend.srem(ConstDivisor) == 0; 1542 } 1543 1544 1545 // weakZeroSrcSIVtest - 1546 // From the paper, Practical Dependence Testing, Section 4.2.2 1547 // 1548 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1549 // where i is an induction variable, c1 and c2 are loop invariant, 1550 // and a is a constant, we can solve it exactly using the 1551 // Weak-Zero SIV test. 1552 // 1553 // Given 1554 // 1555 // c1 = c2 + a*i 1556 // 1557 // we get 1558 // 1559 // (c1 - c2)/a = i 1560 // 1561 // If i is not an integer, there's no dependence. 1562 // If i < 0 or > UB, there's no dependence. 1563 // If i = 0, the direction is <= and peeling the 1564 // 1st iteration will break the dependence. 1565 // If i = UB, the direction is >= and peeling the 1566 // last iteration will break the dependence. 1567 // Otherwise, the direction is *. 1568 // 1569 // Can prove independence. Failing that, we can sometimes refine 1570 // the directions. Can sometimes show that first or last 1571 // iteration carries all the dependences (so worth peeling). 1572 // 1573 // (see also weakZeroDstSIVtest) 1574 // 1575 // Return true if dependence disproved. 1576 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1577 const SCEV *SrcConst, 1578 const SCEV *DstConst, 1579 const Loop *CurLoop, 1580 unsigned Level, 1581 FullDependence &Result, 1582 Constraint &NewConstraint) const { 1583 // For the WeakSIV test, it's possible the loop isn't common to 1584 // the Src and Dst loops. If it isn't, then there's no need to 1585 // record a direction. 1586 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1587 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1588 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1589 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1590 ++WeakZeroSIVapplications; 1591 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1592 Level--; 1593 Result.Consistent = false; 1594 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1595 NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), 1596 DstCoeff, Delta, CurLoop); 1597 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1598 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1599 if (Level < CommonLevels) { 1600 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1601 Result.DV[Level].PeelFirst = true; 1602 ++WeakZeroSIVsuccesses; 1603 } 1604 return false; // dependences caused by first iteration 1605 } 1606 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1607 if (!ConstCoeff) 1608 return false; 1609 const SCEV *AbsCoeff = 1610 SE->isKnownNegative(ConstCoeff) ? 1611 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1612 const SCEV *NewDelta = 1613 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1614 1615 // check that Delta/SrcCoeff < iteration count 1616 // really check NewDelta < count*AbsCoeff 1617 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1618 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1619 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1620 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1621 ++WeakZeroSIVindependence; 1622 ++WeakZeroSIVsuccesses; 1623 return true; 1624 } 1625 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1626 // dependences caused by last iteration 1627 if (Level < CommonLevels) { 1628 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1629 Result.DV[Level].PeelLast = true; 1630 ++WeakZeroSIVsuccesses; 1631 } 1632 return false; 1633 } 1634 } 1635 1636 // check that Delta/SrcCoeff >= 0 1637 // really check that NewDelta >= 0 1638 if (SE->isKnownNegative(NewDelta)) { 1639 // No dependence, newDelta < 0 1640 ++WeakZeroSIVindependence; 1641 ++WeakZeroSIVsuccesses; 1642 return true; 1643 } 1644 1645 // if SrcCoeff doesn't divide Delta, then no dependence 1646 if (isa<SCEVConstant>(Delta) && 1647 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1648 ++WeakZeroSIVindependence; 1649 ++WeakZeroSIVsuccesses; 1650 return true; 1651 } 1652 return false; 1653 } 1654 1655 1656 // weakZeroDstSIVtest - 1657 // From the paper, Practical Dependence Testing, Section 4.2.2 1658 // 1659 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1660 // where i is an induction variable, c1 and c2 are loop invariant, 1661 // and a is a constant, we can solve it exactly using the 1662 // Weak-Zero SIV test. 1663 // 1664 // Given 1665 // 1666 // c1 + a*i = c2 1667 // 1668 // we get 1669 // 1670 // i = (c2 - c1)/a 1671 // 1672 // If i is not an integer, there's no dependence. 1673 // If i < 0 or > UB, there's no dependence. 1674 // If i = 0, the direction is <= and peeling the 1675 // 1st iteration will break the dependence. 1676 // If i = UB, the direction is >= and peeling the 1677 // last iteration will break the dependence. 1678 // Otherwise, the direction is *. 1679 // 1680 // Can prove independence. Failing that, we can sometimes refine 1681 // the directions. Can sometimes show that first or last 1682 // iteration carries all the dependences (so worth peeling). 1683 // 1684 // (see also weakZeroSrcSIVtest) 1685 // 1686 // Return true if dependence disproved. 1687 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1688 const SCEV *SrcConst, 1689 const SCEV *DstConst, 1690 const Loop *CurLoop, 1691 unsigned Level, 1692 FullDependence &Result, 1693 Constraint &NewConstraint) const { 1694 // For the WeakSIV test, it's possible the loop isn't common to the 1695 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1696 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1697 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1698 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1699 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1700 ++WeakZeroSIVapplications; 1701 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1702 Level--; 1703 Result.Consistent = false; 1704 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1705 NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), 1706 Delta, CurLoop); 1707 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1708 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1709 if (Level < CommonLevels) { 1710 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1711 Result.DV[Level].PeelFirst = true; 1712 ++WeakZeroSIVsuccesses; 1713 } 1714 return false; // dependences caused by first iteration 1715 } 1716 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1717 if (!ConstCoeff) 1718 return false; 1719 const SCEV *AbsCoeff = 1720 SE->isKnownNegative(ConstCoeff) ? 1721 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1722 const SCEV *NewDelta = 1723 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1724 1725 // check that Delta/SrcCoeff < iteration count 1726 // really check NewDelta < count*AbsCoeff 1727 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1728 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1729 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1730 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1731 ++WeakZeroSIVindependence; 1732 ++WeakZeroSIVsuccesses; 1733 return true; 1734 } 1735 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1736 // dependences caused by last iteration 1737 if (Level < CommonLevels) { 1738 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1739 Result.DV[Level].PeelLast = true; 1740 ++WeakZeroSIVsuccesses; 1741 } 1742 return false; 1743 } 1744 } 1745 1746 // check that Delta/SrcCoeff >= 0 1747 // really check that NewDelta >= 0 1748 if (SE->isKnownNegative(NewDelta)) { 1749 // No dependence, newDelta < 0 1750 ++WeakZeroSIVindependence; 1751 ++WeakZeroSIVsuccesses; 1752 return true; 1753 } 1754 1755 // if SrcCoeff doesn't divide Delta, then no dependence 1756 if (isa<SCEVConstant>(Delta) && 1757 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1758 ++WeakZeroSIVindependence; 1759 ++WeakZeroSIVsuccesses; 1760 return true; 1761 } 1762 return false; 1763 } 1764 1765 1766 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1767 // Things of the form [c1 + a*i] and [c2 + b*j], 1768 // where i and j are induction variable, c1 and c2 are loop invariant, 1769 // and a and b are constants. 1770 // Returns true if any possible dependence is disproved. 1771 // Marks the result as inconsistent. 1772 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1773 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, 1774 const SCEV *DstCoeff, 1775 const SCEV *SrcConst, 1776 const SCEV *DstConst, 1777 const Loop *SrcLoop, 1778 const Loop *DstLoop, 1779 FullDependence &Result) const { 1780 DEBUG(dbgs() << "\tExact RDIV test\n"); 1781 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1782 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1783 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1784 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1785 ++ExactRDIVapplications; 1786 Result.Consistent = false; 1787 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1788 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1789 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1790 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1791 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1792 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1793 return false; 1794 1795 // find gcd 1796 APInt G, X, Y; 1797 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1798 APInt BM = ConstDstCoeff->getValue()->getValue(); 1799 unsigned Bits = AM.getBitWidth(); 1800 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1801 // gcd doesn't divide Delta, no dependence 1802 ++ExactRDIVindependence; 1803 return true; 1804 } 1805 1806 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1807 1808 // since SCEV construction seems to normalize, LM = 0 1809 APInt SrcUM(Bits, 1, true); 1810 bool SrcUMvalid = false; 1811 // SrcUM is perhaps unavailable, let's check 1812 if (const SCEVConstant *UpperBound = 1813 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1814 SrcUM = UpperBound->getValue()->getValue(); 1815 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1816 SrcUMvalid = true; 1817 } 1818 1819 APInt DstUM(Bits, 1, true); 1820 bool DstUMvalid = false; 1821 // UM is perhaps unavailable, let's check 1822 if (const SCEVConstant *UpperBound = 1823 collectConstantUpperBound(DstLoop, Delta->getType())) { 1824 DstUM = UpperBound->getValue()->getValue(); 1825 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1826 DstUMvalid = true; 1827 } 1828 1829 APInt TU(APInt::getSignedMaxValue(Bits)); 1830 APInt TL(APInt::getSignedMinValue(Bits)); 1831 1832 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1833 APInt TMUL = BM.sdiv(G); 1834 if (TMUL.sgt(0)) { 1835 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1836 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1837 if (SrcUMvalid) { 1838 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1839 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1840 } 1841 } 1842 else { 1843 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1844 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1845 if (SrcUMvalid) { 1846 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1847 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1848 } 1849 } 1850 1851 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1852 TMUL = AM.sdiv(G); 1853 if (TMUL.sgt(0)) { 1854 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1855 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1856 if (DstUMvalid) { 1857 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1858 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1859 } 1860 } 1861 else { 1862 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1863 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1864 if (DstUMvalid) { 1865 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1866 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1867 } 1868 } 1869 if (TL.sgt(TU)) 1870 ++ExactRDIVindependence; 1871 return TL.sgt(TU); 1872 } 1873 1874 1875 // symbolicRDIVtest - 1876 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1877 // introduce a special case of Banerjee's Inequalities (also called the 1878 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1879 // particularly cases with symbolics. Since it's only able to disprove 1880 // dependence (not compute distances or directions), we'll use it as a 1881 // fall back for the other tests. 1882 // 1883 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1884 // where i and j are induction variables and c1 and c2 are loop invariants, 1885 // we can use the symbolic tests to disprove some dependences, serving as a 1886 // backup for the RDIV test. Note that i and j can be the same variable, 1887 // letting this test serve as a backup for the various SIV tests. 1888 // 1889 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1890 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1891 // loop bounds for the i and j loops, respectively. So, ... 1892 // 1893 // c1 + a1*i = c2 + a2*j 1894 // a1*i - a2*j = c2 - c1 1895 // 1896 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1897 // range of the maximum and minimum possible values of a1*i - a2*j. 1898 // Considering the signs of a1 and a2, we have 4 possible cases: 1899 // 1900 // 1) If a1 >= 0 and a2 >= 0, then 1901 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1902 // -a2*N2 <= c2 - c1 <= a1*N1 1903 // 1904 // 2) If a1 >= 0 and a2 <= 0, then 1905 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1906 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1907 // 1908 // 3) If a1 <= 0 and a2 >= 0, then 1909 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1910 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1911 // 1912 // 4) If a1 <= 0 and a2 <= 0, then 1913 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1914 // a1*N1 <= c2 - c1 <= -a2*N2 1915 // 1916 // return true if dependence disproved 1917 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, 1918 const SCEV *A2, 1919 const SCEV *C1, 1920 const SCEV *C2, 1921 const Loop *Loop1, 1922 const Loop *Loop2) const { 1923 ++SymbolicRDIVapplications; 1924 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1925 DEBUG(dbgs() << "\t A1 = " << *A1); 1926 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1927 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1928 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1929 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1930 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1931 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1932 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1933 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1934 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1935 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1936 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1937 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1938 if (SE->isKnownNonNegative(A1)) { 1939 if (SE->isKnownNonNegative(A2)) { 1940 // A1 >= 0 && A2 >= 0 1941 if (N1) { 1942 // make sure that c2 - c1 <= a1*N1 1943 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1944 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1945 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1946 ++SymbolicRDIVindependence; 1947 return true; 1948 } 1949 } 1950 if (N2) { 1951 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1952 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1953 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1954 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1955 ++SymbolicRDIVindependence; 1956 return true; 1957 } 1958 } 1959 } 1960 else if (SE->isKnownNonPositive(A2)) { 1961 // a1 >= 0 && a2 <= 0 1962 if (N1 && N2) { 1963 // make sure that c2 - c1 <= a1*N1 - a2*N2 1964 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1965 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1966 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1967 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1968 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 1969 ++SymbolicRDIVindependence; 1970 return true; 1971 } 1972 } 1973 // make sure that 0 <= c2 - c1 1974 if (SE->isKnownNegative(C2_C1)) { 1975 ++SymbolicRDIVindependence; 1976 return true; 1977 } 1978 } 1979 } 1980 else if (SE->isKnownNonPositive(A1)) { 1981 if (SE->isKnownNonNegative(A2)) { 1982 // a1 <= 0 && a2 >= 0 1983 if (N1 && N2) { 1984 // make sure that a1*N1 - a2*N2 <= c2 - c1 1985 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1986 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1987 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1988 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1989 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 1990 ++SymbolicRDIVindependence; 1991 return true; 1992 } 1993 } 1994 // make sure that c2 - c1 <= 0 1995 if (SE->isKnownPositive(C2_C1)) { 1996 ++SymbolicRDIVindependence; 1997 return true; 1998 } 1999 } 2000 else if (SE->isKnownNonPositive(A2)) { 2001 // a1 <= 0 && a2 <= 0 2002 if (N1) { 2003 // make sure that a1*N1 <= c2 - c1 2004 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2005 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2006 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2007 ++SymbolicRDIVindependence; 2008 return true; 2009 } 2010 } 2011 if (N2) { 2012 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2013 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2014 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2015 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2016 ++SymbolicRDIVindependence; 2017 return true; 2018 } 2019 } 2020 } 2021 } 2022 return false; 2023 } 2024 2025 2026 // testSIV - 2027 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2028 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2029 // a2 are constant, we attack it with an SIV test. While they can all be 2030 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2031 // they apply; they're cheaper and sometimes more precise. 2032 // 2033 // Return true if dependence disproved. 2034 bool DependenceAnalysis::testSIV(const SCEV *Src, 2035 const SCEV *Dst, 2036 unsigned &Level, 2037 FullDependence &Result, 2038 Constraint &NewConstraint, 2039 const SCEV *&SplitIter) const { 2040 DEBUG(dbgs() << " src = " << *Src << "\n"); 2041 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2042 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2043 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2044 if (SrcAddRec && DstAddRec) { 2045 const SCEV *SrcConst = SrcAddRec->getStart(); 2046 const SCEV *DstConst = DstAddRec->getStart(); 2047 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2048 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2049 const Loop *CurLoop = SrcAddRec->getLoop(); 2050 assert(CurLoop == DstAddRec->getLoop() && 2051 "both loops in SIV should be same"); 2052 Level = mapSrcLoop(CurLoop); 2053 bool disproven; 2054 if (SrcCoeff == DstCoeff) 2055 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2056 Level, Result, NewConstraint); 2057 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2058 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2059 Level, Result, NewConstraint, SplitIter); 2060 else 2061 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2062 Level, Result, NewConstraint); 2063 return disproven || 2064 gcdMIVtest(Src, Dst, Result) || 2065 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2066 } 2067 if (SrcAddRec) { 2068 const SCEV *SrcConst = SrcAddRec->getStart(); 2069 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2070 const SCEV *DstConst = Dst; 2071 const Loop *CurLoop = SrcAddRec->getLoop(); 2072 Level = mapSrcLoop(CurLoop); 2073 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2074 Level, Result, NewConstraint) || 2075 gcdMIVtest(Src, Dst, Result); 2076 } 2077 if (DstAddRec) { 2078 const SCEV *DstConst = DstAddRec->getStart(); 2079 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2080 const SCEV *SrcConst = Src; 2081 const Loop *CurLoop = DstAddRec->getLoop(); 2082 Level = mapDstLoop(CurLoop); 2083 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2084 CurLoop, Level, Result, NewConstraint) || 2085 gcdMIVtest(Src, Dst, Result); 2086 } 2087 llvm_unreachable("SIV test expected at least one AddRec"); 2088 return false; 2089 } 2090 2091 2092 // testRDIV - 2093 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2094 // where i and j are induction variables, c1 and c2 are loop invariant, 2095 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2096 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2097 // It doesn't make sense to talk about distance or direction in this case, 2098 // so there's no point in making special versions of the Strong SIV test or 2099 // the Weak-crossing SIV test. 2100 // 2101 // With minor algebra, this test can also be used for things like 2102 // [c1 + a1*i + a2*j][c2]. 2103 // 2104 // Return true if dependence disproved. 2105 bool DependenceAnalysis::testRDIV(const SCEV *Src, 2106 const SCEV *Dst, 2107 FullDependence &Result) const { 2108 // we have 3 possible situations here: 2109 // 1) [a*i + b] and [c*j + d] 2110 // 2) [a*i + c*j + b] and [d] 2111 // 3) [b] and [a*i + c*j + d] 2112 // We need to find what we've got and get organized 2113 2114 const SCEV *SrcConst, *DstConst; 2115 const SCEV *SrcCoeff, *DstCoeff; 2116 const Loop *SrcLoop, *DstLoop; 2117 2118 DEBUG(dbgs() << " src = " << *Src << "\n"); 2119 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2120 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2121 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2122 if (SrcAddRec && DstAddRec) { 2123 SrcConst = SrcAddRec->getStart(); 2124 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2125 SrcLoop = SrcAddRec->getLoop(); 2126 DstConst = DstAddRec->getStart(); 2127 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2128 DstLoop = DstAddRec->getLoop(); 2129 } 2130 else if (SrcAddRec) { 2131 if (const SCEVAddRecExpr *tmpAddRec = 2132 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2133 SrcConst = tmpAddRec->getStart(); 2134 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2135 SrcLoop = tmpAddRec->getLoop(); 2136 DstConst = Dst; 2137 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2138 DstLoop = SrcAddRec->getLoop(); 2139 } 2140 else 2141 llvm_unreachable("RDIV reached by surprising SCEVs"); 2142 } 2143 else if (DstAddRec) { 2144 if (const SCEVAddRecExpr *tmpAddRec = 2145 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2146 DstConst = tmpAddRec->getStart(); 2147 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2148 DstLoop = tmpAddRec->getLoop(); 2149 SrcConst = Src; 2150 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2151 SrcLoop = DstAddRec->getLoop(); 2152 } 2153 else 2154 llvm_unreachable("RDIV reached by surprising SCEVs"); 2155 } 2156 else 2157 llvm_unreachable("RDIV expected at least one AddRec"); 2158 return exactRDIVtest(SrcCoeff, DstCoeff, 2159 SrcConst, DstConst, 2160 SrcLoop, DstLoop, 2161 Result) || 2162 gcdMIVtest(Src, Dst, Result) || 2163 symbolicRDIVtest(SrcCoeff, DstCoeff, 2164 SrcConst, DstConst, 2165 SrcLoop, DstLoop); 2166 } 2167 2168 2169 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2170 // Return true if dependence disproved. 2171 // Can sometimes refine direction vectors. 2172 bool DependenceAnalysis::testMIV(const SCEV *Src, 2173 const SCEV *Dst, 2174 const SmallBitVector &Loops, 2175 FullDependence &Result) const { 2176 DEBUG(dbgs() << " src = " << *Src << "\n"); 2177 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2178 Result.Consistent = false; 2179 return gcdMIVtest(Src, Dst, Result) || 2180 banerjeeMIVtest(Src, Dst, Loops, Result); 2181 } 2182 2183 2184 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2185 // in this case 10. If there is no constant part, returns NULL. 2186 static 2187 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { 2188 for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { 2189 if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) 2190 return Constant; 2191 } 2192 return NULL; 2193 } 2194 2195 2196 //===----------------------------------------------------------------------===// 2197 // gcdMIVtest - 2198 // Tests an MIV subscript pair for dependence. 2199 // Returns true if any possible dependence is disproved. 2200 // Marks the result as inconsistent. 2201 // Can sometimes disprove the equal direction for 1 or more loops, 2202 // as discussed in Michael Wolfe's book, 2203 // High Performance Compilers for Parallel Computing, page 235. 2204 // 2205 // We spend some effort (code!) to handle cases like 2206 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2207 // but M and N are just loop-invariant variables. 2208 // This should help us handle linearized subscripts; 2209 // also makes this test a useful backup to the various SIV tests. 2210 // 2211 // It occurs to me that the presence of loop-invariant variables 2212 // changes the nature of the test from "greatest common divisor" 2213 // to "a common divisor". 2214 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, 2215 const SCEV *Dst, 2216 FullDependence &Result) const { 2217 DEBUG(dbgs() << "starting gcd\n"); 2218 ++GCDapplications; 2219 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2220 APInt RunningGCD = APInt::getNullValue(BitWidth); 2221 2222 // Examine Src coefficients. 2223 // Compute running GCD and record source constant. 2224 // Because we're looking for the constant at the end of the chain, 2225 // we can't quit the loop just because the GCD == 1. 2226 const SCEV *Coefficients = Src; 2227 while (const SCEVAddRecExpr *AddRec = 2228 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2229 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2230 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2231 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2232 // If the coefficient is the product of a constant and other stuff, 2233 // we can use the constant in the GCD computation. 2234 Constant = getConstantPart(Product); 2235 if (!Constant) 2236 return false; 2237 APInt ConstCoeff = Constant->getValue()->getValue(); 2238 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2239 Coefficients = AddRec->getStart(); 2240 } 2241 const SCEV *SrcConst = Coefficients; 2242 2243 // Examine Dst coefficients. 2244 // Compute running GCD and record destination constant. 2245 // Because we're looking for the constant at the end of the chain, 2246 // we can't quit the loop just because the GCD == 1. 2247 Coefficients = Dst; 2248 while (const SCEVAddRecExpr *AddRec = 2249 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2250 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2251 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2252 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2253 // If the coefficient is the product of a constant and other stuff, 2254 // we can use the constant in the GCD computation. 2255 Constant = getConstantPart(Product); 2256 if (!Constant) 2257 return false; 2258 APInt ConstCoeff = Constant->getValue()->getValue(); 2259 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2260 Coefficients = AddRec->getStart(); 2261 } 2262 const SCEV *DstConst = Coefficients; 2263 2264 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2265 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2266 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2267 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2268 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2269 // If Delta is a sum of products, we may be able to make further progress. 2270 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2271 const SCEV *Operand = Sum->getOperand(Op); 2272 if (isa<SCEVConstant>(Operand)) { 2273 assert(!Constant && "Surprised to find multiple constants"); 2274 Constant = cast<SCEVConstant>(Operand); 2275 } 2276 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2277 // Search for constant operand to participate in GCD; 2278 // If none found; return false. 2279 const SCEVConstant *ConstOp = getConstantPart(Product); 2280 if (!ConstOp) 2281 return false; 2282 APInt ConstOpValue = ConstOp->getValue()->getValue(); 2283 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2284 ConstOpValue.abs()); 2285 } 2286 else 2287 return false; 2288 } 2289 } 2290 if (!Constant) 2291 return false; 2292 APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); 2293 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2294 if (ConstDelta == 0) 2295 return false; 2296 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2297 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2298 APInt Remainder = ConstDelta.srem(RunningGCD); 2299 if (Remainder != 0) { 2300 ++GCDindependence; 2301 return true; 2302 } 2303 2304 // Try to disprove equal directions. 2305 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2306 // the code above can't disprove the dependence because the GCD = 1. 2307 // So we consider what happen if i = i' and what happens if j = j'. 2308 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2309 // which is infeasible, so we can disallow the = direction for the i level. 2310 // Setting j = j' doesn't help matters, so we end up with a direction vector 2311 // of [<>, *] 2312 // 2313 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2314 // we need to remember that the constant part is 5 and the RunningGCD should 2315 // be initialized to ExtraGCD = 30. 2316 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2317 2318 bool Improved = false; 2319 Coefficients = Src; 2320 while (const SCEVAddRecExpr *AddRec = 2321 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2322 Coefficients = AddRec->getStart(); 2323 const Loop *CurLoop = AddRec->getLoop(); 2324 RunningGCD = ExtraGCD; 2325 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2326 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2327 const SCEV *Inner = Src; 2328 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2329 AddRec = cast<SCEVAddRecExpr>(Inner); 2330 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2331 if (CurLoop == AddRec->getLoop()) 2332 ; // SrcCoeff == Coeff 2333 else { 2334 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2335 // If the coefficient is the product of a constant and other stuff, 2336 // we can use the constant in the GCD computation. 2337 Constant = getConstantPart(Product); 2338 else 2339 Constant = cast<SCEVConstant>(Coeff); 2340 APInt ConstCoeff = Constant->getValue()->getValue(); 2341 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2342 } 2343 Inner = AddRec->getStart(); 2344 } 2345 Inner = Dst; 2346 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2347 AddRec = cast<SCEVAddRecExpr>(Inner); 2348 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2349 if (CurLoop == AddRec->getLoop()) 2350 DstCoeff = Coeff; 2351 else { 2352 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2353 // If the coefficient is the product of a constant and other stuff, 2354 // we can use the constant in the GCD computation. 2355 Constant = getConstantPart(Product); 2356 else 2357 Constant = cast<SCEVConstant>(Coeff); 2358 APInt ConstCoeff = Constant->getValue()->getValue(); 2359 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2360 } 2361 Inner = AddRec->getStart(); 2362 } 2363 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2364 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) 2365 // If the coefficient is the product of a constant and other stuff, 2366 // we can use the constant in the GCD computation. 2367 Constant = getConstantPart(Product); 2368 else if (isa<SCEVConstant>(Delta)) 2369 Constant = cast<SCEVConstant>(Delta); 2370 else { 2371 // The difference of the two coefficients might not be a product 2372 // or constant, in which case we give up on this direction. 2373 continue; 2374 } 2375 APInt ConstCoeff = Constant->getValue()->getValue(); 2376 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2377 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2378 if (RunningGCD != 0) { 2379 Remainder = ConstDelta.srem(RunningGCD); 2380 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2381 if (Remainder != 0) { 2382 unsigned Level = mapSrcLoop(CurLoop); 2383 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2384 Improved = true; 2385 } 2386 } 2387 } 2388 if (Improved) 2389 ++GCDsuccesses; 2390 DEBUG(dbgs() << "all done\n"); 2391 return false; 2392 } 2393 2394 2395 //===----------------------------------------------------------------------===// 2396 // banerjeeMIVtest - 2397 // Use Banerjee's Inequalities to test an MIV subscript pair. 2398 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2399 // Generally follows the discussion in Section 2.5.2 of 2400 // 2401 // Optimizing Supercompilers for Supercomputers 2402 // Michael Wolfe 2403 // 2404 // The inequalities given on page 25 are simplified in that loops are 2405 // normalized so that the lower bound is always 0 and the stride is always 1. 2406 // For example, Wolfe gives 2407 // 2408 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2409 // 2410 // where A_k is the coefficient of the kth index in the source subscript, 2411 // B_k is the coefficient of the kth index in the destination subscript, 2412 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2413 // index, and N_k is the stride of the kth index. Since all loops are normalized 2414 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2415 // equation to 2416 // 2417 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2418 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2419 // 2420 // Similar simplifications are possible for the other equations. 2421 // 2422 // When we can't determine the number of iterations for a loop, 2423 // we use NULL as an indicator for the worst case, infinity. 2424 // When computing the upper bound, NULL denotes +inf; 2425 // for the lower bound, NULL denotes -inf. 2426 // 2427 // Return true if dependence disproved. 2428 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, 2429 const SCEV *Dst, 2430 const SmallBitVector &Loops, 2431 FullDependence &Result) const { 2432 DEBUG(dbgs() << "starting Banerjee\n"); 2433 ++BanerjeeApplications; 2434 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2435 const SCEV *A0; 2436 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2437 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2438 const SCEV *B0; 2439 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2440 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2441 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2442 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2443 2444 // Compute bounds for all the * directions. 2445 DEBUG(dbgs() << "\tBounds[*]\n"); 2446 for (unsigned K = 1; K <= MaxLevels; ++K) { 2447 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2448 Bound[K].Direction = Dependence::DVEntry::ALL; 2449 Bound[K].DirSet = Dependence::DVEntry::NONE; 2450 findBoundsALL(A, B, Bound, K); 2451 #ifndef NDEBUG 2452 DEBUG(dbgs() << "\t " << K << '\t'); 2453 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2454 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2455 else 2456 DEBUG(dbgs() << "-inf\t"); 2457 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2458 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2459 else 2460 DEBUG(dbgs() << "+inf\n"); 2461 #endif 2462 } 2463 2464 // Test the *, *, *, ... case. 2465 bool Disproved = false; 2466 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2467 // Explore the direction vector hierarchy. 2468 unsigned DepthExpanded = 0; 2469 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2470 Loops, DepthExpanded, Delta); 2471 if (NewDeps > 0) { 2472 bool Improved = false; 2473 for (unsigned K = 1; K <= CommonLevels; ++K) { 2474 if (Loops[K]) { 2475 unsigned Old = Result.DV[K - 1].Direction; 2476 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2477 Improved |= Old != Result.DV[K - 1].Direction; 2478 if (!Result.DV[K - 1].Direction) { 2479 Improved = false; 2480 Disproved = true; 2481 break; 2482 } 2483 } 2484 } 2485 if (Improved) 2486 ++BanerjeeSuccesses; 2487 } 2488 else { 2489 ++BanerjeeIndependence; 2490 Disproved = true; 2491 } 2492 } 2493 else { 2494 ++BanerjeeIndependence; 2495 Disproved = true; 2496 } 2497 delete [] Bound; 2498 delete [] A; 2499 delete [] B; 2500 return Disproved; 2501 } 2502 2503 2504 // Hierarchically expands the direction vector 2505 // search space, combining the directions of discovered dependences 2506 // in the DirSet field of Bound. Returns the number of distinct 2507 // dependences discovered. If the dependence is disproved, 2508 // it will return 0. 2509 unsigned DependenceAnalysis::exploreDirections(unsigned Level, 2510 CoefficientInfo *A, 2511 CoefficientInfo *B, 2512 BoundInfo *Bound, 2513 const SmallBitVector &Loops, 2514 unsigned &DepthExpanded, 2515 const SCEV *Delta) const { 2516 if (Level > CommonLevels) { 2517 // record result 2518 DEBUG(dbgs() << "\t["); 2519 for (unsigned K = 1; K <= CommonLevels; ++K) { 2520 if (Loops[K]) { 2521 Bound[K].DirSet |= Bound[K].Direction; 2522 #ifndef NDEBUG 2523 switch (Bound[K].Direction) { 2524 case Dependence::DVEntry::LT: 2525 DEBUG(dbgs() << " <"); 2526 break; 2527 case Dependence::DVEntry::EQ: 2528 DEBUG(dbgs() << " ="); 2529 break; 2530 case Dependence::DVEntry::GT: 2531 DEBUG(dbgs() << " >"); 2532 break; 2533 case Dependence::DVEntry::ALL: 2534 DEBUG(dbgs() << " *"); 2535 break; 2536 default: 2537 llvm_unreachable("unexpected Bound[K].Direction"); 2538 } 2539 #endif 2540 } 2541 } 2542 DEBUG(dbgs() << " ]\n"); 2543 return 1; 2544 } 2545 if (Loops[Level]) { 2546 if (Level > DepthExpanded) { 2547 DepthExpanded = Level; 2548 // compute bounds for <, =, > at current level 2549 findBoundsLT(A, B, Bound, Level); 2550 findBoundsGT(A, B, Bound, Level); 2551 findBoundsEQ(A, B, Bound, Level); 2552 #ifndef NDEBUG 2553 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2554 DEBUG(dbgs() << "\t <\t"); 2555 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2556 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2557 else 2558 DEBUG(dbgs() << "-inf\t"); 2559 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2560 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2561 else 2562 DEBUG(dbgs() << "+inf\n"); 2563 DEBUG(dbgs() << "\t =\t"); 2564 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2565 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2566 else 2567 DEBUG(dbgs() << "-inf\t"); 2568 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2569 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2570 else 2571 DEBUG(dbgs() << "+inf\n"); 2572 DEBUG(dbgs() << "\t >\t"); 2573 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2574 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2575 else 2576 DEBUG(dbgs() << "-inf\t"); 2577 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2578 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2579 else 2580 DEBUG(dbgs() << "+inf\n"); 2581 #endif 2582 } 2583 2584 unsigned NewDeps = 0; 2585 2586 // test bounds for <, *, *, ... 2587 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2588 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2589 Loops, DepthExpanded, Delta); 2590 2591 // Test bounds for =, *, *, ... 2592 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2593 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2594 Loops, DepthExpanded, Delta); 2595 2596 // test bounds for >, *, *, ... 2597 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2598 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2599 Loops, DepthExpanded, Delta); 2600 2601 Bound[Level].Direction = Dependence::DVEntry::ALL; 2602 return NewDeps; 2603 } 2604 else 2605 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2606 } 2607 2608 2609 // Returns true iff the current bounds are plausible. 2610 bool DependenceAnalysis::testBounds(unsigned char DirKind, 2611 unsigned Level, 2612 BoundInfo *Bound, 2613 const SCEV *Delta) const { 2614 Bound[Level].Direction = DirKind; 2615 if (const SCEV *LowerBound = getLowerBound(Bound)) 2616 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2617 return false; 2618 if (const SCEV *UpperBound = getUpperBound(Bound)) 2619 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2620 return false; 2621 return true; 2622 } 2623 2624 2625 // Computes the upper and lower bounds for level K 2626 // using the * direction. Records them in Bound. 2627 // Wolfe gives the equations 2628 // 2629 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2630 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2631 // 2632 // Since we normalize loops, we can simplify these equations to 2633 // 2634 // LB^*_k = (A^-_k - B^+_k)U_k 2635 // UB^*_k = (A^+_k - B^-_k)U_k 2636 // 2637 // We must be careful to handle the case where the upper bound is unknown. 2638 // Note that the lower bound is always <= 0 2639 // and the upper bound is always >= 0. 2640 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, 2641 CoefficientInfo *B, 2642 BoundInfo *Bound, 2643 unsigned K) const { 2644 Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity. 2645 Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity. 2646 if (Bound[K].Iterations) { 2647 Bound[K].Lower[Dependence::DVEntry::ALL] = 2648 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2649 Bound[K].Iterations); 2650 Bound[K].Upper[Dependence::DVEntry::ALL] = 2651 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2652 Bound[K].Iterations); 2653 } 2654 else { 2655 // If the difference is 0, we won't need to know the number of iterations. 2656 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2657 Bound[K].Lower[Dependence::DVEntry::ALL] = 2658 SE->getConstant(A[K].Coeff->getType(), 0); 2659 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2660 Bound[K].Upper[Dependence::DVEntry::ALL] = 2661 SE->getConstant(A[K].Coeff->getType(), 0); 2662 } 2663 } 2664 2665 2666 // Computes the upper and lower bounds for level K 2667 // using the = direction. Records them in Bound. 2668 // Wolfe gives the equations 2669 // 2670 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2671 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2672 // 2673 // Since we normalize loops, we can simplify these equations to 2674 // 2675 // LB^=_k = (A_k - B_k)^- U_k 2676 // UB^=_k = (A_k - B_k)^+ U_k 2677 // 2678 // We must be careful to handle the case where the upper bound is unknown. 2679 // Note that the lower bound is always <= 0 2680 // and the upper bound is always >= 0. 2681 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, 2682 CoefficientInfo *B, 2683 BoundInfo *Bound, 2684 unsigned K) const { 2685 Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity. 2686 Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity. 2687 if (Bound[K].Iterations) { 2688 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2689 const SCEV *NegativePart = getNegativePart(Delta); 2690 Bound[K].Lower[Dependence::DVEntry::EQ] = 2691 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2692 const SCEV *PositivePart = getPositivePart(Delta); 2693 Bound[K].Upper[Dependence::DVEntry::EQ] = 2694 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2695 } 2696 else { 2697 // If the positive/negative part of the difference is 0, 2698 // we won't need to know the number of iterations. 2699 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2700 const SCEV *NegativePart = getNegativePart(Delta); 2701 if (NegativePart->isZero()) 2702 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2703 const SCEV *PositivePart = getPositivePart(Delta); 2704 if (PositivePart->isZero()) 2705 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2706 } 2707 } 2708 2709 2710 // Computes the upper and lower bounds for level K 2711 // using the < direction. Records them in Bound. 2712 // Wolfe gives the equations 2713 // 2714 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2715 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2716 // 2717 // Since we normalize loops, we can simplify these equations to 2718 // 2719 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2720 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2721 // 2722 // We must be careful to handle the case where the upper bound is unknown. 2723 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, 2724 CoefficientInfo *B, 2725 BoundInfo *Bound, 2726 unsigned K) const { 2727 Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity. 2728 Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity. 2729 if (Bound[K].Iterations) { 2730 const SCEV *Iter_1 = 2731 SE->getMinusSCEV(Bound[K].Iterations, 2732 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2733 const SCEV *NegPart = 2734 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2735 Bound[K].Lower[Dependence::DVEntry::LT] = 2736 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2737 const SCEV *PosPart = 2738 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2739 Bound[K].Upper[Dependence::DVEntry::LT] = 2740 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2741 } 2742 else { 2743 // If the positive/negative part of the difference is 0, 2744 // we won't need to know the number of iterations. 2745 const SCEV *NegPart = 2746 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2747 if (NegPart->isZero()) 2748 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2749 const SCEV *PosPart = 2750 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2751 if (PosPart->isZero()) 2752 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2753 } 2754 } 2755 2756 2757 // Computes the upper and lower bounds for level K 2758 // using the > direction. Records them in Bound. 2759 // Wolfe gives the equations 2760 // 2761 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2762 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2763 // 2764 // Since we normalize loops, we can simplify these equations to 2765 // 2766 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2767 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2768 // 2769 // We must be careful to handle the case where the upper bound is unknown. 2770 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, 2771 CoefficientInfo *B, 2772 BoundInfo *Bound, 2773 unsigned K) const { 2774 Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity. 2775 Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity. 2776 if (Bound[K].Iterations) { 2777 const SCEV *Iter_1 = 2778 SE->getMinusSCEV(Bound[K].Iterations, 2779 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2780 const SCEV *NegPart = 2781 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2782 Bound[K].Lower[Dependence::DVEntry::GT] = 2783 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2784 const SCEV *PosPart = 2785 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2786 Bound[K].Upper[Dependence::DVEntry::GT] = 2787 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2788 } 2789 else { 2790 // If the positive/negative part of the difference is 0, 2791 // we won't need to know the number of iterations. 2792 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2793 if (NegPart->isZero()) 2794 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2795 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2796 if (PosPart->isZero()) 2797 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2798 } 2799 } 2800 2801 2802 // X^+ = max(X, 0) 2803 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { 2804 return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); 2805 } 2806 2807 2808 // X^- = min(X, 0) 2809 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { 2810 return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); 2811 } 2812 2813 2814 // Walks through the subscript, 2815 // collecting each coefficient, the associated loop bounds, 2816 // and recording its positive and negative parts for later use. 2817 DependenceAnalysis::CoefficientInfo * 2818 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, 2819 bool SrcFlag, 2820 const SCEV *&Constant) const { 2821 const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); 2822 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2823 for (unsigned K = 1; K <= MaxLevels; ++K) { 2824 CI[K].Coeff = Zero; 2825 CI[K].PosPart = Zero; 2826 CI[K].NegPart = Zero; 2827 CI[K].Iterations = NULL; 2828 } 2829 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2830 const Loop *L = AddRec->getLoop(); 2831 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2832 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2833 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2834 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2835 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2836 Subscript = AddRec->getStart(); 2837 } 2838 Constant = Subscript; 2839 #ifndef NDEBUG 2840 DEBUG(dbgs() << "\tCoefficient Info\n"); 2841 for (unsigned K = 1; K <= MaxLevels; ++K) { 2842 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2843 DEBUG(dbgs() << "\tPos Part = "); 2844 DEBUG(dbgs() << *CI[K].PosPart); 2845 DEBUG(dbgs() << "\tNeg Part = "); 2846 DEBUG(dbgs() << *CI[K].NegPart); 2847 DEBUG(dbgs() << "\tUpper Bound = "); 2848 if (CI[K].Iterations) 2849 DEBUG(dbgs() << *CI[K].Iterations); 2850 else 2851 DEBUG(dbgs() << "+inf"); 2852 DEBUG(dbgs() << '\n'); 2853 } 2854 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2855 #endif 2856 return CI; 2857 } 2858 2859 2860 // Looks through all the bounds info and 2861 // computes the lower bound given the current direction settings 2862 // at each level. If the lower bound for any level is -inf, 2863 // the result is -inf. 2864 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { 2865 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2866 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2867 if (Bound[K].Lower[Bound[K].Direction]) 2868 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2869 else 2870 Sum = NULL; 2871 } 2872 return Sum; 2873 } 2874 2875 2876 // Looks through all the bounds info and 2877 // computes the upper bound given the current direction settings 2878 // at each level. If the upper bound at any level is +inf, 2879 // the result is +inf. 2880 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { 2881 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2882 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2883 if (Bound[K].Upper[Bound[K].Direction]) 2884 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2885 else 2886 Sum = NULL; 2887 } 2888 return Sum; 2889 } 2890 2891 2892 //===----------------------------------------------------------------------===// 2893 // Constraint manipulation for Delta test. 2894 2895 // Given a linear SCEV, 2896 // return the coefficient (the step) 2897 // corresponding to the specified loop. 2898 // If there isn't one, return 0. 2899 // For example, given a*i + b*j + c*k, zeroing the coefficient 2900 // corresponding to the j loop would yield b. 2901 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, 2902 const Loop *TargetLoop) const { 2903 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2904 if (!AddRec) 2905 return SE->getConstant(Expr->getType(), 0); 2906 if (AddRec->getLoop() == TargetLoop) 2907 return AddRec->getStepRecurrence(*SE); 2908 return findCoefficient(AddRec->getStart(), TargetLoop); 2909 } 2910 2911 2912 // Given a linear SCEV, 2913 // return the SCEV given by zeroing out the coefficient 2914 // corresponding to the specified loop. 2915 // For example, given a*i + b*j + c*k, zeroing the coefficient 2916 // corresponding to the j loop would yield a*i + c*k. 2917 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, 2918 const Loop *TargetLoop) const { 2919 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2920 if (!AddRec) 2921 return Expr; // ignore 2922 if (AddRec->getLoop() == TargetLoop) 2923 return AddRec->getStart(); 2924 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2925 AddRec->getStepRecurrence(*SE), 2926 AddRec->getLoop(), 2927 AddRec->getNoWrapFlags()); 2928 } 2929 2930 2931 // Given a linear SCEV Expr, 2932 // return the SCEV given by adding some Value to the 2933 // coefficient corresponding to the specified TargetLoop. 2934 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2935 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2936 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, 2937 const Loop *TargetLoop, 2938 const SCEV *Value) const { 2939 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2940 if (!AddRec) // create a new addRec 2941 return SE->getAddRecExpr(Expr, 2942 Value, 2943 TargetLoop, 2944 SCEV::FlagAnyWrap); // Worst case, with no info. 2945 if (AddRec->getLoop() == TargetLoop) { 2946 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2947 if (Sum->isZero()) 2948 return AddRec->getStart(); 2949 return SE->getAddRecExpr(AddRec->getStart(), 2950 Sum, 2951 AddRec->getLoop(), 2952 AddRec->getNoWrapFlags()); 2953 } 2954 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2955 return SE->getAddRecExpr(AddRec, 2956 Value, 2957 TargetLoop, 2958 SCEV::FlagAnyWrap); 2959 return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(), 2960 TargetLoop, Value), 2961 AddRec->getStepRecurrence(*SE), 2962 AddRec->getLoop(), 2963 AddRec->getNoWrapFlags()); 2964 } 2965 2966 2967 // Review the constraints, looking for opportunities 2968 // to simplify a subscript pair (Src and Dst). 2969 // Return true if some simplification occurs. 2970 // If the simplification isn't exact (that is, if it is conservative 2971 // in terms of dependence), set consistent to false. 2972 // Corresponds to Figure 5 from the paper 2973 // 2974 // Practical Dependence Testing 2975 // Goff, Kennedy, Tseng 2976 // PLDI 1991 2977 bool DependenceAnalysis::propagate(const SCEV *&Src, 2978 const SCEV *&Dst, 2979 SmallBitVector &Loops, 2980 SmallVectorImpl<Constraint> &Constraints, 2981 bool &Consistent) { 2982 bool Result = false; 2983 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 2984 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 2985 DEBUG(Constraints[LI].dump(dbgs())); 2986 if (Constraints[LI].isDistance()) 2987 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 2988 else if (Constraints[LI].isLine()) 2989 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 2990 else if (Constraints[LI].isPoint()) 2991 Result |= propagatePoint(Src, Dst, Constraints[LI]); 2992 } 2993 return Result; 2994 } 2995 2996 2997 // Attempt to propagate a distance 2998 // constraint into a subscript pair (Src and Dst). 2999 // Return true if some simplification occurs. 3000 // If the simplification isn't exact (that is, if it is conservative 3001 // in terms of dependence), set consistent to false. 3002 bool DependenceAnalysis::propagateDistance(const SCEV *&Src, 3003 const SCEV *&Dst, 3004 Constraint &CurConstraint, 3005 bool &Consistent) { 3006 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3007 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3008 const SCEV *A_K = findCoefficient(Src, CurLoop); 3009 if (A_K->isZero()) 3010 return false; 3011 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3012 Src = SE->getMinusSCEV(Src, DA_K); 3013 Src = zeroCoefficient(Src, CurLoop); 3014 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3015 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3016 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3017 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3018 if (!findCoefficient(Dst, CurLoop)->isZero()) 3019 Consistent = false; 3020 return true; 3021 } 3022 3023 3024 // Attempt to propagate a line 3025 // constraint into a subscript pair (Src and Dst). 3026 // Return true if some simplification occurs. 3027 // If the simplification isn't exact (that is, if it is conservative 3028 // in terms of dependence), set consistent to false. 3029 bool DependenceAnalysis::propagateLine(const SCEV *&Src, 3030 const SCEV *&Dst, 3031 Constraint &CurConstraint, 3032 bool &Consistent) { 3033 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3034 const SCEV *A = CurConstraint.getA(); 3035 const SCEV *B = CurConstraint.getB(); 3036 const SCEV *C = CurConstraint.getC(); 3037 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3038 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3039 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3040 if (A->isZero()) { 3041 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3042 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3043 if (!Bconst || !Cconst) return false; 3044 APInt Beta = Bconst->getValue()->getValue(); 3045 APInt Charlie = Cconst->getValue()->getValue(); 3046 APInt CdivB = Charlie.sdiv(Beta); 3047 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3048 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3049 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3050 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3051 Dst = zeroCoefficient(Dst, CurLoop); 3052 if (!findCoefficient(Src, CurLoop)->isZero()) 3053 Consistent = false; 3054 } 3055 else if (B->isZero()) { 3056 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3057 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3058 if (!Aconst || !Cconst) return false; 3059 APInt Alpha = Aconst->getValue()->getValue(); 3060 APInt Charlie = Cconst->getValue()->getValue(); 3061 APInt CdivA = Charlie.sdiv(Alpha); 3062 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3063 const SCEV *A_K = findCoefficient(Src, CurLoop); 3064 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3065 Src = zeroCoefficient(Src, CurLoop); 3066 if (!findCoefficient(Dst, CurLoop)->isZero()) 3067 Consistent = false; 3068 } 3069 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3070 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3071 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3072 if (!Aconst || !Cconst) return false; 3073 APInt Alpha = Aconst->getValue()->getValue(); 3074 APInt Charlie = Cconst->getValue()->getValue(); 3075 APInt CdivA = Charlie.sdiv(Alpha); 3076 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3077 const SCEV *A_K = findCoefficient(Src, CurLoop); 3078 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3079 Src = zeroCoefficient(Src, CurLoop); 3080 Dst = addToCoefficient(Dst, CurLoop, A_K); 3081 if (!findCoefficient(Dst, CurLoop)->isZero()) 3082 Consistent = false; 3083 } 3084 else { 3085 // paper is incorrect here, or perhaps just misleading 3086 const SCEV *A_K = findCoefficient(Src, CurLoop); 3087 Src = SE->getMulExpr(Src, A); 3088 Dst = SE->getMulExpr(Dst, A); 3089 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3090 Src = zeroCoefficient(Src, CurLoop); 3091 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3092 if (!findCoefficient(Dst, CurLoop)->isZero()) 3093 Consistent = false; 3094 } 3095 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3096 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3097 return true; 3098 } 3099 3100 3101 // Attempt to propagate a point 3102 // constraint into a subscript pair (Src and Dst). 3103 // Return true if some simplification occurs. 3104 bool DependenceAnalysis::propagatePoint(const SCEV *&Src, 3105 const SCEV *&Dst, 3106 Constraint &CurConstraint) { 3107 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3108 const SCEV *A_K = findCoefficient(Src, CurLoop); 3109 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3110 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3111 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3112 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3113 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3114 Src = zeroCoefficient(Src, CurLoop); 3115 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3116 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3117 Dst = zeroCoefficient(Dst, CurLoop); 3118 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3119 return true; 3120 } 3121 3122 3123 // Update direction vector entry based on the current constraint. 3124 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, 3125 const Constraint &CurConstraint 3126 ) const { 3127 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3128 DEBUG(CurConstraint.dump(dbgs())); 3129 if (CurConstraint.isAny()) 3130 ; // use defaults 3131 else if (CurConstraint.isDistance()) { 3132 // this one is consistent, the others aren't 3133 Level.Scalar = false; 3134 Level.Distance = CurConstraint.getD(); 3135 unsigned NewDirection = Dependence::DVEntry::NONE; 3136 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3137 NewDirection = Dependence::DVEntry::EQ; 3138 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3139 NewDirection |= Dependence::DVEntry::LT; 3140 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3141 NewDirection |= Dependence::DVEntry::GT; 3142 Level.Direction &= NewDirection; 3143 } 3144 else if (CurConstraint.isLine()) { 3145 Level.Scalar = false; 3146 Level.Distance = NULL; 3147 // direction should be accurate 3148 } 3149 else if (CurConstraint.isPoint()) { 3150 Level.Scalar = false; 3151 Level.Distance = NULL; 3152 unsigned NewDirection = Dependence::DVEntry::NONE; 3153 if (!isKnownPredicate(CmpInst::ICMP_NE, 3154 CurConstraint.getY(), 3155 CurConstraint.getX())) 3156 // if X may be = Y 3157 NewDirection |= Dependence::DVEntry::EQ; 3158 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3159 CurConstraint.getY(), 3160 CurConstraint.getX())) 3161 // if Y may be > X 3162 NewDirection |= Dependence::DVEntry::LT; 3163 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3164 CurConstraint.getY(), 3165 CurConstraint.getX())) 3166 // if Y may be < X 3167 NewDirection |= Dependence::DVEntry::GT; 3168 Level.Direction &= NewDirection; 3169 } 3170 else 3171 llvm_unreachable("constraint has unexpected kind"); 3172 } 3173 3174 3175 //===----------------------------------------------------------------------===// 3176 3177 #ifndef NDEBUG 3178 // For debugging purposes, dump a small bit vector to dbgs(). 3179 static void dumpSmallBitVector(SmallBitVector &BV) { 3180 dbgs() << "{"; 3181 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3182 dbgs() << VI; 3183 if (BV.find_next(VI) >= 0) 3184 dbgs() << ' '; 3185 } 3186 dbgs() << "}\n"; 3187 } 3188 #endif 3189 3190 3191 // depends - 3192 // Returns NULL if there is no dependence. 3193 // Otherwise, return a Dependence with as many details as possible. 3194 // Corresponds to Section 3.1 in the paper 3195 // 3196 // Practical Dependence Testing 3197 // Goff, Kennedy, Tseng 3198 // PLDI 1991 3199 // 3200 // Care is required to keep the routine below, getSplitIteration(), 3201 // up to date with respect to this routine. 3202 Dependence *DependenceAnalysis::depends(Instruction *Src, 3203 Instruction *Dst, 3204 bool PossiblyLoopIndependent) { 3205 if (Src == Dst) 3206 PossiblyLoopIndependent = false; 3207 3208 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3209 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3210 // if both instructions don't reference memory, there's no dependence 3211 return NULL; 3212 3213 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3214 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3215 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3216 return new Dependence(Src, Dst); 3217 } 3218 3219 Value *SrcPtr = getPointerOperand(Src); 3220 Value *DstPtr = getPointerOperand(Dst); 3221 3222 switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { 3223 case AliasAnalysis::MayAlias: 3224 case AliasAnalysis::PartialAlias: 3225 // cannot analyse objects if we don't understand their aliasing. 3226 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3227 return new Dependence(Src, Dst); 3228 case AliasAnalysis::NoAlias: 3229 // If the objects noalias, they are distinct, accesses are independent. 3230 DEBUG(dbgs() << "no alias\n"); 3231 return NULL; 3232 case AliasAnalysis::MustAlias: 3233 break; // The underlying objects alias; test accesses for dependence. 3234 } 3235 3236 // establish loop nesting levels 3237 establishNestingLevels(Src, Dst); 3238 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3239 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3240 3241 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3242 ++TotalArrayPairs; 3243 3244 // See if there are GEPs we can use. 3245 bool UsefulGEP = false; 3246 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3247 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3248 if (SrcGEP && DstGEP && 3249 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3250 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3251 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3252 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3253 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3254 3255 UsefulGEP = 3256 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3257 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3258 } 3259 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3260 SmallVector<Subscript, 4> Pair(Pairs); 3261 if (UsefulGEP) { 3262 DEBUG(dbgs() << " using GEPs\n"); 3263 unsigned P = 0; 3264 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3265 SrcEnd = SrcGEP->idx_end(), 3266 DstIdx = DstGEP->idx_begin(); 3267 SrcIdx != SrcEnd; 3268 ++SrcIdx, ++DstIdx, ++P) { 3269 Pair[P].Src = SE->getSCEV(*SrcIdx); 3270 Pair[P].Dst = SE->getSCEV(*DstIdx); 3271 } 3272 } 3273 else { 3274 DEBUG(dbgs() << " ignoring GEPs\n"); 3275 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3276 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3277 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3278 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3279 Pair[0].Src = SrcSCEV; 3280 Pair[0].Dst = DstSCEV; 3281 } 3282 3283 for (unsigned P = 0; P < Pairs; ++P) { 3284 Pair[P].Loops.resize(MaxLevels + 1); 3285 Pair[P].GroupLoops.resize(MaxLevels + 1); 3286 Pair[P].Group.resize(Pairs); 3287 removeMatchingExtensions(&Pair[P]); 3288 Pair[P].Classification = 3289 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3290 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3291 Pair[P].Loops); 3292 Pair[P].GroupLoops = Pair[P].Loops; 3293 Pair[P].Group.set(P); 3294 DEBUG(dbgs() << " subscript " << P << "\n"); 3295 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3296 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3297 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3298 DEBUG(dbgs() << "\tloops = "); 3299 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3300 } 3301 3302 SmallBitVector Separable(Pairs); 3303 SmallBitVector Coupled(Pairs); 3304 3305 // Partition subscripts into separable and minimally-coupled groups 3306 // Algorithm in paper is algorithmically better; 3307 // this may be faster in practice. Check someday. 3308 // 3309 // Here's an example of how it works. Consider this code: 3310 // 3311 // for (i = ...) { 3312 // for (j = ...) { 3313 // for (k = ...) { 3314 // for (l = ...) { 3315 // for (m = ...) { 3316 // A[i][j][k][m] = ...; 3317 // ... = A[0][j][l][i + j]; 3318 // } 3319 // } 3320 // } 3321 // } 3322 // } 3323 // 3324 // There are 4 subscripts here: 3325 // 0 [i] and [0] 3326 // 1 [j] and [j] 3327 // 2 [k] and [l] 3328 // 3 [m] and [i + j] 3329 // 3330 // We've already classified each subscript pair as ZIV, SIV, etc., 3331 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3332 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3333 // and set Pair[P].Group = {P}. 3334 // 3335 // Src Dst Classification Loops GroupLoops Group 3336 // 0 [i] [0] SIV {1} {1} {0} 3337 // 1 [j] [j] SIV {2} {2} {1} 3338 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3339 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3340 // 3341 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3342 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3343 // 3344 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3345 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3346 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3347 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3348 // to either Separable or Coupled). 3349 // 3350 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3351 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3352 // so Pair[3].Group = {0, 1, 3} and Done = false. 3353 // 3354 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3355 // Since Done remains true, we add 2 to the set of Separable pairs. 3356 // 3357 // Finally, we consider 3. There's nothing to compare it with, 3358 // so Done remains true and we add it to the Coupled set. 3359 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3360 // 3361 // In the end, we've got 1 separable subscript and 1 coupled group. 3362 for (unsigned SI = 0; SI < Pairs; ++SI) { 3363 if (Pair[SI].Classification == Subscript::NonLinear) { 3364 // ignore these, but collect loops for later 3365 ++NonlinearSubscriptPairs; 3366 collectCommonLoops(Pair[SI].Src, 3367 LI->getLoopFor(Src->getParent()), 3368 Pair[SI].Loops); 3369 collectCommonLoops(Pair[SI].Dst, 3370 LI->getLoopFor(Dst->getParent()), 3371 Pair[SI].Loops); 3372 Result.Consistent = false; 3373 } 3374 else if (Pair[SI].Classification == Subscript::ZIV) { 3375 // always separable 3376 Separable.set(SI); 3377 } 3378 else { 3379 // SIV, RDIV, or MIV, so check for coupled group 3380 bool Done = true; 3381 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3382 SmallBitVector Intersection = Pair[SI].GroupLoops; 3383 Intersection &= Pair[SJ].GroupLoops; 3384 if (Intersection.any()) { 3385 // accumulate set of all the loops in group 3386 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3387 // accumulate set of all subscripts in group 3388 Pair[SJ].Group |= Pair[SI].Group; 3389 Done = false; 3390 } 3391 } 3392 if (Done) { 3393 if (Pair[SI].Group.count() == 1) { 3394 Separable.set(SI); 3395 ++SeparableSubscriptPairs; 3396 } 3397 else { 3398 Coupled.set(SI); 3399 ++CoupledSubscriptPairs; 3400 } 3401 } 3402 } 3403 } 3404 3405 DEBUG(dbgs() << " Separable = "); 3406 DEBUG(dumpSmallBitVector(Separable)); 3407 DEBUG(dbgs() << " Coupled = "); 3408 DEBUG(dumpSmallBitVector(Coupled)); 3409 3410 Constraint NewConstraint; 3411 NewConstraint.setAny(SE); 3412 3413 // test separable subscripts 3414 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3415 DEBUG(dbgs() << "testing subscript " << SI); 3416 switch (Pair[SI].Classification) { 3417 case Subscript::ZIV: 3418 DEBUG(dbgs() << ", ZIV\n"); 3419 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3420 return NULL; 3421 break; 3422 case Subscript::SIV: { 3423 DEBUG(dbgs() << ", SIV\n"); 3424 unsigned Level; 3425 const SCEV *SplitIter = NULL; 3426 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3427 Result, NewConstraint, SplitIter)) 3428 return NULL; 3429 break; 3430 } 3431 case Subscript::RDIV: 3432 DEBUG(dbgs() << ", RDIV\n"); 3433 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3434 return NULL; 3435 break; 3436 case Subscript::MIV: 3437 DEBUG(dbgs() << ", MIV\n"); 3438 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3439 return NULL; 3440 break; 3441 default: 3442 llvm_unreachable("subscript has unexpected classification"); 3443 } 3444 } 3445 3446 if (Coupled.count()) { 3447 // test coupled subscript groups 3448 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3449 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3450 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3451 for (unsigned II = 0; II <= MaxLevels; ++II) 3452 Constraints[II].setAny(SE); 3453 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3454 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3455 SmallBitVector Group(Pair[SI].Group); 3456 SmallBitVector Sivs(Pairs); 3457 SmallBitVector Mivs(Pairs); 3458 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3459 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3460 DEBUG(dbgs() << SJ << " "); 3461 if (Pair[SJ].Classification == Subscript::SIV) 3462 Sivs.set(SJ); 3463 else 3464 Mivs.set(SJ); 3465 } 3466 DEBUG(dbgs() << "}\n"); 3467 while (Sivs.any()) { 3468 bool Changed = false; 3469 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3470 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3471 // SJ is an SIV subscript that's part of the current coupled group 3472 unsigned Level; 3473 const SCEV *SplitIter = NULL; 3474 DEBUG(dbgs() << "SIV\n"); 3475 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3476 Result, NewConstraint, SplitIter)) 3477 return NULL; 3478 ConstrainedLevels.set(Level); 3479 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3480 if (Constraints[Level].isEmpty()) { 3481 ++DeltaIndependence; 3482 return NULL; 3483 } 3484 Changed = true; 3485 } 3486 Sivs.reset(SJ); 3487 } 3488 if (Changed) { 3489 // propagate, possibly creating new SIVs and ZIVs 3490 DEBUG(dbgs() << " propagating\n"); 3491 DEBUG(dbgs() << "\tMivs = "); 3492 DEBUG(dumpSmallBitVector(Mivs)); 3493 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3494 // SJ is an MIV subscript that's part of the current coupled group 3495 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3496 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3497 Constraints, Result.Consistent)) { 3498 DEBUG(dbgs() << "\t Changed\n"); 3499 ++DeltaPropagations; 3500 Pair[SJ].Classification = 3501 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3502 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3503 Pair[SJ].Loops); 3504 switch (Pair[SJ].Classification) { 3505 case Subscript::ZIV: 3506 DEBUG(dbgs() << "ZIV\n"); 3507 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3508 return NULL; 3509 Mivs.reset(SJ); 3510 break; 3511 case Subscript::SIV: 3512 Sivs.set(SJ); 3513 Mivs.reset(SJ); 3514 break; 3515 case Subscript::RDIV: 3516 case Subscript::MIV: 3517 break; 3518 default: 3519 llvm_unreachable("bad subscript classification"); 3520 } 3521 } 3522 } 3523 } 3524 } 3525 3526 // test & propagate remaining RDIVs 3527 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3528 if (Pair[SJ].Classification == Subscript::RDIV) { 3529 DEBUG(dbgs() << "RDIV test\n"); 3530 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3531 return NULL; 3532 // I don't yet understand how to propagate RDIV results 3533 Mivs.reset(SJ); 3534 } 3535 } 3536 3537 // test remaining MIVs 3538 // This code is temporary. 3539 // Better to somehow test all remaining subscripts simultaneously. 3540 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3541 if (Pair[SJ].Classification == Subscript::MIV) { 3542 DEBUG(dbgs() << "MIV test\n"); 3543 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3544 return NULL; 3545 } 3546 else 3547 llvm_unreachable("expected only MIV subscripts at this point"); 3548 } 3549 3550 // update Result.DV from constraint vector 3551 DEBUG(dbgs() << " updating\n"); 3552 for (int SJ = ConstrainedLevels.find_first(); 3553 SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { 3554 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3555 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3556 return NULL; 3557 } 3558 } 3559 } 3560 3561 // Make sure the Scalar flags are set correctly. 3562 SmallBitVector CompleteLoops(MaxLevels + 1); 3563 for (unsigned SI = 0; SI < Pairs; ++SI) 3564 CompleteLoops |= Pair[SI].Loops; 3565 for (unsigned II = 1; II <= CommonLevels; ++II) 3566 if (CompleteLoops[II]) 3567 Result.DV[II - 1].Scalar = false; 3568 3569 if (PossiblyLoopIndependent) { 3570 // Make sure the LoopIndependent flag is set correctly. 3571 // All directions must include equal, otherwise no 3572 // loop-independent dependence is possible. 3573 for (unsigned II = 1; II <= CommonLevels; ++II) { 3574 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3575 Result.LoopIndependent = false; 3576 break; 3577 } 3578 } 3579 } 3580 else { 3581 // On the other hand, if all directions are equal and there's no 3582 // loop-independent dependence possible, then no dependence exists. 3583 bool AllEqual = true; 3584 for (unsigned II = 1; II <= CommonLevels; ++II) { 3585 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3586 AllEqual = false; 3587 break; 3588 } 3589 } 3590 if (AllEqual) 3591 return NULL; 3592 } 3593 3594 FullDependence *Final = new FullDependence(Result); 3595 Result.DV = NULL; 3596 return Final; 3597 } 3598 3599 3600 3601 //===----------------------------------------------------------------------===// 3602 // getSplitIteration - 3603 // Rather than spend rarely-used space recording the splitting iteration 3604 // during the Weak-Crossing SIV test, we re-compute it on demand. 3605 // The re-computation is basically a repeat of the entire dependence test, 3606 // though simplified since we know that the dependence exists. 3607 // It's tedious, since we must go through all propagations, etc. 3608 // 3609 // Care is required to keep this code up to date with respect to the routine 3610 // above, depends(). 3611 // 3612 // Generally, the dependence analyzer will be used to build 3613 // a dependence graph for a function (basically a map from instructions 3614 // to dependences). Looking for cycles in the graph shows us loops 3615 // that cannot be trivially vectorized/parallelized. 3616 // 3617 // We can try to improve the situation by examining all the dependences 3618 // that make up the cycle, looking for ones we can break. 3619 // Sometimes, peeling the first or last iteration of a loop will break 3620 // dependences, and we've got flags for those possibilities. 3621 // Sometimes, splitting a loop at some other iteration will do the trick, 3622 // and we've got a flag for that case. Rather than waste the space to 3623 // record the exact iteration (since we rarely know), we provide 3624 // a method that calculates the iteration. It's a drag that it must work 3625 // from scratch, but wonderful in that it's possible. 3626 // 3627 // Here's an example: 3628 // 3629 // for (i = 0; i < 10; i++) 3630 // A[i] = ... 3631 // ... = A[11 - i] 3632 // 3633 // There's a loop-carried flow dependence from the store to the load, 3634 // found by the weak-crossing SIV test. The dependence will have a flag, 3635 // indicating that the dependence can be broken by splitting the loop. 3636 // Calling getSplitIteration will return 5. 3637 // Splitting the loop breaks the dependence, like so: 3638 // 3639 // for (i = 0; i <= 5; i++) 3640 // A[i] = ... 3641 // ... = A[11 - i] 3642 // for (i = 6; i < 10; i++) 3643 // A[i] = ... 3644 // ... = A[11 - i] 3645 // 3646 // breaks the dependence and allows us to vectorize/parallelize 3647 // both loops. 3648 const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, 3649 unsigned SplitLevel) { 3650 assert(Dep && "expected a pointer to a Dependence"); 3651 assert(Dep->isSplitable(SplitLevel) && 3652 "Dep should be splitable at SplitLevel"); 3653 Instruction *Src = Dep->getSrc(); 3654 Instruction *Dst = Dep->getDst(); 3655 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3656 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3657 assert(isLoadOrStore(Src)); 3658 assert(isLoadOrStore(Dst)); 3659 Value *SrcPtr = getPointerOperand(Src); 3660 Value *DstPtr = getPointerOperand(Dst); 3661 assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == 3662 AliasAnalysis::MustAlias); 3663 3664 // establish loop nesting levels 3665 establishNestingLevels(Src, Dst); 3666 3667 FullDependence Result(Src, Dst, false, CommonLevels); 3668 3669 // See if there are GEPs we can use. 3670 bool UsefulGEP = false; 3671 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3672 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3673 if (SrcGEP && DstGEP && 3674 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3675 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3676 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3677 UsefulGEP = 3678 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3679 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3680 } 3681 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3682 SmallVector<Subscript, 4> Pair(Pairs); 3683 if (UsefulGEP) { 3684 unsigned P = 0; 3685 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3686 SrcEnd = SrcGEP->idx_end(), 3687 DstIdx = DstGEP->idx_begin(); 3688 SrcIdx != SrcEnd; 3689 ++SrcIdx, ++DstIdx, ++P) { 3690 Pair[P].Src = SE->getSCEV(*SrcIdx); 3691 Pair[P].Dst = SE->getSCEV(*DstIdx); 3692 } 3693 } 3694 else { 3695 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3696 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3697 Pair[0].Src = SrcSCEV; 3698 Pair[0].Dst = DstSCEV; 3699 } 3700 3701 for (unsigned P = 0; P < Pairs; ++P) { 3702 Pair[P].Loops.resize(MaxLevels + 1); 3703 Pair[P].GroupLoops.resize(MaxLevels + 1); 3704 Pair[P].Group.resize(Pairs); 3705 removeMatchingExtensions(&Pair[P]); 3706 Pair[P].Classification = 3707 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3708 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3709 Pair[P].Loops); 3710 Pair[P].GroupLoops = Pair[P].Loops; 3711 Pair[P].Group.set(P); 3712 } 3713 3714 SmallBitVector Separable(Pairs); 3715 SmallBitVector Coupled(Pairs); 3716 3717 // partition subscripts into separable and minimally-coupled groups 3718 for (unsigned SI = 0; SI < Pairs; ++SI) { 3719 if (Pair[SI].Classification == Subscript::NonLinear) { 3720 // ignore these, but collect loops for later 3721 collectCommonLoops(Pair[SI].Src, 3722 LI->getLoopFor(Src->getParent()), 3723 Pair[SI].Loops); 3724 collectCommonLoops(Pair[SI].Dst, 3725 LI->getLoopFor(Dst->getParent()), 3726 Pair[SI].Loops); 3727 Result.Consistent = false; 3728 } 3729 else if (Pair[SI].Classification == Subscript::ZIV) 3730 Separable.set(SI); 3731 else { 3732 // SIV, RDIV, or MIV, so check for coupled group 3733 bool Done = true; 3734 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3735 SmallBitVector Intersection = Pair[SI].GroupLoops; 3736 Intersection &= Pair[SJ].GroupLoops; 3737 if (Intersection.any()) { 3738 // accumulate set of all the loops in group 3739 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3740 // accumulate set of all subscripts in group 3741 Pair[SJ].Group |= Pair[SI].Group; 3742 Done = false; 3743 } 3744 } 3745 if (Done) { 3746 if (Pair[SI].Group.count() == 1) 3747 Separable.set(SI); 3748 else 3749 Coupled.set(SI); 3750 } 3751 } 3752 } 3753 3754 Constraint NewConstraint; 3755 NewConstraint.setAny(SE); 3756 3757 // test separable subscripts 3758 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3759 switch (Pair[SI].Classification) { 3760 case Subscript::SIV: { 3761 unsigned Level; 3762 const SCEV *SplitIter = NULL; 3763 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3764 Result, NewConstraint, SplitIter); 3765 if (Level == SplitLevel) { 3766 assert(SplitIter != NULL); 3767 return SplitIter; 3768 } 3769 break; 3770 } 3771 case Subscript::ZIV: 3772 case Subscript::RDIV: 3773 case Subscript::MIV: 3774 break; 3775 default: 3776 llvm_unreachable("subscript has unexpected classification"); 3777 } 3778 } 3779 3780 if (Coupled.count()) { 3781 // test coupled subscript groups 3782 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3783 for (unsigned II = 0; II <= MaxLevels; ++II) 3784 Constraints[II].setAny(SE); 3785 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3786 SmallBitVector Group(Pair[SI].Group); 3787 SmallBitVector Sivs(Pairs); 3788 SmallBitVector Mivs(Pairs); 3789 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3790 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3791 if (Pair[SJ].Classification == Subscript::SIV) 3792 Sivs.set(SJ); 3793 else 3794 Mivs.set(SJ); 3795 } 3796 while (Sivs.any()) { 3797 bool Changed = false; 3798 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3799 // SJ is an SIV subscript that's part of the current coupled group 3800 unsigned Level; 3801 const SCEV *SplitIter = NULL; 3802 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3803 Result, NewConstraint, SplitIter); 3804 if (Level == SplitLevel && SplitIter) 3805 return SplitIter; 3806 ConstrainedLevels.set(Level); 3807 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3808 Changed = true; 3809 Sivs.reset(SJ); 3810 } 3811 if (Changed) { 3812 // propagate, possibly creating new SIVs and ZIVs 3813 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3814 // SJ is an MIV subscript that's part of the current coupled group 3815 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3816 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3817 Pair[SJ].Classification = 3818 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3819 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3820 Pair[SJ].Loops); 3821 switch (Pair[SJ].Classification) { 3822 case Subscript::ZIV: 3823 Mivs.reset(SJ); 3824 break; 3825 case Subscript::SIV: 3826 Sivs.set(SJ); 3827 Mivs.reset(SJ); 3828 break; 3829 case Subscript::RDIV: 3830 case Subscript::MIV: 3831 break; 3832 default: 3833 llvm_unreachable("bad subscript classification"); 3834 } 3835 } 3836 } 3837 } 3838 } 3839 } 3840 } 3841 llvm_unreachable("somehow reached end of routine"); 3842 return NULL; 3843 } 3844