1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/Statistic.h" 56 #include "llvm/Analysis/AliasAnalysis.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/ScalarEvolution.h" 59 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 60 #include "llvm/Analysis/ValueTracking.h" 61 #include "llvm/IR/InstIterator.h" 62 #include "llvm/IR/Operator.h" 63 #include "llvm/Support/CommandLine.h" 64 #include "llvm/Support/Debug.h" 65 #include "llvm/Support/ErrorHandling.h" 66 #include "llvm/Support/raw_ostream.h" 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "da" 71 72 //===----------------------------------------------------------------------===// 73 // statistics 74 75 STATISTIC(TotalArrayPairs, "Array pairs tested"); 76 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 77 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 78 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 79 STATISTIC(ZIVapplications, "ZIV applications"); 80 STATISTIC(ZIVindependence, "ZIV independence"); 81 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 82 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 83 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 84 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 85 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 86 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 87 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 88 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 89 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 90 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 91 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 92 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 93 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 94 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 95 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 96 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 97 STATISTIC(DeltaApplications, "Delta applications"); 98 STATISTIC(DeltaSuccesses, "Delta successes"); 99 STATISTIC(DeltaIndependence, "Delta independence"); 100 STATISTIC(DeltaPropagations, "Delta propagations"); 101 STATISTIC(GCDapplications, "GCD applications"); 102 STATISTIC(GCDsuccesses, "GCD successes"); 103 STATISTIC(GCDindependence, "GCD independence"); 104 STATISTIC(BanerjeeApplications, "Banerjee applications"); 105 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 106 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 107 108 static cl::opt<bool> 109 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 110 cl::desc("Try to delinearize array references.")); 111 112 //===----------------------------------------------------------------------===// 113 // basics 114 115 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", 116 "Dependence Analysis", true, true) 117 INITIALIZE_PASS_DEPENDENCY(LoopInfo) 118 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 119 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 120 INITIALIZE_PASS_END(DependenceAnalysis, "da", 121 "Dependence Analysis", true, true) 122 123 char DependenceAnalysis::ID = 0; 124 125 126 FunctionPass *llvm::createDependenceAnalysisPass() { 127 return new DependenceAnalysis(); 128 } 129 130 131 bool DependenceAnalysis::runOnFunction(Function &F) { 132 this->F = &F; 133 AA = &getAnalysis<AliasAnalysis>(); 134 SE = &getAnalysis<ScalarEvolution>(); 135 LI = &getAnalysis<LoopInfo>(); 136 return false; 137 } 138 139 140 void DependenceAnalysis::releaseMemory() { 141 } 142 143 144 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 145 AU.setPreservesAll(); 146 AU.addRequiredTransitive<AliasAnalysis>(); 147 AU.addRequiredTransitive<ScalarEvolution>(); 148 AU.addRequiredTransitive<LoopInfo>(); 149 } 150 151 152 // Used to test the dependence analyzer. 153 // Looks through the function, noting loads and stores. 154 // Calls depends() on every possible pair and prints out the result. 155 // Ignores all other instructions. 156 static 157 void dumpExampleDependence(raw_ostream &OS, Function *F, 158 DependenceAnalysis *DA) { 159 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); 160 SrcI != SrcE; ++SrcI) { 161 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 162 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 163 DstI != DstE; ++DstI) { 164 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 165 OS << "da analyze - "; 166 if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) { 167 D->dump(OS); 168 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 169 if (D->isSplitable(Level)) { 170 OS << "da analyze - split level = " << Level; 171 OS << ", iteration = " << *DA->getSplitIteration(D, Level); 172 OS << "!\n"; 173 } 174 } 175 delete D; 176 } 177 else 178 OS << "none!\n"; 179 } 180 } 181 } 182 } 183 } 184 185 186 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { 187 dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); 188 } 189 190 //===----------------------------------------------------------------------===// 191 // Dependence methods 192 193 // Returns true if this is an input dependence. 194 bool Dependence::isInput() const { 195 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 196 } 197 198 199 // Returns true if this is an output dependence. 200 bool Dependence::isOutput() const { 201 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 202 } 203 204 205 // Returns true if this is an flow (aka true) dependence. 206 bool Dependence::isFlow() const { 207 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 208 } 209 210 211 // Returns true if this is an anti dependence. 212 bool Dependence::isAnti() const { 213 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 214 } 215 216 217 // Returns true if a particular level is scalar; that is, 218 // if no subscript in the source or destination mention the induction 219 // variable associated with the loop at this level. 220 // Leave this out of line, so it will serve as a virtual method anchor 221 bool Dependence::isScalar(unsigned level) const { 222 return false; 223 } 224 225 226 //===----------------------------------------------------------------------===// 227 // FullDependence methods 228 229 FullDependence::FullDependence(Instruction *Source, 230 Instruction *Destination, 231 bool PossiblyLoopIndependent, 232 unsigned CommonLevels) : 233 Dependence(Source, Destination), 234 Levels(CommonLevels), 235 LoopIndependent(PossiblyLoopIndependent) { 236 Consistent = true; 237 DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; 238 } 239 240 // The rest are simple getters that hide the implementation. 241 242 // getDirection - Returns the direction associated with a particular level. 243 unsigned FullDependence::getDirection(unsigned Level) const { 244 assert(0 < Level && Level <= Levels && "Level out of range"); 245 return DV[Level - 1].Direction; 246 } 247 248 249 // Returns the distance (or NULL) associated with a particular level. 250 const SCEV *FullDependence::getDistance(unsigned Level) const { 251 assert(0 < Level && Level <= Levels && "Level out of range"); 252 return DV[Level - 1].Distance; 253 } 254 255 256 // Returns true if a particular level is scalar; that is, 257 // if no subscript in the source or destination mention the induction 258 // variable associated with the loop at this level. 259 bool FullDependence::isScalar(unsigned Level) const { 260 assert(0 < Level && Level <= Levels && "Level out of range"); 261 return DV[Level - 1].Scalar; 262 } 263 264 265 // Returns true if peeling the first iteration from this loop 266 // will break this dependence. 267 bool FullDependence::isPeelFirst(unsigned Level) const { 268 assert(0 < Level && Level <= Levels && "Level out of range"); 269 return DV[Level - 1].PeelFirst; 270 } 271 272 273 // Returns true if peeling the last iteration from this loop 274 // will break this dependence. 275 bool FullDependence::isPeelLast(unsigned Level) const { 276 assert(0 < Level && Level <= Levels && "Level out of range"); 277 return DV[Level - 1].PeelLast; 278 } 279 280 281 // Returns true if splitting this loop will break the dependence. 282 bool FullDependence::isSplitable(unsigned Level) const { 283 assert(0 < Level && Level <= Levels && "Level out of range"); 284 return DV[Level - 1].Splitable; 285 } 286 287 288 //===----------------------------------------------------------------------===// 289 // DependenceAnalysis::Constraint methods 290 291 // If constraint is a point <X, Y>, returns X. 292 // Otherwise assert. 293 const SCEV *DependenceAnalysis::Constraint::getX() const { 294 assert(Kind == Point && "Kind should be Point"); 295 return A; 296 } 297 298 299 // If constraint is a point <X, Y>, returns Y. 300 // Otherwise assert. 301 const SCEV *DependenceAnalysis::Constraint::getY() const { 302 assert(Kind == Point && "Kind should be Point"); 303 return B; 304 } 305 306 307 // If constraint is a line AX + BY = C, returns A. 308 // Otherwise assert. 309 const SCEV *DependenceAnalysis::Constraint::getA() const { 310 assert((Kind == Line || Kind == Distance) && 311 "Kind should be Line (or Distance)"); 312 return A; 313 } 314 315 316 // If constraint is a line AX + BY = C, returns B. 317 // Otherwise assert. 318 const SCEV *DependenceAnalysis::Constraint::getB() const { 319 assert((Kind == Line || Kind == Distance) && 320 "Kind should be Line (or Distance)"); 321 return B; 322 } 323 324 325 // If constraint is a line AX + BY = C, returns C. 326 // Otherwise assert. 327 const SCEV *DependenceAnalysis::Constraint::getC() const { 328 assert((Kind == Line || Kind == Distance) && 329 "Kind should be Line (or Distance)"); 330 return C; 331 } 332 333 334 // If constraint is a distance, returns D. 335 // Otherwise assert. 336 const SCEV *DependenceAnalysis::Constraint::getD() const { 337 assert(Kind == Distance && "Kind should be Distance"); 338 return SE->getNegativeSCEV(C); 339 } 340 341 342 // Returns the loop associated with this constraint. 343 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { 344 assert((Kind == Distance || Kind == Line || Kind == Point) && 345 "Kind should be Distance, Line, or Point"); 346 return AssociatedLoop; 347 } 348 349 350 void DependenceAnalysis::Constraint::setPoint(const SCEV *X, 351 const SCEV *Y, 352 const Loop *CurLoop) { 353 Kind = Point; 354 A = X; 355 B = Y; 356 AssociatedLoop = CurLoop; 357 } 358 359 360 void DependenceAnalysis::Constraint::setLine(const SCEV *AA, 361 const SCEV *BB, 362 const SCEV *CC, 363 const Loop *CurLoop) { 364 Kind = Line; 365 A = AA; 366 B = BB; 367 C = CC; 368 AssociatedLoop = CurLoop; 369 } 370 371 372 void DependenceAnalysis::Constraint::setDistance(const SCEV *D, 373 const Loop *CurLoop) { 374 Kind = Distance; 375 A = SE->getConstant(D->getType(), 1); 376 B = SE->getNegativeSCEV(A); 377 C = SE->getNegativeSCEV(D); 378 AssociatedLoop = CurLoop; 379 } 380 381 382 void DependenceAnalysis::Constraint::setEmpty() { 383 Kind = Empty; 384 } 385 386 387 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { 388 SE = NewSE; 389 Kind = Any; 390 } 391 392 393 // For debugging purposes. Dumps the constraint out to OS. 394 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { 395 if (isEmpty()) 396 OS << " Empty\n"; 397 else if (isAny()) 398 OS << " Any\n"; 399 else if (isPoint()) 400 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 401 else if (isDistance()) 402 OS << " Distance is " << *getD() << 403 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 404 else if (isLine()) 405 OS << " Line is " << *getA() << "*X + " << 406 *getB() << "*Y = " << *getC() << "\n"; 407 else 408 llvm_unreachable("unknown constraint type in Constraint::dump"); 409 } 410 411 412 // Updates X with the intersection 413 // of the Constraints X and Y. Returns true if X has changed. 414 // Corresponds to Figure 4 from the paper 415 // 416 // Practical Dependence Testing 417 // Goff, Kennedy, Tseng 418 // PLDI 1991 419 bool DependenceAnalysis::intersectConstraints(Constraint *X, 420 const Constraint *Y) { 421 ++DeltaApplications; 422 DEBUG(dbgs() << "\tintersect constraints\n"); 423 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 424 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 425 assert(!Y->isPoint() && "Y must not be a Point"); 426 if (X->isAny()) { 427 if (Y->isAny()) 428 return false; 429 *X = *Y; 430 return true; 431 } 432 if (X->isEmpty()) 433 return false; 434 if (Y->isEmpty()) { 435 X->setEmpty(); 436 return true; 437 } 438 439 if (X->isDistance() && Y->isDistance()) { 440 DEBUG(dbgs() << "\t intersect 2 distances\n"); 441 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 442 return false; 443 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 444 X->setEmpty(); 445 ++DeltaSuccesses; 446 return true; 447 } 448 // Hmmm, interesting situation. 449 // I guess if either is constant, keep it and ignore the other. 450 if (isa<SCEVConstant>(Y->getD())) { 451 *X = *Y; 452 return true; 453 } 454 return false; 455 } 456 457 // At this point, the pseudo-code in Figure 4 of the paper 458 // checks if (X->isPoint() && Y->isPoint()). 459 // This case can't occur in our implementation, 460 // since a Point can only arise as the result of intersecting 461 // two Line constraints, and the right-hand value, Y, is never 462 // the result of an intersection. 463 assert(!(X->isPoint() && Y->isPoint()) && 464 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 465 466 if (X->isLine() && Y->isLine()) { 467 DEBUG(dbgs() << "\t intersect 2 lines\n"); 468 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 469 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 470 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 471 // slopes are equal, so lines are parallel 472 DEBUG(dbgs() << "\t\tsame slope\n"); 473 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 474 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 475 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 476 return false; 477 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 478 X->setEmpty(); 479 ++DeltaSuccesses; 480 return true; 481 } 482 return false; 483 } 484 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 485 // slopes differ, so lines intersect 486 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 487 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 488 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 489 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 490 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 491 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 492 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 493 const SCEVConstant *C1A2_C2A1 = 494 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 495 const SCEVConstant *C1B2_C2B1 = 496 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 497 const SCEVConstant *A1B2_A2B1 = 498 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 499 const SCEVConstant *A2B1_A1B2 = 500 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 501 if (!C1B2_C2B1 || !C1A2_C2A1 || 502 !A1B2_A2B1 || !A2B1_A1B2) 503 return false; 504 APInt Xtop = C1B2_C2B1->getValue()->getValue(); 505 APInt Xbot = A1B2_A2B1->getValue()->getValue(); 506 APInt Ytop = C1A2_C2A1->getValue()->getValue(); 507 APInt Ybot = A2B1_A1B2->getValue()->getValue(); 508 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 509 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 510 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 511 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 512 APInt Xq = Xtop; // these need to be initialized, even 513 APInt Xr = Xtop; // though they're just going to be overwritten 514 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 515 APInt Yq = Ytop; 516 APInt Yr = Ytop; 517 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 518 if (Xr != 0 || Yr != 0) { 519 X->setEmpty(); 520 ++DeltaSuccesses; 521 return true; 522 } 523 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 524 if (Xq.slt(0) || Yq.slt(0)) { 525 X->setEmpty(); 526 ++DeltaSuccesses; 527 return true; 528 } 529 if (const SCEVConstant *CUB = 530 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 531 APInt UpperBound = CUB->getValue()->getValue(); 532 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 533 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 534 X->setEmpty(); 535 ++DeltaSuccesses; 536 return true; 537 } 538 } 539 X->setPoint(SE->getConstant(Xq), 540 SE->getConstant(Yq), 541 X->getAssociatedLoop()); 542 ++DeltaSuccesses; 543 return true; 544 } 545 return false; 546 } 547 548 // if (X->isLine() && Y->isPoint()) This case can't occur. 549 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 550 551 if (X->isPoint() && Y->isLine()) { 552 DEBUG(dbgs() << "\t intersect Point and Line\n"); 553 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 554 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 555 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 556 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 557 return false; 558 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 559 X->setEmpty(); 560 ++DeltaSuccesses; 561 return true; 562 } 563 return false; 564 } 565 566 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 567 return false; 568 } 569 570 571 //===----------------------------------------------------------------------===// 572 // DependenceAnalysis methods 573 574 // For debugging purposes. Dumps a dependence to OS. 575 void Dependence::dump(raw_ostream &OS) const { 576 bool Splitable = false; 577 if (isConfused()) 578 OS << "confused"; 579 else { 580 if (isConsistent()) 581 OS << "consistent "; 582 if (isFlow()) 583 OS << "flow"; 584 else if (isOutput()) 585 OS << "output"; 586 else if (isAnti()) 587 OS << "anti"; 588 else if (isInput()) 589 OS << "input"; 590 unsigned Levels = getLevels(); 591 OS << " ["; 592 for (unsigned II = 1; II <= Levels; ++II) { 593 if (isSplitable(II)) 594 Splitable = true; 595 if (isPeelFirst(II)) 596 OS << 'p'; 597 const SCEV *Distance = getDistance(II); 598 if (Distance) 599 OS << *Distance; 600 else if (isScalar(II)) 601 OS << "S"; 602 else { 603 unsigned Direction = getDirection(II); 604 if (Direction == DVEntry::ALL) 605 OS << "*"; 606 else { 607 if (Direction & DVEntry::LT) 608 OS << "<"; 609 if (Direction & DVEntry::EQ) 610 OS << "="; 611 if (Direction & DVEntry::GT) 612 OS << ">"; 613 } 614 } 615 if (isPeelLast(II)) 616 OS << 'p'; 617 if (II < Levels) 618 OS << " "; 619 } 620 if (isLoopIndependent()) 621 OS << "|<"; 622 OS << "]"; 623 if (Splitable) 624 OS << " splitable"; 625 } 626 OS << "!\n"; 627 } 628 629 630 631 static 632 AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 633 const Value *A, 634 const Value *B) { 635 const Value *AObj = GetUnderlyingObject(A); 636 const Value *BObj = GetUnderlyingObject(B); 637 return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), 638 BObj, AA->getTypeStoreSize(BObj->getType())); 639 } 640 641 642 // Returns true if the load or store can be analyzed. Atomic and volatile 643 // operations have properties which this analysis does not understand. 644 static 645 bool isLoadOrStore(const Instruction *I) { 646 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 647 return LI->isUnordered(); 648 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 649 return SI->isUnordered(); 650 return false; 651 } 652 653 654 static 655 Value *getPointerOperand(Instruction *I) { 656 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 657 return LI->getPointerOperand(); 658 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 659 return SI->getPointerOperand(); 660 llvm_unreachable("Value is not load or store instruction"); 661 return nullptr; 662 } 663 664 665 // Examines the loop nesting of the Src and Dst 666 // instructions and establishes their shared loops. Sets the variables 667 // CommonLevels, SrcLevels, and MaxLevels. 668 // The source and destination instructions needn't be contained in the same 669 // loop. The routine establishNestingLevels finds the level of most deeply 670 // nested loop that contains them both, CommonLevels. An instruction that's 671 // not contained in a loop is at level = 0. MaxLevels is equal to the level 672 // of the source plus the level of the destination, minus CommonLevels. 673 // This lets us allocate vectors MaxLevels in length, with room for every 674 // distinct loop referenced in both the source and destination subscripts. 675 // The variable SrcLevels is the nesting depth of the source instruction. 676 // It's used to help calculate distinct loops referenced by the destination. 677 // Here's the map from loops to levels: 678 // 0 - unused 679 // 1 - outermost common loop 680 // ... - other common loops 681 // CommonLevels - innermost common loop 682 // ... - loops containing Src but not Dst 683 // SrcLevels - innermost loop containing Src but not Dst 684 // ... - loops containing Dst but not Src 685 // MaxLevels - innermost loops containing Dst but not Src 686 // Consider the follow code fragment: 687 // for (a = ...) { 688 // for (b = ...) { 689 // for (c = ...) { 690 // for (d = ...) { 691 // A[] = ...; 692 // } 693 // } 694 // for (e = ...) { 695 // for (f = ...) { 696 // for (g = ...) { 697 // ... = A[]; 698 // } 699 // } 700 // } 701 // } 702 // } 703 // If we're looking at the possibility of a dependence between the store 704 // to A (the Src) and the load from A (the Dst), we'll note that they 705 // have 2 loops in common, so CommonLevels will equal 2 and the direction 706 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 707 // A map from loop names to loop numbers would look like 708 // a - 1 709 // b - 2 = CommonLevels 710 // c - 3 711 // d - 4 = SrcLevels 712 // e - 5 713 // f - 6 714 // g - 7 = MaxLevels 715 void DependenceAnalysis::establishNestingLevels(const Instruction *Src, 716 const Instruction *Dst) { 717 const BasicBlock *SrcBlock = Src->getParent(); 718 const BasicBlock *DstBlock = Dst->getParent(); 719 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 720 unsigned DstLevel = LI->getLoopDepth(DstBlock); 721 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 722 const Loop *DstLoop = LI->getLoopFor(DstBlock); 723 SrcLevels = SrcLevel; 724 MaxLevels = SrcLevel + DstLevel; 725 while (SrcLevel > DstLevel) { 726 SrcLoop = SrcLoop->getParentLoop(); 727 SrcLevel--; 728 } 729 while (DstLevel > SrcLevel) { 730 DstLoop = DstLoop->getParentLoop(); 731 DstLevel--; 732 } 733 while (SrcLoop != DstLoop) { 734 SrcLoop = SrcLoop->getParentLoop(); 735 DstLoop = DstLoop->getParentLoop(); 736 SrcLevel--; 737 } 738 CommonLevels = SrcLevel; 739 MaxLevels -= CommonLevels; 740 } 741 742 743 // Given one of the loops containing the source, return 744 // its level index in our numbering scheme. 745 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { 746 return SrcLoop->getLoopDepth(); 747 } 748 749 750 // Given one of the loops containing the destination, 751 // return its level index in our numbering scheme. 752 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { 753 unsigned D = DstLoop->getLoopDepth(); 754 if (D > CommonLevels) 755 return D - CommonLevels + SrcLevels; 756 else 757 return D; 758 } 759 760 761 // Returns true if Expression is loop invariant in LoopNest. 762 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, 763 const Loop *LoopNest) const { 764 if (!LoopNest) 765 return true; 766 return SE->isLoopInvariant(Expression, LoopNest) && 767 isLoopInvariant(Expression, LoopNest->getParentLoop()); 768 } 769 770 771 772 // Finds the set of loops from the LoopNest that 773 // have a level <= CommonLevels and are referred to by the SCEV Expression. 774 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, 775 const Loop *LoopNest, 776 SmallBitVector &Loops) const { 777 while (LoopNest) { 778 unsigned Level = LoopNest->getLoopDepth(); 779 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 780 Loops.set(Level); 781 LoopNest = LoopNest->getParentLoop(); 782 } 783 } 784 785 786 // removeMatchingExtensions - Examines a subscript pair. 787 // If the source and destination are identically sign (or zero) 788 // extended, it strips off the extension in an effect to simplify 789 // the actual analysis. 790 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { 791 const SCEV *Src = Pair->Src; 792 const SCEV *Dst = Pair->Dst; 793 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 794 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 795 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 796 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 797 if (SrcCast->getType() == DstCast->getType()) { 798 Pair->Src = SrcCast->getOperand(); 799 Pair->Dst = DstCast->getOperand(); 800 } 801 } 802 } 803 804 805 // Examine the scev and return true iff it's linear. 806 // Collect any loops mentioned in the set of "Loops". 807 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, 808 const Loop *LoopNest, 809 SmallBitVector &Loops) { 810 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 811 if (!AddRec) 812 return isLoopInvariant(Src, LoopNest); 813 const SCEV *Start = AddRec->getStart(); 814 const SCEV *Step = AddRec->getStepRecurrence(*SE); 815 if (!isLoopInvariant(Step, LoopNest)) 816 return false; 817 Loops.set(mapSrcLoop(AddRec->getLoop())); 818 return checkSrcSubscript(Start, LoopNest, Loops); 819 } 820 821 822 823 // Examine the scev and return true iff it's linear. 824 // Collect any loops mentioned in the set of "Loops". 825 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, 826 const Loop *LoopNest, 827 SmallBitVector &Loops) { 828 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 829 if (!AddRec) 830 return isLoopInvariant(Dst, LoopNest); 831 const SCEV *Start = AddRec->getStart(); 832 const SCEV *Step = AddRec->getStepRecurrence(*SE); 833 if (!isLoopInvariant(Step, LoopNest)) 834 return false; 835 Loops.set(mapDstLoop(AddRec->getLoop())); 836 return checkDstSubscript(Start, LoopNest, Loops); 837 } 838 839 840 // Examines the subscript pair (the Src and Dst SCEVs) 841 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 842 // Collects the associated loops in a set. 843 DependenceAnalysis::Subscript::ClassificationKind 844 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 845 const SCEV *Dst, const Loop *DstLoopNest, 846 SmallBitVector &Loops) { 847 SmallBitVector SrcLoops(MaxLevels + 1); 848 SmallBitVector DstLoops(MaxLevels + 1); 849 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 850 return Subscript::NonLinear; 851 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 852 return Subscript::NonLinear; 853 Loops = SrcLoops; 854 Loops |= DstLoops; 855 unsigned N = Loops.count(); 856 if (N == 0) 857 return Subscript::ZIV; 858 if (N == 1) 859 return Subscript::SIV; 860 if (N == 2 && (SrcLoops.count() == 0 || 861 DstLoops.count() == 0 || 862 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 863 return Subscript::RDIV; 864 return Subscript::MIV; 865 } 866 867 868 // A wrapper around SCEV::isKnownPredicate. 869 // Looks for cases where we're interested in comparing for equality. 870 // If both X and Y have been identically sign or zero extended, 871 // it strips off the (confusing) extensions before invoking 872 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 873 // will be similarly updated. 874 // 875 // If SCEV::isKnownPredicate can't prove the predicate, 876 // we try simple subtraction, which seems to help in some cases 877 // involving symbolics. 878 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, 879 const SCEV *X, 880 const SCEV *Y) const { 881 if (Pred == CmpInst::ICMP_EQ || 882 Pred == CmpInst::ICMP_NE) { 883 if ((isa<SCEVSignExtendExpr>(X) && 884 isa<SCEVSignExtendExpr>(Y)) || 885 (isa<SCEVZeroExtendExpr>(X) && 886 isa<SCEVZeroExtendExpr>(Y))) { 887 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 888 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 889 const SCEV *Xop = CX->getOperand(); 890 const SCEV *Yop = CY->getOperand(); 891 if (Xop->getType() == Yop->getType()) { 892 X = Xop; 893 Y = Yop; 894 } 895 } 896 } 897 if (SE->isKnownPredicate(Pred, X, Y)) 898 return true; 899 // If SE->isKnownPredicate can't prove the condition, 900 // we try the brute-force approach of subtracting 901 // and testing the difference. 902 // By testing with SE->isKnownPredicate first, we avoid 903 // the possibility of overflow when the arguments are constants. 904 const SCEV *Delta = SE->getMinusSCEV(X, Y); 905 switch (Pred) { 906 case CmpInst::ICMP_EQ: 907 return Delta->isZero(); 908 case CmpInst::ICMP_NE: 909 return SE->isKnownNonZero(Delta); 910 case CmpInst::ICMP_SGE: 911 return SE->isKnownNonNegative(Delta); 912 case CmpInst::ICMP_SLE: 913 return SE->isKnownNonPositive(Delta); 914 case CmpInst::ICMP_SGT: 915 return SE->isKnownPositive(Delta); 916 case CmpInst::ICMP_SLT: 917 return SE->isKnownNegative(Delta); 918 default: 919 llvm_unreachable("unexpected predicate in isKnownPredicate"); 920 } 921 } 922 923 924 // All subscripts are all the same type. 925 // Loop bound may be smaller (e.g., a char). 926 // Should zero extend loop bound, since it's always >= 0. 927 // This routine collects upper bound and extends if needed. 928 // Return null if no bound available. 929 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, 930 Type *T) const { 931 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 932 const SCEV *UB = SE->getBackedgeTakenCount(L); 933 return SE->getNoopOrZeroExtend(UB, T); 934 } 935 return nullptr; 936 } 937 938 939 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 940 // If the cast fails, returns NULL. 941 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, 942 Type *T 943 ) const { 944 if (const SCEV *UB = collectUpperBound(L, T)) 945 return dyn_cast<SCEVConstant>(UB); 946 return nullptr; 947 } 948 949 950 // testZIV - 951 // When we have a pair of subscripts of the form [c1] and [c2], 952 // where c1 and c2 are both loop invariant, we attack it using 953 // the ZIV test. Basically, we test by comparing the two values, 954 // but there are actually three possible results: 955 // 1) the values are equal, so there's a dependence 956 // 2) the values are different, so there's no dependence 957 // 3) the values might be equal, so we have to assume a dependence. 958 // 959 // Return true if dependence disproved. 960 bool DependenceAnalysis::testZIV(const SCEV *Src, 961 const SCEV *Dst, 962 FullDependence &Result) const { 963 DEBUG(dbgs() << " src = " << *Src << "\n"); 964 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 965 ++ZIVapplications; 966 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 967 DEBUG(dbgs() << " provably dependent\n"); 968 return false; // provably dependent 969 } 970 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 971 DEBUG(dbgs() << " provably independent\n"); 972 ++ZIVindependence; 973 return true; // provably independent 974 } 975 DEBUG(dbgs() << " possibly dependent\n"); 976 Result.Consistent = false; 977 return false; // possibly dependent 978 } 979 980 981 // strongSIVtest - 982 // From the paper, Practical Dependence Testing, Section 4.2.1 983 // 984 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 985 // where i is an induction variable, c1 and c2 are loop invariant, 986 // and a is a constant, we can solve it exactly using the Strong SIV test. 987 // 988 // Can prove independence. Failing that, can compute distance (and direction). 989 // In the presence of symbolic terms, we can sometimes make progress. 990 // 991 // If there's a dependence, 992 // 993 // c1 + a*i = c2 + a*i' 994 // 995 // The dependence distance is 996 // 997 // d = i' - i = (c1 - c2)/a 998 // 999 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1000 // loop's upper bound. If a dependence exists, the dependence direction is 1001 // defined as 1002 // 1003 // { < if d > 0 1004 // direction = { = if d = 0 1005 // { > if d < 0 1006 // 1007 // Return true if dependence disproved. 1008 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, 1009 const SCEV *SrcConst, 1010 const SCEV *DstConst, 1011 const Loop *CurLoop, 1012 unsigned Level, 1013 FullDependence &Result, 1014 Constraint &NewConstraint) const { 1015 DEBUG(dbgs() << "\tStrong SIV test\n"); 1016 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1017 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1018 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1019 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1020 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1021 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1022 ++StrongSIVapplications; 1023 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1024 Level--; 1025 1026 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1027 DEBUG(dbgs() << "\t Delta = " << *Delta); 1028 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1029 1030 // check that |Delta| < iteration count 1031 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1032 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1033 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1034 const SCEV *AbsDelta = 1035 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1036 const SCEV *AbsCoeff = 1037 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1038 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1039 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1040 // Distance greater than trip count - no dependence 1041 ++StrongSIVindependence; 1042 ++StrongSIVsuccesses; 1043 return true; 1044 } 1045 } 1046 1047 // Can we compute distance? 1048 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1049 APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); 1050 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); 1051 APInt Distance = ConstDelta; // these need to be initialized 1052 APInt Remainder = ConstDelta; 1053 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1054 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1055 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1056 // Make sure Coeff divides Delta exactly 1057 if (Remainder != 0) { 1058 // Coeff doesn't divide Distance, no dependence 1059 ++StrongSIVindependence; 1060 ++StrongSIVsuccesses; 1061 return true; 1062 } 1063 Result.DV[Level].Distance = SE->getConstant(Distance); 1064 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1065 if (Distance.sgt(0)) 1066 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1067 else if (Distance.slt(0)) 1068 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1069 else 1070 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1071 ++StrongSIVsuccesses; 1072 } 1073 else if (Delta->isZero()) { 1074 // since 0/X == 0 1075 Result.DV[Level].Distance = Delta; 1076 NewConstraint.setDistance(Delta, CurLoop); 1077 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1078 ++StrongSIVsuccesses; 1079 } 1080 else { 1081 if (Coeff->isOne()) { 1082 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1083 Result.DV[Level].Distance = Delta; // since X/1 == X 1084 NewConstraint.setDistance(Delta, CurLoop); 1085 } 1086 else { 1087 Result.Consistent = false; 1088 NewConstraint.setLine(Coeff, 1089 SE->getNegativeSCEV(Coeff), 1090 SE->getNegativeSCEV(Delta), CurLoop); 1091 } 1092 1093 // maybe we can get a useful direction 1094 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1095 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1096 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1097 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1098 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1099 // The double negatives above are confusing. 1100 // It helps to read !SE->isKnownNonZero(Delta) 1101 // as "Delta might be Zero" 1102 unsigned NewDirection = Dependence::DVEntry::NONE; 1103 if ((DeltaMaybePositive && CoeffMaybePositive) || 1104 (DeltaMaybeNegative && CoeffMaybeNegative)) 1105 NewDirection = Dependence::DVEntry::LT; 1106 if (DeltaMaybeZero) 1107 NewDirection |= Dependence::DVEntry::EQ; 1108 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1109 (DeltaMaybePositive && CoeffMaybeNegative)) 1110 NewDirection |= Dependence::DVEntry::GT; 1111 if (NewDirection < Result.DV[Level].Direction) 1112 ++StrongSIVsuccesses; 1113 Result.DV[Level].Direction &= NewDirection; 1114 } 1115 return false; 1116 } 1117 1118 1119 // weakCrossingSIVtest - 1120 // From the paper, Practical Dependence Testing, Section 4.2.2 1121 // 1122 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1123 // where i is an induction variable, c1 and c2 are loop invariant, 1124 // and a is a constant, we can solve it exactly using the 1125 // Weak-Crossing SIV test. 1126 // 1127 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1128 // the two lines, where i = i', yielding 1129 // 1130 // c1 + a*i = c2 - a*i 1131 // 2a*i = c2 - c1 1132 // i = (c2 - c1)/2a 1133 // 1134 // If i < 0, there is no dependence. 1135 // If i > upperbound, there is no dependence. 1136 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1137 // If i = upperbound, there's a dependence with distance = 0. 1138 // If i is integral, there's a dependence (all directions). 1139 // If the non-integer part = 1/2, there's a dependence (<> directions). 1140 // Otherwise, there's no dependence. 1141 // 1142 // Can prove independence. Failing that, 1143 // can sometimes refine the directions. 1144 // Can determine iteration for splitting. 1145 // 1146 // Return true if dependence disproved. 1147 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, 1148 const SCEV *SrcConst, 1149 const SCEV *DstConst, 1150 const Loop *CurLoop, 1151 unsigned Level, 1152 FullDependence &Result, 1153 Constraint &NewConstraint, 1154 const SCEV *&SplitIter) const { 1155 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1156 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1157 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1158 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1159 ++WeakCrossingSIVapplications; 1160 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1161 Level--; 1162 Result.Consistent = false; 1163 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1164 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1165 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1166 if (Delta->isZero()) { 1167 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1168 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1169 ++WeakCrossingSIVsuccesses; 1170 if (!Result.DV[Level].Direction) { 1171 ++WeakCrossingSIVindependence; 1172 return true; 1173 } 1174 Result.DV[Level].Distance = Delta; // = 0 1175 return false; 1176 } 1177 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1178 if (!ConstCoeff) 1179 return false; 1180 1181 Result.DV[Level].Splitable = true; 1182 if (SE->isKnownNegative(ConstCoeff)) { 1183 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1184 assert(ConstCoeff && 1185 "dynamic cast of negative of ConstCoeff should yield constant"); 1186 Delta = SE->getNegativeSCEV(Delta); 1187 } 1188 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1189 1190 // compute SplitIter for use by DependenceAnalysis::getSplitIteration() 1191 SplitIter = 1192 SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), 1193 Delta), 1194 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), 1195 ConstCoeff)); 1196 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1197 1198 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1199 if (!ConstDelta) 1200 return false; 1201 1202 // We're certain that ConstCoeff > 0; therefore, 1203 // if Delta < 0, then no dependence. 1204 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1205 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1206 if (SE->isKnownNegative(Delta)) { 1207 // No dependence, Delta < 0 1208 ++WeakCrossingSIVindependence; 1209 ++WeakCrossingSIVsuccesses; 1210 return true; 1211 } 1212 1213 // We're certain that Delta > 0 and ConstCoeff > 0. 1214 // Check Delta/(2*ConstCoeff) against upper loop bound 1215 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1216 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1217 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1218 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1219 ConstantTwo); 1220 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1221 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1222 // Delta too big, no dependence 1223 ++WeakCrossingSIVindependence; 1224 ++WeakCrossingSIVsuccesses; 1225 return true; 1226 } 1227 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1228 // i = i' = UB 1229 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1230 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1231 ++WeakCrossingSIVsuccesses; 1232 if (!Result.DV[Level].Direction) { 1233 ++WeakCrossingSIVindependence; 1234 return true; 1235 } 1236 Result.DV[Level].Splitable = false; 1237 Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); 1238 return false; 1239 } 1240 } 1241 1242 // check that Coeff divides Delta 1243 APInt APDelta = ConstDelta->getValue()->getValue(); 1244 APInt APCoeff = ConstCoeff->getValue()->getValue(); 1245 APInt Distance = APDelta; // these need to be initialzed 1246 APInt Remainder = APDelta; 1247 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1248 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1249 if (Remainder != 0) { 1250 // Coeff doesn't divide Delta, no dependence 1251 ++WeakCrossingSIVindependence; 1252 ++WeakCrossingSIVsuccesses; 1253 return true; 1254 } 1255 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1256 1257 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1258 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1259 Remainder = Distance.srem(Two); 1260 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1261 if (Remainder != 0) { 1262 // Equal direction isn't possible 1263 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1264 ++WeakCrossingSIVsuccesses; 1265 } 1266 return false; 1267 } 1268 1269 1270 // Kirch's algorithm, from 1271 // 1272 // Optimizing Supercompilers for Supercomputers 1273 // Michael Wolfe 1274 // MIT Press, 1989 1275 // 1276 // Program 2.1, page 29. 1277 // Computes the GCD of AM and BM. 1278 // Also finds a solution to the equation ax - by = gcd(a, b). 1279 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1280 static 1281 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1282 APInt &G, APInt &X, APInt &Y) { 1283 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1284 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1285 APInt G0 = AM.abs(); 1286 APInt G1 = BM.abs(); 1287 APInt Q = G0; // these need to be initialized 1288 APInt R = G0; 1289 APInt::sdivrem(G0, G1, Q, R); 1290 while (R != 0) { 1291 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1292 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1293 G0 = G1; G1 = R; 1294 APInt::sdivrem(G0, G1, Q, R); 1295 } 1296 G = G1; 1297 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1298 X = AM.slt(0) ? -A1 : A1; 1299 Y = BM.slt(0) ? B1 : -B1; 1300 1301 // make sure gcd divides Delta 1302 R = Delta.srem(G); 1303 if (R != 0) 1304 return true; // gcd doesn't divide Delta, no dependence 1305 Q = Delta.sdiv(G); 1306 X *= Q; 1307 Y *= Q; 1308 return false; 1309 } 1310 1311 1312 static 1313 APInt floorOfQuotient(APInt A, APInt B) { 1314 APInt Q = A; // these need to be initialized 1315 APInt R = A; 1316 APInt::sdivrem(A, B, Q, R); 1317 if (R == 0) 1318 return Q; 1319 if ((A.sgt(0) && B.sgt(0)) || 1320 (A.slt(0) && B.slt(0))) 1321 return Q; 1322 else 1323 return Q - 1; 1324 } 1325 1326 1327 static 1328 APInt ceilingOfQuotient(APInt A, APInt B) { 1329 APInt Q = A; // these need to be initialized 1330 APInt R = A; 1331 APInt::sdivrem(A, B, Q, R); 1332 if (R == 0) 1333 return Q; 1334 if ((A.sgt(0) && B.sgt(0)) || 1335 (A.slt(0) && B.slt(0))) 1336 return Q + 1; 1337 else 1338 return Q; 1339 } 1340 1341 1342 static 1343 APInt maxAPInt(APInt A, APInt B) { 1344 return A.sgt(B) ? A : B; 1345 } 1346 1347 1348 static 1349 APInt minAPInt(APInt A, APInt B) { 1350 return A.slt(B) ? A : B; 1351 } 1352 1353 1354 // exactSIVtest - 1355 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1356 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1357 // and a2 are constant, we can solve it exactly using an algorithm developed 1358 // by Banerjee and Wolfe. See Section 2.5.3 in 1359 // 1360 // Optimizing Supercompilers for Supercomputers 1361 // Michael Wolfe 1362 // MIT Press, 1989 1363 // 1364 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1365 // so use them if possible. They're also a bit better with symbolics and, 1366 // in the case of the strong SIV test, can compute Distances. 1367 // 1368 // Return true if dependence disproved. 1369 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, 1370 const SCEV *DstCoeff, 1371 const SCEV *SrcConst, 1372 const SCEV *DstConst, 1373 const Loop *CurLoop, 1374 unsigned Level, 1375 FullDependence &Result, 1376 Constraint &NewConstraint) const { 1377 DEBUG(dbgs() << "\tExact SIV test\n"); 1378 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1379 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1380 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1381 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1382 ++ExactSIVapplications; 1383 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1384 Level--; 1385 Result.Consistent = false; 1386 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1387 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1388 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1389 Delta, CurLoop); 1390 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1391 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1392 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1393 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1394 return false; 1395 1396 // find gcd 1397 APInt G, X, Y; 1398 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1399 APInt BM = ConstDstCoeff->getValue()->getValue(); 1400 unsigned Bits = AM.getBitWidth(); 1401 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1402 // gcd doesn't divide Delta, no dependence 1403 ++ExactSIVindependence; 1404 ++ExactSIVsuccesses; 1405 return true; 1406 } 1407 1408 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1409 1410 // since SCEV construction normalizes, LM = 0 1411 APInt UM(Bits, 1, true); 1412 bool UMvalid = false; 1413 // UM is perhaps unavailable, let's check 1414 if (const SCEVConstant *CUB = 1415 collectConstantUpperBound(CurLoop, Delta->getType())) { 1416 UM = CUB->getValue()->getValue(); 1417 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1418 UMvalid = true; 1419 } 1420 1421 APInt TU(APInt::getSignedMaxValue(Bits)); 1422 APInt TL(APInt::getSignedMinValue(Bits)); 1423 1424 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1425 APInt TMUL = BM.sdiv(G); 1426 if (TMUL.sgt(0)) { 1427 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1428 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1429 if (UMvalid) { 1430 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1431 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1432 } 1433 } 1434 else { 1435 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1436 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1437 if (UMvalid) { 1438 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1439 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1440 } 1441 } 1442 1443 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1444 TMUL = AM.sdiv(G); 1445 if (TMUL.sgt(0)) { 1446 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1447 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1448 if (UMvalid) { 1449 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1450 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1451 } 1452 } 1453 else { 1454 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1455 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1456 if (UMvalid) { 1457 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1458 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1459 } 1460 } 1461 if (TL.sgt(TU)) { 1462 ++ExactSIVindependence; 1463 ++ExactSIVsuccesses; 1464 return true; 1465 } 1466 1467 // explore directions 1468 unsigned NewDirection = Dependence::DVEntry::NONE; 1469 1470 // less than 1471 APInt SaveTU(TU); // save these 1472 APInt SaveTL(TL); 1473 DEBUG(dbgs() << "\t exploring LT direction\n"); 1474 TMUL = AM - BM; 1475 if (TMUL.sgt(0)) { 1476 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1477 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1478 } 1479 else { 1480 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1481 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1482 } 1483 if (TL.sle(TU)) { 1484 NewDirection |= Dependence::DVEntry::LT; 1485 ++ExactSIVsuccesses; 1486 } 1487 1488 // equal 1489 TU = SaveTU; // restore 1490 TL = SaveTL; 1491 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1492 if (TMUL.sgt(0)) { 1493 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1494 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1495 } 1496 else { 1497 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1498 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1499 } 1500 TMUL = BM - AM; 1501 if (TMUL.sgt(0)) { 1502 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1503 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1504 } 1505 else { 1506 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1507 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1508 } 1509 if (TL.sle(TU)) { 1510 NewDirection |= Dependence::DVEntry::EQ; 1511 ++ExactSIVsuccesses; 1512 } 1513 1514 // greater than 1515 TU = SaveTU; // restore 1516 TL = SaveTL; 1517 DEBUG(dbgs() << "\t exploring GT direction\n"); 1518 if (TMUL.sgt(0)) { 1519 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1520 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1521 } 1522 else { 1523 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1524 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1525 } 1526 if (TL.sle(TU)) { 1527 NewDirection |= Dependence::DVEntry::GT; 1528 ++ExactSIVsuccesses; 1529 } 1530 1531 // finished 1532 Result.DV[Level].Direction &= NewDirection; 1533 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1534 ++ExactSIVindependence; 1535 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1536 } 1537 1538 1539 1540 // Return true if the divisor evenly divides the dividend. 1541 static 1542 bool isRemainderZero(const SCEVConstant *Dividend, 1543 const SCEVConstant *Divisor) { 1544 APInt ConstDividend = Dividend->getValue()->getValue(); 1545 APInt ConstDivisor = Divisor->getValue()->getValue(); 1546 return ConstDividend.srem(ConstDivisor) == 0; 1547 } 1548 1549 1550 // weakZeroSrcSIVtest - 1551 // From the paper, Practical Dependence Testing, Section 4.2.2 1552 // 1553 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1554 // where i is an induction variable, c1 and c2 are loop invariant, 1555 // and a is a constant, we can solve it exactly using the 1556 // Weak-Zero SIV test. 1557 // 1558 // Given 1559 // 1560 // c1 = c2 + a*i 1561 // 1562 // we get 1563 // 1564 // (c1 - c2)/a = i 1565 // 1566 // If i is not an integer, there's no dependence. 1567 // If i < 0 or > UB, there's no dependence. 1568 // If i = 0, the direction is <= and peeling the 1569 // 1st iteration will break the dependence. 1570 // If i = UB, the direction is >= and peeling the 1571 // last iteration will break the dependence. 1572 // Otherwise, the direction is *. 1573 // 1574 // Can prove independence. Failing that, we can sometimes refine 1575 // the directions. Can sometimes show that first or last 1576 // iteration carries all the dependences (so worth peeling). 1577 // 1578 // (see also weakZeroDstSIVtest) 1579 // 1580 // Return true if dependence disproved. 1581 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1582 const SCEV *SrcConst, 1583 const SCEV *DstConst, 1584 const Loop *CurLoop, 1585 unsigned Level, 1586 FullDependence &Result, 1587 Constraint &NewConstraint) const { 1588 // For the WeakSIV test, it's possible the loop isn't common to 1589 // the Src and Dst loops. If it isn't, then there's no need to 1590 // record a direction. 1591 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1592 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1593 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1594 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1595 ++WeakZeroSIVapplications; 1596 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1597 Level--; 1598 Result.Consistent = false; 1599 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1600 NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), 1601 DstCoeff, Delta, CurLoop); 1602 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1603 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1604 if (Level < CommonLevels) { 1605 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1606 Result.DV[Level].PeelFirst = true; 1607 ++WeakZeroSIVsuccesses; 1608 } 1609 return false; // dependences caused by first iteration 1610 } 1611 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1612 if (!ConstCoeff) 1613 return false; 1614 const SCEV *AbsCoeff = 1615 SE->isKnownNegative(ConstCoeff) ? 1616 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1617 const SCEV *NewDelta = 1618 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1619 1620 // check that Delta/SrcCoeff < iteration count 1621 // really check NewDelta < count*AbsCoeff 1622 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1623 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1624 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1625 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1626 ++WeakZeroSIVindependence; 1627 ++WeakZeroSIVsuccesses; 1628 return true; 1629 } 1630 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1631 // dependences caused by last iteration 1632 if (Level < CommonLevels) { 1633 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1634 Result.DV[Level].PeelLast = true; 1635 ++WeakZeroSIVsuccesses; 1636 } 1637 return false; 1638 } 1639 } 1640 1641 // check that Delta/SrcCoeff >= 0 1642 // really check that NewDelta >= 0 1643 if (SE->isKnownNegative(NewDelta)) { 1644 // No dependence, newDelta < 0 1645 ++WeakZeroSIVindependence; 1646 ++WeakZeroSIVsuccesses; 1647 return true; 1648 } 1649 1650 // if SrcCoeff doesn't divide Delta, then no dependence 1651 if (isa<SCEVConstant>(Delta) && 1652 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1653 ++WeakZeroSIVindependence; 1654 ++WeakZeroSIVsuccesses; 1655 return true; 1656 } 1657 return false; 1658 } 1659 1660 1661 // weakZeroDstSIVtest - 1662 // From the paper, Practical Dependence Testing, Section 4.2.2 1663 // 1664 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1665 // where i is an induction variable, c1 and c2 are loop invariant, 1666 // and a is a constant, we can solve it exactly using the 1667 // Weak-Zero SIV test. 1668 // 1669 // Given 1670 // 1671 // c1 + a*i = c2 1672 // 1673 // we get 1674 // 1675 // i = (c2 - c1)/a 1676 // 1677 // If i is not an integer, there's no dependence. 1678 // If i < 0 or > UB, there's no dependence. 1679 // If i = 0, the direction is <= and peeling the 1680 // 1st iteration will break the dependence. 1681 // If i = UB, the direction is >= and peeling the 1682 // last iteration will break the dependence. 1683 // Otherwise, the direction is *. 1684 // 1685 // Can prove independence. Failing that, we can sometimes refine 1686 // the directions. Can sometimes show that first or last 1687 // iteration carries all the dependences (so worth peeling). 1688 // 1689 // (see also weakZeroSrcSIVtest) 1690 // 1691 // Return true if dependence disproved. 1692 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1693 const SCEV *SrcConst, 1694 const SCEV *DstConst, 1695 const Loop *CurLoop, 1696 unsigned Level, 1697 FullDependence &Result, 1698 Constraint &NewConstraint) const { 1699 // For the WeakSIV test, it's possible the loop isn't common to the 1700 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1701 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1702 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1703 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1704 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1705 ++WeakZeroSIVapplications; 1706 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1707 Level--; 1708 Result.Consistent = false; 1709 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1710 NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), 1711 Delta, CurLoop); 1712 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1713 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1714 if (Level < CommonLevels) { 1715 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1716 Result.DV[Level].PeelFirst = true; 1717 ++WeakZeroSIVsuccesses; 1718 } 1719 return false; // dependences caused by first iteration 1720 } 1721 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1722 if (!ConstCoeff) 1723 return false; 1724 const SCEV *AbsCoeff = 1725 SE->isKnownNegative(ConstCoeff) ? 1726 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1727 const SCEV *NewDelta = 1728 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1729 1730 // check that Delta/SrcCoeff < iteration count 1731 // really check NewDelta < count*AbsCoeff 1732 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1733 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1734 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1735 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1736 ++WeakZeroSIVindependence; 1737 ++WeakZeroSIVsuccesses; 1738 return true; 1739 } 1740 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1741 // dependences caused by last iteration 1742 if (Level < CommonLevels) { 1743 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1744 Result.DV[Level].PeelLast = true; 1745 ++WeakZeroSIVsuccesses; 1746 } 1747 return false; 1748 } 1749 } 1750 1751 // check that Delta/SrcCoeff >= 0 1752 // really check that NewDelta >= 0 1753 if (SE->isKnownNegative(NewDelta)) { 1754 // No dependence, newDelta < 0 1755 ++WeakZeroSIVindependence; 1756 ++WeakZeroSIVsuccesses; 1757 return true; 1758 } 1759 1760 // if SrcCoeff doesn't divide Delta, then no dependence 1761 if (isa<SCEVConstant>(Delta) && 1762 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1763 ++WeakZeroSIVindependence; 1764 ++WeakZeroSIVsuccesses; 1765 return true; 1766 } 1767 return false; 1768 } 1769 1770 1771 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1772 // Things of the form [c1 + a*i] and [c2 + b*j], 1773 // where i and j are induction variable, c1 and c2 are loop invariant, 1774 // and a and b are constants. 1775 // Returns true if any possible dependence is disproved. 1776 // Marks the result as inconsistent. 1777 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1778 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, 1779 const SCEV *DstCoeff, 1780 const SCEV *SrcConst, 1781 const SCEV *DstConst, 1782 const Loop *SrcLoop, 1783 const Loop *DstLoop, 1784 FullDependence &Result) const { 1785 DEBUG(dbgs() << "\tExact RDIV test\n"); 1786 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1787 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1788 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1789 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1790 ++ExactRDIVapplications; 1791 Result.Consistent = false; 1792 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1793 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1794 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1795 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1796 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1797 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1798 return false; 1799 1800 // find gcd 1801 APInt G, X, Y; 1802 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1803 APInt BM = ConstDstCoeff->getValue()->getValue(); 1804 unsigned Bits = AM.getBitWidth(); 1805 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1806 // gcd doesn't divide Delta, no dependence 1807 ++ExactRDIVindependence; 1808 return true; 1809 } 1810 1811 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1812 1813 // since SCEV construction seems to normalize, LM = 0 1814 APInt SrcUM(Bits, 1, true); 1815 bool SrcUMvalid = false; 1816 // SrcUM is perhaps unavailable, let's check 1817 if (const SCEVConstant *UpperBound = 1818 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1819 SrcUM = UpperBound->getValue()->getValue(); 1820 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1821 SrcUMvalid = true; 1822 } 1823 1824 APInt DstUM(Bits, 1, true); 1825 bool DstUMvalid = false; 1826 // UM is perhaps unavailable, let's check 1827 if (const SCEVConstant *UpperBound = 1828 collectConstantUpperBound(DstLoop, Delta->getType())) { 1829 DstUM = UpperBound->getValue()->getValue(); 1830 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1831 DstUMvalid = true; 1832 } 1833 1834 APInt TU(APInt::getSignedMaxValue(Bits)); 1835 APInt TL(APInt::getSignedMinValue(Bits)); 1836 1837 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1838 APInt TMUL = BM.sdiv(G); 1839 if (TMUL.sgt(0)) { 1840 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1841 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1842 if (SrcUMvalid) { 1843 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1844 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1845 } 1846 } 1847 else { 1848 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1849 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1850 if (SrcUMvalid) { 1851 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1852 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1853 } 1854 } 1855 1856 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1857 TMUL = AM.sdiv(G); 1858 if (TMUL.sgt(0)) { 1859 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1860 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1861 if (DstUMvalid) { 1862 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1863 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1864 } 1865 } 1866 else { 1867 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1868 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1869 if (DstUMvalid) { 1870 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1871 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1872 } 1873 } 1874 if (TL.sgt(TU)) 1875 ++ExactRDIVindependence; 1876 return TL.sgt(TU); 1877 } 1878 1879 1880 // symbolicRDIVtest - 1881 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1882 // introduce a special case of Banerjee's Inequalities (also called the 1883 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1884 // particularly cases with symbolics. Since it's only able to disprove 1885 // dependence (not compute distances or directions), we'll use it as a 1886 // fall back for the other tests. 1887 // 1888 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1889 // where i and j are induction variables and c1 and c2 are loop invariants, 1890 // we can use the symbolic tests to disprove some dependences, serving as a 1891 // backup for the RDIV test. Note that i and j can be the same variable, 1892 // letting this test serve as a backup for the various SIV tests. 1893 // 1894 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1895 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1896 // loop bounds for the i and j loops, respectively. So, ... 1897 // 1898 // c1 + a1*i = c2 + a2*j 1899 // a1*i - a2*j = c2 - c1 1900 // 1901 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1902 // range of the maximum and minimum possible values of a1*i - a2*j. 1903 // Considering the signs of a1 and a2, we have 4 possible cases: 1904 // 1905 // 1) If a1 >= 0 and a2 >= 0, then 1906 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1907 // -a2*N2 <= c2 - c1 <= a1*N1 1908 // 1909 // 2) If a1 >= 0 and a2 <= 0, then 1910 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1911 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1912 // 1913 // 3) If a1 <= 0 and a2 >= 0, then 1914 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1915 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1916 // 1917 // 4) If a1 <= 0 and a2 <= 0, then 1918 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1919 // a1*N1 <= c2 - c1 <= -a2*N2 1920 // 1921 // return true if dependence disproved 1922 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, 1923 const SCEV *A2, 1924 const SCEV *C1, 1925 const SCEV *C2, 1926 const Loop *Loop1, 1927 const Loop *Loop2) const { 1928 ++SymbolicRDIVapplications; 1929 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1930 DEBUG(dbgs() << "\t A1 = " << *A1); 1931 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1932 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1933 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1934 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1935 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1936 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1937 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1938 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1939 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1940 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1941 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1942 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1943 if (SE->isKnownNonNegative(A1)) { 1944 if (SE->isKnownNonNegative(A2)) { 1945 // A1 >= 0 && A2 >= 0 1946 if (N1) { 1947 // make sure that c2 - c1 <= a1*N1 1948 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1949 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1950 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1951 ++SymbolicRDIVindependence; 1952 return true; 1953 } 1954 } 1955 if (N2) { 1956 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1957 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1958 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1959 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1960 ++SymbolicRDIVindependence; 1961 return true; 1962 } 1963 } 1964 } 1965 else if (SE->isKnownNonPositive(A2)) { 1966 // a1 >= 0 && a2 <= 0 1967 if (N1 && N2) { 1968 // make sure that c2 - c1 <= a1*N1 - a2*N2 1969 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1970 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1971 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1972 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1973 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 1974 ++SymbolicRDIVindependence; 1975 return true; 1976 } 1977 } 1978 // make sure that 0 <= c2 - c1 1979 if (SE->isKnownNegative(C2_C1)) { 1980 ++SymbolicRDIVindependence; 1981 return true; 1982 } 1983 } 1984 } 1985 else if (SE->isKnownNonPositive(A1)) { 1986 if (SE->isKnownNonNegative(A2)) { 1987 // a1 <= 0 && a2 >= 0 1988 if (N1 && N2) { 1989 // make sure that a1*N1 - a2*N2 <= c2 - c1 1990 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1991 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1992 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1993 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1994 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 1995 ++SymbolicRDIVindependence; 1996 return true; 1997 } 1998 } 1999 // make sure that c2 - c1 <= 0 2000 if (SE->isKnownPositive(C2_C1)) { 2001 ++SymbolicRDIVindependence; 2002 return true; 2003 } 2004 } 2005 else if (SE->isKnownNonPositive(A2)) { 2006 // a1 <= 0 && a2 <= 0 2007 if (N1) { 2008 // make sure that a1*N1 <= c2 - c1 2009 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2010 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2011 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2012 ++SymbolicRDIVindependence; 2013 return true; 2014 } 2015 } 2016 if (N2) { 2017 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2018 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2019 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2020 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2021 ++SymbolicRDIVindependence; 2022 return true; 2023 } 2024 } 2025 } 2026 } 2027 return false; 2028 } 2029 2030 2031 // testSIV - 2032 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2033 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2034 // a2 are constant, we attack it with an SIV test. While they can all be 2035 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2036 // they apply; they're cheaper and sometimes more precise. 2037 // 2038 // Return true if dependence disproved. 2039 bool DependenceAnalysis::testSIV(const SCEV *Src, 2040 const SCEV *Dst, 2041 unsigned &Level, 2042 FullDependence &Result, 2043 Constraint &NewConstraint, 2044 const SCEV *&SplitIter) const { 2045 DEBUG(dbgs() << " src = " << *Src << "\n"); 2046 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2047 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2048 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2049 if (SrcAddRec && DstAddRec) { 2050 const SCEV *SrcConst = SrcAddRec->getStart(); 2051 const SCEV *DstConst = DstAddRec->getStart(); 2052 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2053 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2054 const Loop *CurLoop = SrcAddRec->getLoop(); 2055 assert(CurLoop == DstAddRec->getLoop() && 2056 "both loops in SIV should be same"); 2057 Level = mapSrcLoop(CurLoop); 2058 bool disproven; 2059 if (SrcCoeff == DstCoeff) 2060 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2061 Level, Result, NewConstraint); 2062 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2063 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2064 Level, Result, NewConstraint, SplitIter); 2065 else 2066 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2067 Level, Result, NewConstraint); 2068 return disproven || 2069 gcdMIVtest(Src, Dst, Result) || 2070 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2071 } 2072 if (SrcAddRec) { 2073 const SCEV *SrcConst = SrcAddRec->getStart(); 2074 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2075 const SCEV *DstConst = Dst; 2076 const Loop *CurLoop = SrcAddRec->getLoop(); 2077 Level = mapSrcLoop(CurLoop); 2078 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2079 Level, Result, NewConstraint) || 2080 gcdMIVtest(Src, Dst, Result); 2081 } 2082 if (DstAddRec) { 2083 const SCEV *DstConst = DstAddRec->getStart(); 2084 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2085 const SCEV *SrcConst = Src; 2086 const Loop *CurLoop = DstAddRec->getLoop(); 2087 Level = mapDstLoop(CurLoop); 2088 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2089 CurLoop, Level, Result, NewConstraint) || 2090 gcdMIVtest(Src, Dst, Result); 2091 } 2092 llvm_unreachable("SIV test expected at least one AddRec"); 2093 return false; 2094 } 2095 2096 2097 // testRDIV - 2098 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2099 // where i and j are induction variables, c1 and c2 are loop invariant, 2100 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2101 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2102 // It doesn't make sense to talk about distance or direction in this case, 2103 // so there's no point in making special versions of the Strong SIV test or 2104 // the Weak-crossing SIV test. 2105 // 2106 // With minor algebra, this test can also be used for things like 2107 // [c1 + a1*i + a2*j][c2]. 2108 // 2109 // Return true if dependence disproved. 2110 bool DependenceAnalysis::testRDIV(const SCEV *Src, 2111 const SCEV *Dst, 2112 FullDependence &Result) const { 2113 // we have 3 possible situations here: 2114 // 1) [a*i + b] and [c*j + d] 2115 // 2) [a*i + c*j + b] and [d] 2116 // 3) [b] and [a*i + c*j + d] 2117 // We need to find what we've got and get organized 2118 2119 const SCEV *SrcConst, *DstConst; 2120 const SCEV *SrcCoeff, *DstCoeff; 2121 const Loop *SrcLoop, *DstLoop; 2122 2123 DEBUG(dbgs() << " src = " << *Src << "\n"); 2124 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2125 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2126 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2127 if (SrcAddRec && DstAddRec) { 2128 SrcConst = SrcAddRec->getStart(); 2129 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2130 SrcLoop = SrcAddRec->getLoop(); 2131 DstConst = DstAddRec->getStart(); 2132 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2133 DstLoop = DstAddRec->getLoop(); 2134 } 2135 else if (SrcAddRec) { 2136 if (const SCEVAddRecExpr *tmpAddRec = 2137 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2138 SrcConst = tmpAddRec->getStart(); 2139 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2140 SrcLoop = tmpAddRec->getLoop(); 2141 DstConst = Dst; 2142 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2143 DstLoop = SrcAddRec->getLoop(); 2144 } 2145 else 2146 llvm_unreachable("RDIV reached by surprising SCEVs"); 2147 } 2148 else if (DstAddRec) { 2149 if (const SCEVAddRecExpr *tmpAddRec = 2150 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2151 DstConst = tmpAddRec->getStart(); 2152 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2153 DstLoop = tmpAddRec->getLoop(); 2154 SrcConst = Src; 2155 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2156 SrcLoop = DstAddRec->getLoop(); 2157 } 2158 else 2159 llvm_unreachable("RDIV reached by surprising SCEVs"); 2160 } 2161 else 2162 llvm_unreachable("RDIV expected at least one AddRec"); 2163 return exactRDIVtest(SrcCoeff, DstCoeff, 2164 SrcConst, DstConst, 2165 SrcLoop, DstLoop, 2166 Result) || 2167 gcdMIVtest(Src, Dst, Result) || 2168 symbolicRDIVtest(SrcCoeff, DstCoeff, 2169 SrcConst, DstConst, 2170 SrcLoop, DstLoop); 2171 } 2172 2173 2174 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2175 // Return true if dependence disproved. 2176 // Can sometimes refine direction vectors. 2177 bool DependenceAnalysis::testMIV(const SCEV *Src, 2178 const SCEV *Dst, 2179 const SmallBitVector &Loops, 2180 FullDependence &Result) const { 2181 DEBUG(dbgs() << " src = " << *Src << "\n"); 2182 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2183 Result.Consistent = false; 2184 return gcdMIVtest(Src, Dst, Result) || 2185 banerjeeMIVtest(Src, Dst, Loops, Result); 2186 } 2187 2188 2189 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2190 // in this case 10. If there is no constant part, returns NULL. 2191 static 2192 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { 2193 for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { 2194 if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) 2195 return Constant; 2196 } 2197 return nullptr; 2198 } 2199 2200 2201 //===----------------------------------------------------------------------===// 2202 // gcdMIVtest - 2203 // Tests an MIV subscript pair for dependence. 2204 // Returns true if any possible dependence is disproved. 2205 // Marks the result as inconsistent. 2206 // Can sometimes disprove the equal direction for 1 or more loops, 2207 // as discussed in Michael Wolfe's book, 2208 // High Performance Compilers for Parallel Computing, page 235. 2209 // 2210 // We spend some effort (code!) to handle cases like 2211 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2212 // but M and N are just loop-invariant variables. 2213 // This should help us handle linearized subscripts; 2214 // also makes this test a useful backup to the various SIV tests. 2215 // 2216 // It occurs to me that the presence of loop-invariant variables 2217 // changes the nature of the test from "greatest common divisor" 2218 // to "a common divisor". 2219 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, 2220 const SCEV *Dst, 2221 FullDependence &Result) const { 2222 DEBUG(dbgs() << "starting gcd\n"); 2223 ++GCDapplications; 2224 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2225 APInt RunningGCD = APInt::getNullValue(BitWidth); 2226 2227 // Examine Src coefficients. 2228 // Compute running GCD and record source constant. 2229 // Because we're looking for the constant at the end of the chain, 2230 // we can't quit the loop just because the GCD == 1. 2231 const SCEV *Coefficients = Src; 2232 while (const SCEVAddRecExpr *AddRec = 2233 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2234 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2235 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2236 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2237 // If the coefficient is the product of a constant and other stuff, 2238 // we can use the constant in the GCD computation. 2239 Constant = getConstantPart(Product); 2240 if (!Constant) 2241 return false; 2242 APInt ConstCoeff = Constant->getValue()->getValue(); 2243 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2244 Coefficients = AddRec->getStart(); 2245 } 2246 const SCEV *SrcConst = Coefficients; 2247 2248 // Examine Dst coefficients. 2249 // Compute running GCD and record destination constant. 2250 // Because we're looking for the constant at the end of the chain, 2251 // we can't quit the loop just because the GCD == 1. 2252 Coefficients = Dst; 2253 while (const SCEVAddRecExpr *AddRec = 2254 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2255 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2256 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2257 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2258 // If the coefficient is the product of a constant and other stuff, 2259 // we can use the constant in the GCD computation. 2260 Constant = getConstantPart(Product); 2261 if (!Constant) 2262 return false; 2263 APInt ConstCoeff = Constant->getValue()->getValue(); 2264 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2265 Coefficients = AddRec->getStart(); 2266 } 2267 const SCEV *DstConst = Coefficients; 2268 2269 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2270 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2271 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2272 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2273 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2274 // If Delta is a sum of products, we may be able to make further progress. 2275 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2276 const SCEV *Operand = Sum->getOperand(Op); 2277 if (isa<SCEVConstant>(Operand)) { 2278 assert(!Constant && "Surprised to find multiple constants"); 2279 Constant = cast<SCEVConstant>(Operand); 2280 } 2281 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2282 // Search for constant operand to participate in GCD; 2283 // If none found; return false. 2284 const SCEVConstant *ConstOp = getConstantPart(Product); 2285 if (!ConstOp) 2286 return false; 2287 APInt ConstOpValue = ConstOp->getValue()->getValue(); 2288 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2289 ConstOpValue.abs()); 2290 } 2291 else 2292 return false; 2293 } 2294 } 2295 if (!Constant) 2296 return false; 2297 APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); 2298 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2299 if (ConstDelta == 0) 2300 return false; 2301 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2302 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2303 APInt Remainder = ConstDelta.srem(RunningGCD); 2304 if (Remainder != 0) { 2305 ++GCDindependence; 2306 return true; 2307 } 2308 2309 // Try to disprove equal directions. 2310 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2311 // the code above can't disprove the dependence because the GCD = 1. 2312 // So we consider what happen if i = i' and what happens if j = j'. 2313 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2314 // which is infeasible, so we can disallow the = direction for the i level. 2315 // Setting j = j' doesn't help matters, so we end up with a direction vector 2316 // of [<>, *] 2317 // 2318 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2319 // we need to remember that the constant part is 5 and the RunningGCD should 2320 // be initialized to ExtraGCD = 30. 2321 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2322 2323 bool Improved = false; 2324 Coefficients = Src; 2325 while (const SCEVAddRecExpr *AddRec = 2326 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2327 Coefficients = AddRec->getStart(); 2328 const Loop *CurLoop = AddRec->getLoop(); 2329 RunningGCD = ExtraGCD; 2330 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2331 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2332 const SCEV *Inner = Src; 2333 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2334 AddRec = cast<SCEVAddRecExpr>(Inner); 2335 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2336 if (CurLoop == AddRec->getLoop()) 2337 ; // SrcCoeff == Coeff 2338 else { 2339 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2340 // If the coefficient is the product of a constant and other stuff, 2341 // we can use the constant in the GCD computation. 2342 Constant = getConstantPart(Product); 2343 else 2344 Constant = cast<SCEVConstant>(Coeff); 2345 APInt ConstCoeff = Constant->getValue()->getValue(); 2346 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2347 } 2348 Inner = AddRec->getStart(); 2349 } 2350 Inner = Dst; 2351 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2352 AddRec = cast<SCEVAddRecExpr>(Inner); 2353 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2354 if (CurLoop == AddRec->getLoop()) 2355 DstCoeff = Coeff; 2356 else { 2357 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2358 // If the coefficient is the product of a constant and other stuff, 2359 // we can use the constant in the GCD computation. 2360 Constant = getConstantPart(Product); 2361 else 2362 Constant = cast<SCEVConstant>(Coeff); 2363 APInt ConstCoeff = Constant->getValue()->getValue(); 2364 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2365 } 2366 Inner = AddRec->getStart(); 2367 } 2368 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2369 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) 2370 // If the coefficient is the product of a constant and other stuff, 2371 // we can use the constant in the GCD computation. 2372 Constant = getConstantPart(Product); 2373 else if (isa<SCEVConstant>(Delta)) 2374 Constant = cast<SCEVConstant>(Delta); 2375 else { 2376 // The difference of the two coefficients might not be a product 2377 // or constant, in which case we give up on this direction. 2378 continue; 2379 } 2380 APInt ConstCoeff = Constant->getValue()->getValue(); 2381 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2382 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2383 if (RunningGCD != 0) { 2384 Remainder = ConstDelta.srem(RunningGCD); 2385 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2386 if (Remainder != 0) { 2387 unsigned Level = mapSrcLoop(CurLoop); 2388 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2389 Improved = true; 2390 } 2391 } 2392 } 2393 if (Improved) 2394 ++GCDsuccesses; 2395 DEBUG(dbgs() << "all done\n"); 2396 return false; 2397 } 2398 2399 2400 //===----------------------------------------------------------------------===// 2401 // banerjeeMIVtest - 2402 // Use Banerjee's Inequalities to test an MIV subscript pair. 2403 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2404 // Generally follows the discussion in Section 2.5.2 of 2405 // 2406 // Optimizing Supercompilers for Supercomputers 2407 // Michael Wolfe 2408 // 2409 // The inequalities given on page 25 are simplified in that loops are 2410 // normalized so that the lower bound is always 0 and the stride is always 1. 2411 // For example, Wolfe gives 2412 // 2413 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2414 // 2415 // where A_k is the coefficient of the kth index in the source subscript, 2416 // B_k is the coefficient of the kth index in the destination subscript, 2417 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2418 // index, and N_k is the stride of the kth index. Since all loops are normalized 2419 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2420 // equation to 2421 // 2422 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2423 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2424 // 2425 // Similar simplifications are possible for the other equations. 2426 // 2427 // When we can't determine the number of iterations for a loop, 2428 // we use NULL as an indicator for the worst case, infinity. 2429 // When computing the upper bound, NULL denotes +inf; 2430 // for the lower bound, NULL denotes -inf. 2431 // 2432 // Return true if dependence disproved. 2433 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, 2434 const SCEV *Dst, 2435 const SmallBitVector &Loops, 2436 FullDependence &Result) const { 2437 DEBUG(dbgs() << "starting Banerjee\n"); 2438 ++BanerjeeApplications; 2439 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2440 const SCEV *A0; 2441 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2442 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2443 const SCEV *B0; 2444 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2445 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2446 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2447 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2448 2449 // Compute bounds for all the * directions. 2450 DEBUG(dbgs() << "\tBounds[*]\n"); 2451 for (unsigned K = 1; K <= MaxLevels; ++K) { 2452 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2453 Bound[K].Direction = Dependence::DVEntry::ALL; 2454 Bound[K].DirSet = Dependence::DVEntry::NONE; 2455 findBoundsALL(A, B, Bound, K); 2456 #ifndef NDEBUG 2457 DEBUG(dbgs() << "\t " << K << '\t'); 2458 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2459 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2460 else 2461 DEBUG(dbgs() << "-inf\t"); 2462 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2463 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2464 else 2465 DEBUG(dbgs() << "+inf\n"); 2466 #endif 2467 } 2468 2469 // Test the *, *, *, ... case. 2470 bool Disproved = false; 2471 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2472 // Explore the direction vector hierarchy. 2473 unsigned DepthExpanded = 0; 2474 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2475 Loops, DepthExpanded, Delta); 2476 if (NewDeps > 0) { 2477 bool Improved = false; 2478 for (unsigned K = 1; K <= CommonLevels; ++K) { 2479 if (Loops[K]) { 2480 unsigned Old = Result.DV[K - 1].Direction; 2481 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2482 Improved |= Old != Result.DV[K - 1].Direction; 2483 if (!Result.DV[K - 1].Direction) { 2484 Improved = false; 2485 Disproved = true; 2486 break; 2487 } 2488 } 2489 } 2490 if (Improved) 2491 ++BanerjeeSuccesses; 2492 } 2493 else { 2494 ++BanerjeeIndependence; 2495 Disproved = true; 2496 } 2497 } 2498 else { 2499 ++BanerjeeIndependence; 2500 Disproved = true; 2501 } 2502 delete [] Bound; 2503 delete [] A; 2504 delete [] B; 2505 return Disproved; 2506 } 2507 2508 2509 // Hierarchically expands the direction vector 2510 // search space, combining the directions of discovered dependences 2511 // in the DirSet field of Bound. Returns the number of distinct 2512 // dependences discovered. If the dependence is disproved, 2513 // it will return 0. 2514 unsigned DependenceAnalysis::exploreDirections(unsigned Level, 2515 CoefficientInfo *A, 2516 CoefficientInfo *B, 2517 BoundInfo *Bound, 2518 const SmallBitVector &Loops, 2519 unsigned &DepthExpanded, 2520 const SCEV *Delta) const { 2521 if (Level > CommonLevels) { 2522 // record result 2523 DEBUG(dbgs() << "\t["); 2524 for (unsigned K = 1; K <= CommonLevels; ++K) { 2525 if (Loops[K]) { 2526 Bound[K].DirSet |= Bound[K].Direction; 2527 #ifndef NDEBUG 2528 switch (Bound[K].Direction) { 2529 case Dependence::DVEntry::LT: 2530 DEBUG(dbgs() << " <"); 2531 break; 2532 case Dependence::DVEntry::EQ: 2533 DEBUG(dbgs() << " ="); 2534 break; 2535 case Dependence::DVEntry::GT: 2536 DEBUG(dbgs() << " >"); 2537 break; 2538 case Dependence::DVEntry::ALL: 2539 DEBUG(dbgs() << " *"); 2540 break; 2541 default: 2542 llvm_unreachable("unexpected Bound[K].Direction"); 2543 } 2544 #endif 2545 } 2546 } 2547 DEBUG(dbgs() << " ]\n"); 2548 return 1; 2549 } 2550 if (Loops[Level]) { 2551 if (Level > DepthExpanded) { 2552 DepthExpanded = Level; 2553 // compute bounds for <, =, > at current level 2554 findBoundsLT(A, B, Bound, Level); 2555 findBoundsGT(A, B, Bound, Level); 2556 findBoundsEQ(A, B, Bound, Level); 2557 #ifndef NDEBUG 2558 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2559 DEBUG(dbgs() << "\t <\t"); 2560 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2561 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2562 else 2563 DEBUG(dbgs() << "-inf\t"); 2564 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2565 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2566 else 2567 DEBUG(dbgs() << "+inf\n"); 2568 DEBUG(dbgs() << "\t =\t"); 2569 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2570 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2571 else 2572 DEBUG(dbgs() << "-inf\t"); 2573 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2574 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2575 else 2576 DEBUG(dbgs() << "+inf\n"); 2577 DEBUG(dbgs() << "\t >\t"); 2578 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2579 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2580 else 2581 DEBUG(dbgs() << "-inf\t"); 2582 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2583 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2584 else 2585 DEBUG(dbgs() << "+inf\n"); 2586 #endif 2587 } 2588 2589 unsigned NewDeps = 0; 2590 2591 // test bounds for <, *, *, ... 2592 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2593 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2594 Loops, DepthExpanded, Delta); 2595 2596 // Test bounds for =, *, *, ... 2597 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2598 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2599 Loops, DepthExpanded, Delta); 2600 2601 // test bounds for >, *, *, ... 2602 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2603 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2604 Loops, DepthExpanded, Delta); 2605 2606 Bound[Level].Direction = Dependence::DVEntry::ALL; 2607 return NewDeps; 2608 } 2609 else 2610 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2611 } 2612 2613 2614 // Returns true iff the current bounds are plausible. 2615 bool DependenceAnalysis::testBounds(unsigned char DirKind, 2616 unsigned Level, 2617 BoundInfo *Bound, 2618 const SCEV *Delta) const { 2619 Bound[Level].Direction = DirKind; 2620 if (const SCEV *LowerBound = getLowerBound(Bound)) 2621 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2622 return false; 2623 if (const SCEV *UpperBound = getUpperBound(Bound)) 2624 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2625 return false; 2626 return true; 2627 } 2628 2629 2630 // Computes the upper and lower bounds for level K 2631 // using the * direction. Records them in Bound. 2632 // Wolfe gives the equations 2633 // 2634 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2635 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2636 // 2637 // Since we normalize loops, we can simplify these equations to 2638 // 2639 // LB^*_k = (A^-_k - B^+_k)U_k 2640 // UB^*_k = (A^+_k - B^-_k)U_k 2641 // 2642 // We must be careful to handle the case where the upper bound is unknown. 2643 // Note that the lower bound is always <= 0 2644 // and the upper bound is always >= 0. 2645 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, 2646 CoefficientInfo *B, 2647 BoundInfo *Bound, 2648 unsigned K) const { 2649 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2650 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2651 if (Bound[K].Iterations) { 2652 Bound[K].Lower[Dependence::DVEntry::ALL] = 2653 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2654 Bound[K].Iterations); 2655 Bound[K].Upper[Dependence::DVEntry::ALL] = 2656 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2657 Bound[K].Iterations); 2658 } 2659 else { 2660 // If the difference is 0, we won't need to know the number of iterations. 2661 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2662 Bound[K].Lower[Dependence::DVEntry::ALL] = 2663 SE->getConstant(A[K].Coeff->getType(), 0); 2664 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2665 Bound[K].Upper[Dependence::DVEntry::ALL] = 2666 SE->getConstant(A[K].Coeff->getType(), 0); 2667 } 2668 } 2669 2670 2671 // Computes the upper and lower bounds for level K 2672 // using the = direction. Records them in Bound. 2673 // Wolfe gives the equations 2674 // 2675 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2676 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2677 // 2678 // Since we normalize loops, we can simplify these equations to 2679 // 2680 // LB^=_k = (A_k - B_k)^- U_k 2681 // UB^=_k = (A_k - B_k)^+ U_k 2682 // 2683 // We must be careful to handle the case where the upper bound is unknown. 2684 // Note that the lower bound is always <= 0 2685 // and the upper bound is always >= 0. 2686 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, 2687 CoefficientInfo *B, 2688 BoundInfo *Bound, 2689 unsigned K) const { 2690 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2691 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2692 if (Bound[K].Iterations) { 2693 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2694 const SCEV *NegativePart = getNegativePart(Delta); 2695 Bound[K].Lower[Dependence::DVEntry::EQ] = 2696 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2697 const SCEV *PositivePart = getPositivePart(Delta); 2698 Bound[K].Upper[Dependence::DVEntry::EQ] = 2699 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2700 } 2701 else { 2702 // If the positive/negative part of the difference is 0, 2703 // we won't need to know the number of iterations. 2704 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2705 const SCEV *NegativePart = getNegativePart(Delta); 2706 if (NegativePart->isZero()) 2707 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2708 const SCEV *PositivePart = getPositivePart(Delta); 2709 if (PositivePart->isZero()) 2710 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2711 } 2712 } 2713 2714 2715 // Computes the upper and lower bounds for level K 2716 // using the < direction. Records them in Bound. 2717 // Wolfe gives the equations 2718 // 2719 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2720 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2721 // 2722 // Since we normalize loops, we can simplify these equations to 2723 // 2724 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2725 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2726 // 2727 // We must be careful to handle the case where the upper bound is unknown. 2728 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, 2729 CoefficientInfo *B, 2730 BoundInfo *Bound, 2731 unsigned K) const { 2732 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2733 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2734 if (Bound[K].Iterations) { 2735 const SCEV *Iter_1 = 2736 SE->getMinusSCEV(Bound[K].Iterations, 2737 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2738 const SCEV *NegPart = 2739 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2740 Bound[K].Lower[Dependence::DVEntry::LT] = 2741 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2742 const SCEV *PosPart = 2743 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2744 Bound[K].Upper[Dependence::DVEntry::LT] = 2745 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2746 } 2747 else { 2748 // If the positive/negative part of the difference is 0, 2749 // we won't need to know the number of iterations. 2750 const SCEV *NegPart = 2751 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2752 if (NegPart->isZero()) 2753 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2754 const SCEV *PosPart = 2755 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2756 if (PosPart->isZero()) 2757 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2758 } 2759 } 2760 2761 2762 // Computes the upper and lower bounds for level K 2763 // using the > direction. Records them in Bound. 2764 // Wolfe gives the equations 2765 // 2766 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2767 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2768 // 2769 // Since we normalize loops, we can simplify these equations to 2770 // 2771 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2772 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2773 // 2774 // We must be careful to handle the case where the upper bound is unknown. 2775 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, 2776 CoefficientInfo *B, 2777 BoundInfo *Bound, 2778 unsigned K) const { 2779 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2780 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2781 if (Bound[K].Iterations) { 2782 const SCEV *Iter_1 = 2783 SE->getMinusSCEV(Bound[K].Iterations, 2784 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2785 const SCEV *NegPart = 2786 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2787 Bound[K].Lower[Dependence::DVEntry::GT] = 2788 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2789 const SCEV *PosPart = 2790 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2791 Bound[K].Upper[Dependence::DVEntry::GT] = 2792 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2793 } 2794 else { 2795 // If the positive/negative part of the difference is 0, 2796 // we won't need to know the number of iterations. 2797 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2798 if (NegPart->isZero()) 2799 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2800 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2801 if (PosPart->isZero()) 2802 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2803 } 2804 } 2805 2806 2807 // X^+ = max(X, 0) 2808 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { 2809 return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); 2810 } 2811 2812 2813 // X^- = min(X, 0) 2814 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { 2815 return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); 2816 } 2817 2818 2819 // Walks through the subscript, 2820 // collecting each coefficient, the associated loop bounds, 2821 // and recording its positive and negative parts for later use. 2822 DependenceAnalysis::CoefficientInfo * 2823 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, 2824 bool SrcFlag, 2825 const SCEV *&Constant) const { 2826 const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); 2827 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2828 for (unsigned K = 1; K <= MaxLevels; ++K) { 2829 CI[K].Coeff = Zero; 2830 CI[K].PosPart = Zero; 2831 CI[K].NegPart = Zero; 2832 CI[K].Iterations = nullptr; 2833 } 2834 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2835 const Loop *L = AddRec->getLoop(); 2836 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2837 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2838 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2839 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2840 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2841 Subscript = AddRec->getStart(); 2842 } 2843 Constant = Subscript; 2844 #ifndef NDEBUG 2845 DEBUG(dbgs() << "\tCoefficient Info\n"); 2846 for (unsigned K = 1; K <= MaxLevels; ++K) { 2847 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2848 DEBUG(dbgs() << "\tPos Part = "); 2849 DEBUG(dbgs() << *CI[K].PosPart); 2850 DEBUG(dbgs() << "\tNeg Part = "); 2851 DEBUG(dbgs() << *CI[K].NegPart); 2852 DEBUG(dbgs() << "\tUpper Bound = "); 2853 if (CI[K].Iterations) 2854 DEBUG(dbgs() << *CI[K].Iterations); 2855 else 2856 DEBUG(dbgs() << "+inf"); 2857 DEBUG(dbgs() << '\n'); 2858 } 2859 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2860 #endif 2861 return CI; 2862 } 2863 2864 2865 // Looks through all the bounds info and 2866 // computes the lower bound given the current direction settings 2867 // at each level. If the lower bound for any level is -inf, 2868 // the result is -inf. 2869 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { 2870 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2871 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2872 if (Bound[K].Lower[Bound[K].Direction]) 2873 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2874 else 2875 Sum = nullptr; 2876 } 2877 return Sum; 2878 } 2879 2880 2881 // Looks through all the bounds info and 2882 // computes the upper bound given the current direction settings 2883 // at each level. If the upper bound at any level is +inf, 2884 // the result is +inf. 2885 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { 2886 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2887 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2888 if (Bound[K].Upper[Bound[K].Direction]) 2889 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2890 else 2891 Sum = nullptr; 2892 } 2893 return Sum; 2894 } 2895 2896 2897 //===----------------------------------------------------------------------===// 2898 // Constraint manipulation for Delta test. 2899 2900 // Given a linear SCEV, 2901 // return the coefficient (the step) 2902 // corresponding to the specified loop. 2903 // If there isn't one, return 0. 2904 // For example, given a*i + b*j + c*k, zeroing the coefficient 2905 // corresponding to the j loop would yield b. 2906 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, 2907 const Loop *TargetLoop) const { 2908 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2909 if (!AddRec) 2910 return SE->getConstant(Expr->getType(), 0); 2911 if (AddRec->getLoop() == TargetLoop) 2912 return AddRec->getStepRecurrence(*SE); 2913 return findCoefficient(AddRec->getStart(), TargetLoop); 2914 } 2915 2916 2917 // Given a linear SCEV, 2918 // return the SCEV given by zeroing out the coefficient 2919 // corresponding to the specified loop. 2920 // For example, given a*i + b*j + c*k, zeroing the coefficient 2921 // corresponding to the j loop would yield a*i + c*k. 2922 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, 2923 const Loop *TargetLoop) const { 2924 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2925 if (!AddRec) 2926 return Expr; // ignore 2927 if (AddRec->getLoop() == TargetLoop) 2928 return AddRec->getStart(); 2929 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2930 AddRec->getStepRecurrence(*SE), 2931 AddRec->getLoop(), 2932 AddRec->getNoWrapFlags()); 2933 } 2934 2935 2936 // Given a linear SCEV Expr, 2937 // return the SCEV given by adding some Value to the 2938 // coefficient corresponding to the specified TargetLoop. 2939 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2940 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2941 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, 2942 const Loop *TargetLoop, 2943 const SCEV *Value) const { 2944 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2945 if (!AddRec) // create a new addRec 2946 return SE->getAddRecExpr(Expr, 2947 Value, 2948 TargetLoop, 2949 SCEV::FlagAnyWrap); // Worst case, with no info. 2950 if (AddRec->getLoop() == TargetLoop) { 2951 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2952 if (Sum->isZero()) 2953 return AddRec->getStart(); 2954 return SE->getAddRecExpr(AddRec->getStart(), 2955 Sum, 2956 AddRec->getLoop(), 2957 AddRec->getNoWrapFlags()); 2958 } 2959 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2960 return SE->getAddRecExpr(AddRec, 2961 Value, 2962 TargetLoop, 2963 SCEV::FlagAnyWrap); 2964 return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(), 2965 TargetLoop, Value), 2966 AddRec->getStepRecurrence(*SE), 2967 AddRec->getLoop(), 2968 AddRec->getNoWrapFlags()); 2969 } 2970 2971 2972 // Review the constraints, looking for opportunities 2973 // to simplify a subscript pair (Src and Dst). 2974 // Return true if some simplification occurs. 2975 // If the simplification isn't exact (that is, if it is conservative 2976 // in terms of dependence), set consistent to false. 2977 // Corresponds to Figure 5 from the paper 2978 // 2979 // Practical Dependence Testing 2980 // Goff, Kennedy, Tseng 2981 // PLDI 1991 2982 bool DependenceAnalysis::propagate(const SCEV *&Src, 2983 const SCEV *&Dst, 2984 SmallBitVector &Loops, 2985 SmallVectorImpl<Constraint> &Constraints, 2986 bool &Consistent) { 2987 bool Result = false; 2988 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 2989 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 2990 DEBUG(Constraints[LI].dump(dbgs())); 2991 if (Constraints[LI].isDistance()) 2992 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 2993 else if (Constraints[LI].isLine()) 2994 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 2995 else if (Constraints[LI].isPoint()) 2996 Result |= propagatePoint(Src, Dst, Constraints[LI]); 2997 } 2998 return Result; 2999 } 3000 3001 3002 // Attempt to propagate a distance 3003 // constraint into a subscript pair (Src and Dst). 3004 // Return true if some simplification occurs. 3005 // If the simplification isn't exact (that is, if it is conservative 3006 // in terms of dependence), set consistent to false. 3007 bool DependenceAnalysis::propagateDistance(const SCEV *&Src, 3008 const SCEV *&Dst, 3009 Constraint &CurConstraint, 3010 bool &Consistent) { 3011 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3012 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3013 const SCEV *A_K = findCoefficient(Src, CurLoop); 3014 if (A_K->isZero()) 3015 return false; 3016 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3017 Src = SE->getMinusSCEV(Src, DA_K); 3018 Src = zeroCoefficient(Src, CurLoop); 3019 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3020 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3021 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3022 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3023 if (!findCoefficient(Dst, CurLoop)->isZero()) 3024 Consistent = false; 3025 return true; 3026 } 3027 3028 3029 // Attempt to propagate a line 3030 // constraint into a subscript pair (Src and Dst). 3031 // Return true if some simplification occurs. 3032 // If the simplification isn't exact (that is, if it is conservative 3033 // in terms of dependence), set consistent to false. 3034 bool DependenceAnalysis::propagateLine(const SCEV *&Src, 3035 const SCEV *&Dst, 3036 Constraint &CurConstraint, 3037 bool &Consistent) { 3038 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3039 const SCEV *A = CurConstraint.getA(); 3040 const SCEV *B = CurConstraint.getB(); 3041 const SCEV *C = CurConstraint.getC(); 3042 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3043 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3044 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3045 if (A->isZero()) { 3046 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3047 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3048 if (!Bconst || !Cconst) return false; 3049 APInt Beta = Bconst->getValue()->getValue(); 3050 APInt Charlie = Cconst->getValue()->getValue(); 3051 APInt CdivB = Charlie.sdiv(Beta); 3052 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3053 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3054 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3055 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3056 Dst = zeroCoefficient(Dst, CurLoop); 3057 if (!findCoefficient(Src, CurLoop)->isZero()) 3058 Consistent = false; 3059 } 3060 else if (B->isZero()) { 3061 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3062 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3063 if (!Aconst || !Cconst) return false; 3064 APInt Alpha = Aconst->getValue()->getValue(); 3065 APInt Charlie = Cconst->getValue()->getValue(); 3066 APInt CdivA = Charlie.sdiv(Alpha); 3067 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3068 const SCEV *A_K = findCoefficient(Src, CurLoop); 3069 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3070 Src = zeroCoefficient(Src, CurLoop); 3071 if (!findCoefficient(Dst, CurLoop)->isZero()) 3072 Consistent = false; 3073 } 3074 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3075 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3076 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3077 if (!Aconst || !Cconst) return false; 3078 APInt Alpha = Aconst->getValue()->getValue(); 3079 APInt Charlie = Cconst->getValue()->getValue(); 3080 APInt CdivA = Charlie.sdiv(Alpha); 3081 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3082 const SCEV *A_K = findCoefficient(Src, CurLoop); 3083 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3084 Src = zeroCoefficient(Src, CurLoop); 3085 Dst = addToCoefficient(Dst, CurLoop, A_K); 3086 if (!findCoefficient(Dst, CurLoop)->isZero()) 3087 Consistent = false; 3088 } 3089 else { 3090 // paper is incorrect here, or perhaps just misleading 3091 const SCEV *A_K = findCoefficient(Src, CurLoop); 3092 Src = SE->getMulExpr(Src, A); 3093 Dst = SE->getMulExpr(Dst, A); 3094 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3095 Src = zeroCoefficient(Src, CurLoop); 3096 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3097 if (!findCoefficient(Dst, CurLoop)->isZero()) 3098 Consistent = false; 3099 } 3100 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3101 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3102 return true; 3103 } 3104 3105 3106 // Attempt to propagate a point 3107 // constraint into a subscript pair (Src and Dst). 3108 // Return true if some simplification occurs. 3109 bool DependenceAnalysis::propagatePoint(const SCEV *&Src, 3110 const SCEV *&Dst, 3111 Constraint &CurConstraint) { 3112 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3113 const SCEV *A_K = findCoefficient(Src, CurLoop); 3114 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3115 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3116 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3117 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3118 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3119 Src = zeroCoefficient(Src, CurLoop); 3120 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3121 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3122 Dst = zeroCoefficient(Dst, CurLoop); 3123 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3124 return true; 3125 } 3126 3127 3128 // Update direction vector entry based on the current constraint. 3129 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, 3130 const Constraint &CurConstraint 3131 ) const { 3132 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3133 DEBUG(CurConstraint.dump(dbgs())); 3134 if (CurConstraint.isAny()) 3135 ; // use defaults 3136 else if (CurConstraint.isDistance()) { 3137 // this one is consistent, the others aren't 3138 Level.Scalar = false; 3139 Level.Distance = CurConstraint.getD(); 3140 unsigned NewDirection = Dependence::DVEntry::NONE; 3141 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3142 NewDirection = Dependence::DVEntry::EQ; 3143 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3144 NewDirection |= Dependence::DVEntry::LT; 3145 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3146 NewDirection |= Dependence::DVEntry::GT; 3147 Level.Direction &= NewDirection; 3148 } 3149 else if (CurConstraint.isLine()) { 3150 Level.Scalar = false; 3151 Level.Distance = nullptr; 3152 // direction should be accurate 3153 } 3154 else if (CurConstraint.isPoint()) { 3155 Level.Scalar = false; 3156 Level.Distance = nullptr; 3157 unsigned NewDirection = Dependence::DVEntry::NONE; 3158 if (!isKnownPredicate(CmpInst::ICMP_NE, 3159 CurConstraint.getY(), 3160 CurConstraint.getX())) 3161 // if X may be = Y 3162 NewDirection |= Dependence::DVEntry::EQ; 3163 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3164 CurConstraint.getY(), 3165 CurConstraint.getX())) 3166 // if Y may be > X 3167 NewDirection |= Dependence::DVEntry::LT; 3168 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3169 CurConstraint.getY(), 3170 CurConstraint.getX())) 3171 // if Y may be < X 3172 NewDirection |= Dependence::DVEntry::GT; 3173 Level.Direction &= NewDirection; 3174 } 3175 else 3176 llvm_unreachable("constraint has unexpected kind"); 3177 } 3178 3179 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3180 /// source and destination array references are recurrences on a nested loop, 3181 /// this function flattens the nested recurrences into separate recurrences 3182 /// for each loop level. 3183 bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, 3184 const SCEV *DstSCEV, 3185 SmallVectorImpl<Subscript> &Pair, 3186 const SCEV *ElementSize) const { 3187 const SCEVUnknown *SrcBase = 3188 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); 3189 const SCEVUnknown *DstBase = 3190 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); 3191 3192 if (!SrcBase || !DstBase || SrcBase != DstBase) 3193 return false; 3194 3195 SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); 3196 DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); 3197 3198 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3199 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3200 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3201 return false; 3202 3203 // First step: collect parametric terms in both array references. 3204 SmallVector<const SCEV *, 4> Terms; 3205 SrcAR->collectParametricTerms(*SE, Terms); 3206 DstAR->collectParametricTerms(*SE, Terms); 3207 3208 // Second step: find subscript sizes. 3209 SmallVector<const SCEV *, 4> Sizes; 3210 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3211 3212 // Third step: compute the access functions for each subscript. 3213 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3214 SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); 3215 DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); 3216 3217 // Fail when there is only a subscript: that's a linearized access function. 3218 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3219 SrcSubscripts.size() != DstSubscripts.size()) 3220 return false; 3221 3222 int size = SrcSubscripts.size(); 3223 3224 DEBUG({ 3225 dbgs() << "\nSrcSubscripts: "; 3226 for (int i = 0; i < size; i++) 3227 dbgs() << *SrcSubscripts[i]; 3228 dbgs() << "\nDstSubscripts: "; 3229 for (int i = 0; i < size; i++) 3230 dbgs() << *DstSubscripts[i]; 3231 }); 3232 3233 // The delinearization transforms a single-subscript MIV dependence test into 3234 // a multi-subscript SIV dependence test that is easier to compute. So we 3235 // resize Pair to contain as many pairs of subscripts as the delinearization 3236 // has found, and then initialize the pairs following the delinearization. 3237 Pair.resize(size); 3238 for (int i = 0; i < size; ++i) { 3239 Pair[i].Src = SrcSubscripts[i]; 3240 Pair[i].Dst = DstSubscripts[i]; 3241 3242 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3243 // delinearization has found, and add these constraints to the dependence 3244 // check to avoid memory accesses overflow from one dimension into another. 3245 // This is related to the problem of determining the existence of data 3246 // dependences in array accesses using a different number of subscripts: in 3247 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3248 } 3249 3250 return true; 3251 } 3252 3253 //===----------------------------------------------------------------------===// 3254 3255 #ifndef NDEBUG 3256 // For debugging purposes, dump a small bit vector to dbgs(). 3257 static void dumpSmallBitVector(SmallBitVector &BV) { 3258 dbgs() << "{"; 3259 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3260 dbgs() << VI; 3261 if (BV.find_next(VI) >= 0) 3262 dbgs() << ' '; 3263 } 3264 dbgs() << "}\n"; 3265 } 3266 #endif 3267 3268 3269 // depends - 3270 // Returns NULL if there is no dependence. 3271 // Otherwise, return a Dependence with as many details as possible. 3272 // Corresponds to Section 3.1 in the paper 3273 // 3274 // Practical Dependence Testing 3275 // Goff, Kennedy, Tseng 3276 // PLDI 1991 3277 // 3278 // Care is required to keep the routine below, getSplitIteration(), 3279 // up to date with respect to this routine. 3280 Dependence *DependenceAnalysis::depends(Instruction *Src, 3281 Instruction *Dst, 3282 bool PossiblyLoopIndependent) { 3283 if (Src == Dst) 3284 PossiblyLoopIndependent = false; 3285 3286 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3287 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3288 // if both instructions don't reference memory, there's no dependence 3289 return nullptr; 3290 3291 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3292 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3293 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3294 return new Dependence(Src, Dst); 3295 } 3296 3297 Value *SrcPtr = getPointerOperand(Src); 3298 Value *DstPtr = getPointerOperand(Dst); 3299 3300 switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { 3301 case AliasAnalysis::MayAlias: 3302 case AliasAnalysis::PartialAlias: 3303 // cannot analyse objects if we don't understand their aliasing. 3304 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3305 return new Dependence(Src, Dst); 3306 case AliasAnalysis::NoAlias: 3307 // If the objects noalias, they are distinct, accesses are independent. 3308 DEBUG(dbgs() << "no alias\n"); 3309 return nullptr; 3310 case AliasAnalysis::MustAlias: 3311 break; // The underlying objects alias; test accesses for dependence. 3312 } 3313 3314 // establish loop nesting levels 3315 establishNestingLevels(Src, Dst); 3316 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3317 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3318 3319 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3320 ++TotalArrayPairs; 3321 3322 // See if there are GEPs we can use. 3323 bool UsefulGEP = false; 3324 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3325 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3326 if (SrcGEP && DstGEP && 3327 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3328 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3329 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3330 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3331 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3332 3333 UsefulGEP = 3334 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3335 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3336 } 3337 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3338 SmallVector<Subscript, 4> Pair(Pairs); 3339 if (UsefulGEP) { 3340 DEBUG(dbgs() << " using GEPs\n"); 3341 unsigned P = 0; 3342 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3343 SrcEnd = SrcGEP->idx_end(), 3344 DstIdx = DstGEP->idx_begin(); 3345 SrcIdx != SrcEnd; 3346 ++SrcIdx, ++DstIdx, ++P) { 3347 Pair[P].Src = SE->getSCEV(*SrcIdx); 3348 Pair[P].Dst = SE->getSCEV(*DstIdx); 3349 } 3350 } 3351 else { 3352 DEBUG(dbgs() << " ignoring GEPs\n"); 3353 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3354 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3355 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3356 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3357 Pair[0].Src = SrcSCEV; 3358 Pair[0].Dst = DstSCEV; 3359 } 3360 3361 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3362 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3363 DEBUG(dbgs() << " delinerized GEP\n"); 3364 Pairs = Pair.size(); 3365 } 3366 3367 for (unsigned P = 0; P < Pairs; ++P) { 3368 Pair[P].Loops.resize(MaxLevels + 1); 3369 Pair[P].GroupLoops.resize(MaxLevels + 1); 3370 Pair[P].Group.resize(Pairs); 3371 removeMatchingExtensions(&Pair[P]); 3372 Pair[P].Classification = 3373 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3374 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3375 Pair[P].Loops); 3376 Pair[P].GroupLoops = Pair[P].Loops; 3377 Pair[P].Group.set(P); 3378 DEBUG(dbgs() << " subscript " << P << "\n"); 3379 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3380 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3381 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3382 DEBUG(dbgs() << "\tloops = "); 3383 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3384 } 3385 3386 SmallBitVector Separable(Pairs); 3387 SmallBitVector Coupled(Pairs); 3388 3389 // Partition subscripts into separable and minimally-coupled groups 3390 // Algorithm in paper is algorithmically better; 3391 // this may be faster in practice. Check someday. 3392 // 3393 // Here's an example of how it works. Consider this code: 3394 // 3395 // for (i = ...) { 3396 // for (j = ...) { 3397 // for (k = ...) { 3398 // for (l = ...) { 3399 // for (m = ...) { 3400 // A[i][j][k][m] = ...; 3401 // ... = A[0][j][l][i + j]; 3402 // } 3403 // } 3404 // } 3405 // } 3406 // } 3407 // 3408 // There are 4 subscripts here: 3409 // 0 [i] and [0] 3410 // 1 [j] and [j] 3411 // 2 [k] and [l] 3412 // 3 [m] and [i + j] 3413 // 3414 // We've already classified each subscript pair as ZIV, SIV, etc., 3415 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3416 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3417 // and set Pair[P].Group = {P}. 3418 // 3419 // Src Dst Classification Loops GroupLoops Group 3420 // 0 [i] [0] SIV {1} {1} {0} 3421 // 1 [j] [j] SIV {2} {2} {1} 3422 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3423 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3424 // 3425 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3426 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3427 // 3428 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3429 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3430 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3431 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3432 // to either Separable or Coupled). 3433 // 3434 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3435 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3436 // so Pair[3].Group = {0, 1, 3} and Done = false. 3437 // 3438 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3439 // Since Done remains true, we add 2 to the set of Separable pairs. 3440 // 3441 // Finally, we consider 3. There's nothing to compare it with, 3442 // so Done remains true and we add it to the Coupled set. 3443 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3444 // 3445 // In the end, we've got 1 separable subscript and 1 coupled group. 3446 for (unsigned SI = 0; SI < Pairs; ++SI) { 3447 if (Pair[SI].Classification == Subscript::NonLinear) { 3448 // ignore these, but collect loops for later 3449 ++NonlinearSubscriptPairs; 3450 collectCommonLoops(Pair[SI].Src, 3451 LI->getLoopFor(Src->getParent()), 3452 Pair[SI].Loops); 3453 collectCommonLoops(Pair[SI].Dst, 3454 LI->getLoopFor(Dst->getParent()), 3455 Pair[SI].Loops); 3456 Result.Consistent = false; 3457 } 3458 else if (Pair[SI].Classification == Subscript::ZIV) { 3459 // always separable 3460 Separable.set(SI); 3461 } 3462 else { 3463 // SIV, RDIV, or MIV, so check for coupled group 3464 bool Done = true; 3465 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3466 SmallBitVector Intersection = Pair[SI].GroupLoops; 3467 Intersection &= Pair[SJ].GroupLoops; 3468 if (Intersection.any()) { 3469 // accumulate set of all the loops in group 3470 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3471 // accumulate set of all subscripts in group 3472 Pair[SJ].Group |= Pair[SI].Group; 3473 Done = false; 3474 } 3475 } 3476 if (Done) { 3477 if (Pair[SI].Group.count() == 1) { 3478 Separable.set(SI); 3479 ++SeparableSubscriptPairs; 3480 } 3481 else { 3482 Coupled.set(SI); 3483 ++CoupledSubscriptPairs; 3484 } 3485 } 3486 } 3487 } 3488 3489 DEBUG(dbgs() << " Separable = "); 3490 DEBUG(dumpSmallBitVector(Separable)); 3491 DEBUG(dbgs() << " Coupled = "); 3492 DEBUG(dumpSmallBitVector(Coupled)); 3493 3494 Constraint NewConstraint; 3495 NewConstraint.setAny(SE); 3496 3497 // test separable subscripts 3498 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3499 DEBUG(dbgs() << "testing subscript " << SI); 3500 switch (Pair[SI].Classification) { 3501 case Subscript::ZIV: 3502 DEBUG(dbgs() << ", ZIV\n"); 3503 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3504 return nullptr; 3505 break; 3506 case Subscript::SIV: { 3507 DEBUG(dbgs() << ", SIV\n"); 3508 unsigned Level; 3509 const SCEV *SplitIter = nullptr; 3510 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3511 Result, NewConstraint, SplitIter)) 3512 return nullptr; 3513 break; 3514 } 3515 case Subscript::RDIV: 3516 DEBUG(dbgs() << ", RDIV\n"); 3517 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3518 return nullptr; 3519 break; 3520 case Subscript::MIV: 3521 DEBUG(dbgs() << ", MIV\n"); 3522 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3523 return nullptr; 3524 break; 3525 default: 3526 llvm_unreachable("subscript has unexpected classification"); 3527 } 3528 } 3529 3530 if (Coupled.count()) { 3531 // test coupled subscript groups 3532 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3533 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3534 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3535 for (unsigned II = 0; II <= MaxLevels; ++II) 3536 Constraints[II].setAny(SE); 3537 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3538 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3539 SmallBitVector Group(Pair[SI].Group); 3540 SmallBitVector Sivs(Pairs); 3541 SmallBitVector Mivs(Pairs); 3542 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3543 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3544 DEBUG(dbgs() << SJ << " "); 3545 if (Pair[SJ].Classification == Subscript::SIV) 3546 Sivs.set(SJ); 3547 else 3548 Mivs.set(SJ); 3549 } 3550 DEBUG(dbgs() << "}\n"); 3551 while (Sivs.any()) { 3552 bool Changed = false; 3553 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3554 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3555 // SJ is an SIV subscript that's part of the current coupled group 3556 unsigned Level; 3557 const SCEV *SplitIter = nullptr; 3558 DEBUG(dbgs() << "SIV\n"); 3559 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3560 Result, NewConstraint, SplitIter)) 3561 return nullptr; 3562 ConstrainedLevels.set(Level); 3563 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3564 if (Constraints[Level].isEmpty()) { 3565 ++DeltaIndependence; 3566 return nullptr; 3567 } 3568 Changed = true; 3569 } 3570 Sivs.reset(SJ); 3571 } 3572 if (Changed) { 3573 // propagate, possibly creating new SIVs and ZIVs 3574 DEBUG(dbgs() << " propagating\n"); 3575 DEBUG(dbgs() << "\tMivs = "); 3576 DEBUG(dumpSmallBitVector(Mivs)); 3577 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3578 // SJ is an MIV subscript that's part of the current coupled group 3579 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3580 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3581 Constraints, Result.Consistent)) { 3582 DEBUG(dbgs() << "\t Changed\n"); 3583 ++DeltaPropagations; 3584 Pair[SJ].Classification = 3585 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3586 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3587 Pair[SJ].Loops); 3588 switch (Pair[SJ].Classification) { 3589 case Subscript::ZIV: 3590 DEBUG(dbgs() << "ZIV\n"); 3591 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3592 return nullptr; 3593 Mivs.reset(SJ); 3594 break; 3595 case Subscript::SIV: 3596 Sivs.set(SJ); 3597 Mivs.reset(SJ); 3598 break; 3599 case Subscript::RDIV: 3600 case Subscript::MIV: 3601 break; 3602 default: 3603 llvm_unreachable("bad subscript classification"); 3604 } 3605 } 3606 } 3607 } 3608 } 3609 3610 // test & propagate remaining RDIVs 3611 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3612 if (Pair[SJ].Classification == Subscript::RDIV) { 3613 DEBUG(dbgs() << "RDIV test\n"); 3614 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3615 return nullptr; 3616 // I don't yet understand how to propagate RDIV results 3617 Mivs.reset(SJ); 3618 } 3619 } 3620 3621 // test remaining MIVs 3622 // This code is temporary. 3623 // Better to somehow test all remaining subscripts simultaneously. 3624 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3625 if (Pair[SJ].Classification == Subscript::MIV) { 3626 DEBUG(dbgs() << "MIV test\n"); 3627 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3628 return nullptr; 3629 } 3630 else 3631 llvm_unreachable("expected only MIV subscripts at this point"); 3632 } 3633 3634 // update Result.DV from constraint vector 3635 DEBUG(dbgs() << " updating\n"); 3636 for (int SJ = ConstrainedLevels.find_first(); 3637 SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { 3638 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3639 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3640 return nullptr; 3641 } 3642 } 3643 } 3644 3645 // Make sure the Scalar flags are set correctly. 3646 SmallBitVector CompleteLoops(MaxLevels + 1); 3647 for (unsigned SI = 0; SI < Pairs; ++SI) 3648 CompleteLoops |= Pair[SI].Loops; 3649 for (unsigned II = 1; II <= CommonLevels; ++II) 3650 if (CompleteLoops[II]) 3651 Result.DV[II - 1].Scalar = false; 3652 3653 if (PossiblyLoopIndependent) { 3654 // Make sure the LoopIndependent flag is set correctly. 3655 // All directions must include equal, otherwise no 3656 // loop-independent dependence is possible. 3657 for (unsigned II = 1; II <= CommonLevels; ++II) { 3658 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3659 Result.LoopIndependent = false; 3660 break; 3661 } 3662 } 3663 } 3664 else { 3665 // On the other hand, if all directions are equal and there's no 3666 // loop-independent dependence possible, then no dependence exists. 3667 bool AllEqual = true; 3668 for (unsigned II = 1; II <= CommonLevels; ++II) { 3669 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3670 AllEqual = false; 3671 break; 3672 } 3673 } 3674 if (AllEqual) 3675 return nullptr; 3676 } 3677 3678 FullDependence *Final = new FullDependence(Result); 3679 Result.DV = nullptr; 3680 return Final; 3681 } 3682 3683 3684 3685 //===----------------------------------------------------------------------===// 3686 // getSplitIteration - 3687 // Rather than spend rarely-used space recording the splitting iteration 3688 // during the Weak-Crossing SIV test, we re-compute it on demand. 3689 // The re-computation is basically a repeat of the entire dependence test, 3690 // though simplified since we know that the dependence exists. 3691 // It's tedious, since we must go through all propagations, etc. 3692 // 3693 // Care is required to keep this code up to date with respect to the routine 3694 // above, depends(). 3695 // 3696 // Generally, the dependence analyzer will be used to build 3697 // a dependence graph for a function (basically a map from instructions 3698 // to dependences). Looking for cycles in the graph shows us loops 3699 // that cannot be trivially vectorized/parallelized. 3700 // 3701 // We can try to improve the situation by examining all the dependences 3702 // that make up the cycle, looking for ones we can break. 3703 // Sometimes, peeling the first or last iteration of a loop will break 3704 // dependences, and we've got flags for those possibilities. 3705 // Sometimes, splitting a loop at some other iteration will do the trick, 3706 // and we've got a flag for that case. Rather than waste the space to 3707 // record the exact iteration (since we rarely know), we provide 3708 // a method that calculates the iteration. It's a drag that it must work 3709 // from scratch, but wonderful in that it's possible. 3710 // 3711 // Here's an example: 3712 // 3713 // for (i = 0; i < 10; i++) 3714 // A[i] = ... 3715 // ... = A[11 - i] 3716 // 3717 // There's a loop-carried flow dependence from the store to the load, 3718 // found by the weak-crossing SIV test. The dependence will have a flag, 3719 // indicating that the dependence can be broken by splitting the loop. 3720 // Calling getSplitIteration will return 5. 3721 // Splitting the loop breaks the dependence, like so: 3722 // 3723 // for (i = 0; i <= 5; i++) 3724 // A[i] = ... 3725 // ... = A[11 - i] 3726 // for (i = 6; i < 10; i++) 3727 // A[i] = ... 3728 // ... = A[11 - i] 3729 // 3730 // breaks the dependence and allows us to vectorize/parallelize 3731 // both loops. 3732 const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, 3733 unsigned SplitLevel) { 3734 assert(Dep && "expected a pointer to a Dependence"); 3735 assert(Dep->isSplitable(SplitLevel) && 3736 "Dep should be splitable at SplitLevel"); 3737 Instruction *Src = Dep->getSrc(); 3738 Instruction *Dst = Dep->getDst(); 3739 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3740 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3741 assert(isLoadOrStore(Src)); 3742 assert(isLoadOrStore(Dst)); 3743 Value *SrcPtr = getPointerOperand(Src); 3744 Value *DstPtr = getPointerOperand(Dst); 3745 assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == 3746 AliasAnalysis::MustAlias); 3747 3748 // establish loop nesting levels 3749 establishNestingLevels(Src, Dst); 3750 3751 FullDependence Result(Src, Dst, false, CommonLevels); 3752 3753 // See if there are GEPs we can use. 3754 bool UsefulGEP = false; 3755 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3756 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3757 if (SrcGEP && DstGEP && 3758 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3759 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3760 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3761 UsefulGEP = 3762 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3763 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3764 } 3765 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3766 SmallVector<Subscript, 4> Pair(Pairs); 3767 if (UsefulGEP) { 3768 unsigned P = 0; 3769 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3770 SrcEnd = SrcGEP->idx_end(), 3771 DstIdx = DstGEP->idx_begin(); 3772 SrcIdx != SrcEnd; 3773 ++SrcIdx, ++DstIdx, ++P) { 3774 Pair[P].Src = SE->getSCEV(*SrcIdx); 3775 Pair[P].Dst = SE->getSCEV(*DstIdx); 3776 } 3777 } 3778 else { 3779 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3780 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3781 Pair[0].Src = SrcSCEV; 3782 Pair[0].Dst = DstSCEV; 3783 } 3784 3785 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3786 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3787 DEBUG(dbgs() << " delinerized GEP\n"); 3788 Pairs = Pair.size(); 3789 } 3790 3791 for (unsigned P = 0; P < Pairs; ++P) { 3792 Pair[P].Loops.resize(MaxLevels + 1); 3793 Pair[P].GroupLoops.resize(MaxLevels + 1); 3794 Pair[P].Group.resize(Pairs); 3795 removeMatchingExtensions(&Pair[P]); 3796 Pair[P].Classification = 3797 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3798 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3799 Pair[P].Loops); 3800 Pair[P].GroupLoops = Pair[P].Loops; 3801 Pair[P].Group.set(P); 3802 } 3803 3804 SmallBitVector Separable(Pairs); 3805 SmallBitVector Coupled(Pairs); 3806 3807 // partition subscripts into separable and minimally-coupled groups 3808 for (unsigned SI = 0; SI < Pairs; ++SI) { 3809 if (Pair[SI].Classification == Subscript::NonLinear) { 3810 // ignore these, but collect loops for later 3811 collectCommonLoops(Pair[SI].Src, 3812 LI->getLoopFor(Src->getParent()), 3813 Pair[SI].Loops); 3814 collectCommonLoops(Pair[SI].Dst, 3815 LI->getLoopFor(Dst->getParent()), 3816 Pair[SI].Loops); 3817 Result.Consistent = false; 3818 } 3819 else if (Pair[SI].Classification == Subscript::ZIV) 3820 Separable.set(SI); 3821 else { 3822 // SIV, RDIV, or MIV, so check for coupled group 3823 bool Done = true; 3824 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3825 SmallBitVector Intersection = Pair[SI].GroupLoops; 3826 Intersection &= Pair[SJ].GroupLoops; 3827 if (Intersection.any()) { 3828 // accumulate set of all the loops in group 3829 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3830 // accumulate set of all subscripts in group 3831 Pair[SJ].Group |= Pair[SI].Group; 3832 Done = false; 3833 } 3834 } 3835 if (Done) { 3836 if (Pair[SI].Group.count() == 1) 3837 Separable.set(SI); 3838 else 3839 Coupled.set(SI); 3840 } 3841 } 3842 } 3843 3844 Constraint NewConstraint; 3845 NewConstraint.setAny(SE); 3846 3847 // test separable subscripts 3848 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3849 switch (Pair[SI].Classification) { 3850 case Subscript::SIV: { 3851 unsigned Level; 3852 const SCEV *SplitIter = nullptr; 3853 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3854 Result, NewConstraint, SplitIter); 3855 if (Level == SplitLevel) { 3856 assert(SplitIter != nullptr); 3857 return SplitIter; 3858 } 3859 break; 3860 } 3861 case Subscript::ZIV: 3862 case Subscript::RDIV: 3863 case Subscript::MIV: 3864 break; 3865 default: 3866 llvm_unreachable("subscript has unexpected classification"); 3867 } 3868 } 3869 3870 if (Coupled.count()) { 3871 // test coupled subscript groups 3872 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3873 for (unsigned II = 0; II <= MaxLevels; ++II) 3874 Constraints[II].setAny(SE); 3875 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3876 SmallBitVector Group(Pair[SI].Group); 3877 SmallBitVector Sivs(Pairs); 3878 SmallBitVector Mivs(Pairs); 3879 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3880 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3881 if (Pair[SJ].Classification == Subscript::SIV) 3882 Sivs.set(SJ); 3883 else 3884 Mivs.set(SJ); 3885 } 3886 while (Sivs.any()) { 3887 bool Changed = false; 3888 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3889 // SJ is an SIV subscript that's part of the current coupled group 3890 unsigned Level; 3891 const SCEV *SplitIter = nullptr; 3892 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3893 Result, NewConstraint, SplitIter); 3894 if (Level == SplitLevel && SplitIter) 3895 return SplitIter; 3896 ConstrainedLevels.set(Level); 3897 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3898 Changed = true; 3899 Sivs.reset(SJ); 3900 } 3901 if (Changed) { 3902 // propagate, possibly creating new SIVs and ZIVs 3903 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3904 // SJ is an MIV subscript that's part of the current coupled group 3905 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3906 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3907 Pair[SJ].Classification = 3908 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3909 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3910 Pair[SJ].Loops); 3911 switch (Pair[SJ].Classification) { 3912 case Subscript::ZIV: 3913 Mivs.reset(SJ); 3914 break; 3915 case Subscript::SIV: 3916 Sivs.set(SJ); 3917 Mivs.reset(SJ); 3918 break; 3919 case Subscript::RDIV: 3920 case Subscript::MIV: 3921 break; 3922 default: 3923 llvm_unreachable("bad subscript classification"); 3924 } 3925 } 3926 } 3927 } 3928 } 3929 } 3930 } 3931 llvm_unreachable("somehow reached end of routine"); 3932 return nullptr; 3933 } 3934