1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/Analysis/AliasAnalysis.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/Analysis/ValueTracking.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 114 //===----------------------------------------------------------------------===// 115 // basics 116 117 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", 118 "Dependence Analysis", true, true) 119 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 120 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 121 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 122 INITIALIZE_PASS_END(DependenceAnalysis, "da", 123 "Dependence Analysis", true, true) 124 125 char DependenceAnalysis::ID = 0; 126 127 128 FunctionPass *llvm::createDependenceAnalysisPass() { 129 return new DependenceAnalysis(); 130 } 131 132 133 bool DependenceAnalysis::runOnFunction(Function &F) { 134 this->F = &F; 135 AA = &getAnalysis<AliasAnalysis>(); 136 SE = &getAnalysis<ScalarEvolution>(); 137 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 138 return false; 139 } 140 141 142 void DependenceAnalysis::releaseMemory() { 143 } 144 145 146 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 147 AU.setPreservesAll(); 148 AU.addRequiredTransitive<AliasAnalysis>(); 149 AU.addRequiredTransitive<ScalarEvolution>(); 150 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 151 } 152 153 154 // Used to test the dependence analyzer. 155 // Looks through the function, noting loads and stores. 156 // Calls depends() on every possible pair and prints out the result. 157 // Ignores all other instructions. 158 static 159 void dumpExampleDependence(raw_ostream &OS, Function *F, 160 DependenceAnalysis *DA) { 161 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); 162 SrcI != SrcE; ++SrcI) { 163 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 164 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 165 DstI != DstE; ++DstI) { 166 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 167 OS << "da analyze - "; 168 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 169 D->dump(OS); 170 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 171 if (D->isSplitable(Level)) { 172 OS << "da analyze - split level = " << Level; 173 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 174 OS << "!\n"; 175 } 176 } 177 } 178 else 179 OS << "none!\n"; 180 } 181 } 182 } 183 } 184 } 185 186 187 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { 188 dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); 189 } 190 191 //===----------------------------------------------------------------------===// 192 // Dependence methods 193 194 // Returns true if this is an input dependence. 195 bool Dependence::isInput() const { 196 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 197 } 198 199 200 // Returns true if this is an output dependence. 201 bool Dependence::isOutput() const { 202 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 203 } 204 205 206 // Returns true if this is an flow (aka true) dependence. 207 bool Dependence::isFlow() const { 208 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 209 } 210 211 212 // Returns true if this is an anti dependence. 213 bool Dependence::isAnti() const { 214 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 215 } 216 217 218 // Returns true if a particular level is scalar; that is, 219 // if no subscript in the source or destination mention the induction 220 // variable associated with the loop at this level. 221 // Leave this out of line, so it will serve as a virtual method anchor 222 bool Dependence::isScalar(unsigned level) const { 223 return false; 224 } 225 226 227 //===----------------------------------------------------------------------===// 228 // FullDependence methods 229 230 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 231 bool PossiblyLoopIndependent, 232 unsigned CommonLevels) 233 : Dependence(Source, Destination), Levels(CommonLevels), 234 LoopIndependent(PossiblyLoopIndependent) { 235 Consistent = true; 236 DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; 237 } 238 239 // The rest are simple getters that hide the implementation. 240 241 // getDirection - Returns the direction associated with a particular level. 242 unsigned FullDependence::getDirection(unsigned Level) const { 243 assert(0 < Level && Level <= Levels && "Level out of range"); 244 return DV[Level - 1].Direction; 245 } 246 247 248 // Returns the distance (or NULL) associated with a particular level. 249 const SCEV *FullDependence::getDistance(unsigned Level) const { 250 assert(0 < Level && Level <= Levels && "Level out of range"); 251 return DV[Level - 1].Distance; 252 } 253 254 255 // Returns true if a particular level is scalar; that is, 256 // if no subscript in the source or destination mention the induction 257 // variable associated with the loop at this level. 258 bool FullDependence::isScalar(unsigned Level) const { 259 assert(0 < Level && Level <= Levels && "Level out of range"); 260 return DV[Level - 1].Scalar; 261 } 262 263 264 // Returns true if peeling the first iteration from this loop 265 // will break this dependence. 266 bool FullDependence::isPeelFirst(unsigned Level) const { 267 assert(0 < Level && Level <= Levels && "Level out of range"); 268 return DV[Level - 1].PeelFirst; 269 } 270 271 272 // Returns true if peeling the last iteration from this loop 273 // will break this dependence. 274 bool FullDependence::isPeelLast(unsigned Level) const { 275 assert(0 < Level && Level <= Levels && "Level out of range"); 276 return DV[Level - 1].PeelLast; 277 } 278 279 280 // Returns true if splitting this loop will break the dependence. 281 bool FullDependence::isSplitable(unsigned Level) const { 282 assert(0 < Level && Level <= Levels && "Level out of range"); 283 return DV[Level - 1].Splitable; 284 } 285 286 287 //===----------------------------------------------------------------------===// 288 // DependenceAnalysis::Constraint methods 289 290 // If constraint is a point <X, Y>, returns X. 291 // Otherwise assert. 292 const SCEV *DependenceAnalysis::Constraint::getX() const { 293 assert(Kind == Point && "Kind should be Point"); 294 return A; 295 } 296 297 298 // If constraint is a point <X, Y>, returns Y. 299 // Otherwise assert. 300 const SCEV *DependenceAnalysis::Constraint::getY() const { 301 assert(Kind == Point && "Kind should be Point"); 302 return B; 303 } 304 305 306 // If constraint is a line AX + BY = C, returns A. 307 // Otherwise assert. 308 const SCEV *DependenceAnalysis::Constraint::getA() const { 309 assert((Kind == Line || Kind == Distance) && 310 "Kind should be Line (or Distance)"); 311 return A; 312 } 313 314 315 // If constraint is a line AX + BY = C, returns B. 316 // Otherwise assert. 317 const SCEV *DependenceAnalysis::Constraint::getB() const { 318 assert((Kind == Line || Kind == Distance) && 319 "Kind should be Line (or Distance)"); 320 return B; 321 } 322 323 324 // If constraint is a line AX + BY = C, returns C. 325 // Otherwise assert. 326 const SCEV *DependenceAnalysis::Constraint::getC() const { 327 assert((Kind == Line || Kind == Distance) && 328 "Kind should be Line (or Distance)"); 329 return C; 330 } 331 332 333 // If constraint is a distance, returns D. 334 // Otherwise assert. 335 const SCEV *DependenceAnalysis::Constraint::getD() const { 336 assert(Kind == Distance && "Kind should be Distance"); 337 return SE->getNegativeSCEV(C); 338 } 339 340 341 // Returns the loop associated with this constraint. 342 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { 343 assert((Kind == Distance || Kind == Line || Kind == Point) && 344 "Kind should be Distance, Line, or Point"); 345 return AssociatedLoop; 346 } 347 348 349 void DependenceAnalysis::Constraint::setPoint(const SCEV *X, 350 const SCEV *Y, 351 const Loop *CurLoop) { 352 Kind = Point; 353 A = X; 354 B = Y; 355 AssociatedLoop = CurLoop; 356 } 357 358 359 void DependenceAnalysis::Constraint::setLine(const SCEV *AA, 360 const SCEV *BB, 361 const SCEV *CC, 362 const Loop *CurLoop) { 363 Kind = Line; 364 A = AA; 365 B = BB; 366 C = CC; 367 AssociatedLoop = CurLoop; 368 } 369 370 371 void DependenceAnalysis::Constraint::setDistance(const SCEV *D, 372 const Loop *CurLoop) { 373 Kind = Distance; 374 A = SE->getConstant(D->getType(), 1); 375 B = SE->getNegativeSCEV(A); 376 C = SE->getNegativeSCEV(D); 377 AssociatedLoop = CurLoop; 378 } 379 380 381 void DependenceAnalysis::Constraint::setEmpty() { 382 Kind = Empty; 383 } 384 385 386 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { 387 SE = NewSE; 388 Kind = Any; 389 } 390 391 392 // For debugging purposes. Dumps the constraint out to OS. 393 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { 394 if (isEmpty()) 395 OS << " Empty\n"; 396 else if (isAny()) 397 OS << " Any\n"; 398 else if (isPoint()) 399 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 400 else if (isDistance()) 401 OS << " Distance is " << *getD() << 402 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 403 else if (isLine()) 404 OS << " Line is " << *getA() << "*X + " << 405 *getB() << "*Y = " << *getC() << "\n"; 406 else 407 llvm_unreachable("unknown constraint type in Constraint::dump"); 408 } 409 410 411 // Updates X with the intersection 412 // of the Constraints X and Y. Returns true if X has changed. 413 // Corresponds to Figure 4 from the paper 414 // 415 // Practical Dependence Testing 416 // Goff, Kennedy, Tseng 417 // PLDI 1991 418 bool DependenceAnalysis::intersectConstraints(Constraint *X, 419 const Constraint *Y) { 420 ++DeltaApplications; 421 DEBUG(dbgs() << "\tintersect constraints\n"); 422 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 423 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 424 assert(!Y->isPoint() && "Y must not be a Point"); 425 if (X->isAny()) { 426 if (Y->isAny()) 427 return false; 428 *X = *Y; 429 return true; 430 } 431 if (X->isEmpty()) 432 return false; 433 if (Y->isEmpty()) { 434 X->setEmpty(); 435 return true; 436 } 437 438 if (X->isDistance() && Y->isDistance()) { 439 DEBUG(dbgs() << "\t intersect 2 distances\n"); 440 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 441 return false; 442 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 443 X->setEmpty(); 444 ++DeltaSuccesses; 445 return true; 446 } 447 // Hmmm, interesting situation. 448 // I guess if either is constant, keep it and ignore the other. 449 if (isa<SCEVConstant>(Y->getD())) { 450 *X = *Y; 451 return true; 452 } 453 return false; 454 } 455 456 // At this point, the pseudo-code in Figure 4 of the paper 457 // checks if (X->isPoint() && Y->isPoint()). 458 // This case can't occur in our implementation, 459 // since a Point can only arise as the result of intersecting 460 // two Line constraints, and the right-hand value, Y, is never 461 // the result of an intersection. 462 assert(!(X->isPoint() && Y->isPoint()) && 463 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 464 465 if (X->isLine() && Y->isLine()) { 466 DEBUG(dbgs() << "\t intersect 2 lines\n"); 467 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 468 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 469 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 470 // slopes are equal, so lines are parallel 471 DEBUG(dbgs() << "\t\tsame slope\n"); 472 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 473 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 474 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 475 return false; 476 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 477 X->setEmpty(); 478 ++DeltaSuccesses; 479 return true; 480 } 481 return false; 482 } 483 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 484 // slopes differ, so lines intersect 485 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 486 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 487 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 488 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 489 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 490 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 491 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 492 const SCEVConstant *C1A2_C2A1 = 493 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 494 const SCEVConstant *C1B2_C2B1 = 495 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 496 const SCEVConstant *A1B2_A2B1 = 497 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 498 const SCEVConstant *A2B1_A1B2 = 499 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 500 if (!C1B2_C2B1 || !C1A2_C2A1 || 501 !A1B2_A2B1 || !A2B1_A1B2) 502 return false; 503 APInt Xtop = C1B2_C2B1->getValue()->getValue(); 504 APInt Xbot = A1B2_A2B1->getValue()->getValue(); 505 APInt Ytop = C1A2_C2A1->getValue()->getValue(); 506 APInt Ybot = A2B1_A1B2->getValue()->getValue(); 507 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 508 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 509 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 510 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 511 APInt Xq = Xtop; // these need to be initialized, even 512 APInt Xr = Xtop; // though they're just going to be overwritten 513 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 514 APInt Yq = Ytop; 515 APInt Yr = Ytop; 516 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 517 if (Xr != 0 || Yr != 0) { 518 X->setEmpty(); 519 ++DeltaSuccesses; 520 return true; 521 } 522 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 523 if (Xq.slt(0) || Yq.slt(0)) { 524 X->setEmpty(); 525 ++DeltaSuccesses; 526 return true; 527 } 528 if (const SCEVConstant *CUB = 529 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 530 APInt UpperBound = CUB->getValue()->getValue(); 531 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 532 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 533 X->setEmpty(); 534 ++DeltaSuccesses; 535 return true; 536 } 537 } 538 X->setPoint(SE->getConstant(Xq), 539 SE->getConstant(Yq), 540 X->getAssociatedLoop()); 541 ++DeltaSuccesses; 542 return true; 543 } 544 return false; 545 } 546 547 // if (X->isLine() && Y->isPoint()) This case can't occur. 548 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 549 550 if (X->isPoint() && Y->isLine()) { 551 DEBUG(dbgs() << "\t intersect Point and Line\n"); 552 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 553 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 554 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 555 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 556 return false; 557 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 558 X->setEmpty(); 559 ++DeltaSuccesses; 560 return true; 561 } 562 return false; 563 } 564 565 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 566 return false; 567 } 568 569 570 //===----------------------------------------------------------------------===// 571 // DependenceAnalysis methods 572 573 // For debugging purposes. Dumps a dependence to OS. 574 void Dependence::dump(raw_ostream &OS) const { 575 bool Splitable = false; 576 if (isConfused()) 577 OS << "confused"; 578 else { 579 if (isConsistent()) 580 OS << "consistent "; 581 if (isFlow()) 582 OS << "flow"; 583 else if (isOutput()) 584 OS << "output"; 585 else if (isAnti()) 586 OS << "anti"; 587 else if (isInput()) 588 OS << "input"; 589 unsigned Levels = getLevels(); 590 OS << " ["; 591 for (unsigned II = 1; II <= Levels; ++II) { 592 if (isSplitable(II)) 593 Splitable = true; 594 if (isPeelFirst(II)) 595 OS << 'p'; 596 const SCEV *Distance = getDistance(II); 597 if (Distance) 598 OS << *Distance; 599 else if (isScalar(II)) 600 OS << "S"; 601 else { 602 unsigned Direction = getDirection(II); 603 if (Direction == DVEntry::ALL) 604 OS << "*"; 605 else { 606 if (Direction & DVEntry::LT) 607 OS << "<"; 608 if (Direction & DVEntry::EQ) 609 OS << "="; 610 if (Direction & DVEntry::GT) 611 OS << ">"; 612 } 613 } 614 if (isPeelLast(II)) 615 OS << 'p'; 616 if (II < Levels) 617 OS << " "; 618 } 619 if (isLoopIndependent()) 620 OS << "|<"; 621 OS << "]"; 622 if (Splitable) 623 OS << " splitable"; 624 } 625 OS << "!\n"; 626 } 627 628 static AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 629 const DataLayout &DL, 630 const Value *A, 631 const Value *B) { 632 const Value *AObj = GetUnderlyingObject(A, DL); 633 const Value *BObj = GetUnderlyingObject(B, DL); 634 return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), 635 BObj, AA->getTypeStoreSize(BObj->getType())); 636 } 637 638 639 // Returns true if the load or store can be analyzed. Atomic and volatile 640 // operations have properties which this analysis does not understand. 641 static 642 bool isLoadOrStore(const Instruction *I) { 643 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 644 return LI->isUnordered(); 645 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 646 return SI->isUnordered(); 647 return false; 648 } 649 650 651 static 652 Value *getPointerOperand(Instruction *I) { 653 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 654 return LI->getPointerOperand(); 655 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 656 return SI->getPointerOperand(); 657 llvm_unreachable("Value is not load or store instruction"); 658 return nullptr; 659 } 660 661 662 // Examines the loop nesting of the Src and Dst 663 // instructions and establishes their shared loops. Sets the variables 664 // CommonLevels, SrcLevels, and MaxLevels. 665 // The source and destination instructions needn't be contained in the same 666 // loop. The routine establishNestingLevels finds the level of most deeply 667 // nested loop that contains them both, CommonLevels. An instruction that's 668 // not contained in a loop is at level = 0. MaxLevels is equal to the level 669 // of the source plus the level of the destination, minus CommonLevels. 670 // This lets us allocate vectors MaxLevels in length, with room for every 671 // distinct loop referenced in both the source and destination subscripts. 672 // The variable SrcLevels is the nesting depth of the source instruction. 673 // It's used to help calculate distinct loops referenced by the destination. 674 // Here's the map from loops to levels: 675 // 0 - unused 676 // 1 - outermost common loop 677 // ... - other common loops 678 // CommonLevels - innermost common loop 679 // ... - loops containing Src but not Dst 680 // SrcLevels - innermost loop containing Src but not Dst 681 // ... - loops containing Dst but not Src 682 // MaxLevels - innermost loops containing Dst but not Src 683 // Consider the follow code fragment: 684 // for (a = ...) { 685 // for (b = ...) { 686 // for (c = ...) { 687 // for (d = ...) { 688 // A[] = ...; 689 // } 690 // } 691 // for (e = ...) { 692 // for (f = ...) { 693 // for (g = ...) { 694 // ... = A[]; 695 // } 696 // } 697 // } 698 // } 699 // } 700 // If we're looking at the possibility of a dependence between the store 701 // to A (the Src) and the load from A (the Dst), we'll note that they 702 // have 2 loops in common, so CommonLevels will equal 2 and the direction 703 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 704 // A map from loop names to loop numbers would look like 705 // a - 1 706 // b - 2 = CommonLevels 707 // c - 3 708 // d - 4 = SrcLevels 709 // e - 5 710 // f - 6 711 // g - 7 = MaxLevels 712 void DependenceAnalysis::establishNestingLevels(const Instruction *Src, 713 const Instruction *Dst) { 714 const BasicBlock *SrcBlock = Src->getParent(); 715 const BasicBlock *DstBlock = Dst->getParent(); 716 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 717 unsigned DstLevel = LI->getLoopDepth(DstBlock); 718 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 719 const Loop *DstLoop = LI->getLoopFor(DstBlock); 720 SrcLevels = SrcLevel; 721 MaxLevels = SrcLevel + DstLevel; 722 while (SrcLevel > DstLevel) { 723 SrcLoop = SrcLoop->getParentLoop(); 724 SrcLevel--; 725 } 726 while (DstLevel > SrcLevel) { 727 DstLoop = DstLoop->getParentLoop(); 728 DstLevel--; 729 } 730 while (SrcLoop != DstLoop) { 731 SrcLoop = SrcLoop->getParentLoop(); 732 DstLoop = DstLoop->getParentLoop(); 733 SrcLevel--; 734 } 735 CommonLevels = SrcLevel; 736 MaxLevels -= CommonLevels; 737 } 738 739 740 // Given one of the loops containing the source, return 741 // its level index in our numbering scheme. 742 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { 743 return SrcLoop->getLoopDepth(); 744 } 745 746 747 // Given one of the loops containing the destination, 748 // return its level index in our numbering scheme. 749 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { 750 unsigned D = DstLoop->getLoopDepth(); 751 if (D > CommonLevels) 752 return D - CommonLevels + SrcLevels; 753 else 754 return D; 755 } 756 757 758 // Returns true if Expression is loop invariant in LoopNest. 759 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, 760 const Loop *LoopNest) const { 761 if (!LoopNest) 762 return true; 763 return SE->isLoopInvariant(Expression, LoopNest) && 764 isLoopInvariant(Expression, LoopNest->getParentLoop()); 765 } 766 767 768 769 // Finds the set of loops from the LoopNest that 770 // have a level <= CommonLevels and are referred to by the SCEV Expression. 771 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, 772 const Loop *LoopNest, 773 SmallBitVector &Loops) const { 774 while (LoopNest) { 775 unsigned Level = LoopNest->getLoopDepth(); 776 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 777 Loops.set(Level); 778 LoopNest = LoopNest->getParentLoop(); 779 } 780 } 781 782 void DependenceAnalysis::unifySubscriptType(Subscript *Pair) { 783 const SCEV *Src = Pair->Src; 784 const SCEV *Dst = Pair->Dst; 785 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 786 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 787 if (SrcTy == nullptr || DstTy == nullptr) { 788 assert(SrcTy == DstTy && "This function only unify integer types and " 789 "expect Src and Dst share the same type " 790 "otherwise."); 791 return; 792 } 793 if (SrcTy->getBitWidth() > DstTy->getBitWidth()) { 794 // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst). 795 Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy); 796 } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) { 797 // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src). 798 Pair->Src = SE->getSignExtendExpr(Src, DstTy); 799 } 800 } 801 802 // removeMatchingExtensions - Examines a subscript pair. 803 // If the source and destination are identically sign (or zero) 804 // extended, it strips off the extension in an effect to simplify 805 // the actual analysis. 806 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { 807 const SCEV *Src = Pair->Src; 808 const SCEV *Dst = Pair->Dst; 809 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 810 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 811 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 812 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 813 const SCEV *SrcCastOp = SrcCast->getOperand(); 814 const SCEV *DstCastOp = DstCast->getOperand(); 815 if (SrcCastOp->getType() == DstCastOp->getType()) { 816 Pair->Src = SrcCastOp; 817 Pair->Dst = DstCastOp; 818 } 819 } 820 } 821 822 823 // Examine the scev and return true iff it's linear. 824 // Collect any loops mentioned in the set of "Loops". 825 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, 826 const Loop *LoopNest, 827 SmallBitVector &Loops) { 828 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 829 if (!AddRec) 830 return isLoopInvariant(Src, LoopNest); 831 const SCEV *Start = AddRec->getStart(); 832 const SCEV *Step = AddRec->getStepRecurrence(*SE); 833 if (!isLoopInvariant(Step, LoopNest)) 834 return false; 835 Loops.set(mapSrcLoop(AddRec->getLoop())); 836 return checkSrcSubscript(Start, LoopNest, Loops); 837 } 838 839 840 841 // Examine the scev and return true iff it's linear. 842 // Collect any loops mentioned in the set of "Loops". 843 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, 844 const Loop *LoopNest, 845 SmallBitVector &Loops) { 846 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 847 if (!AddRec) 848 return isLoopInvariant(Dst, LoopNest); 849 const SCEV *Start = AddRec->getStart(); 850 const SCEV *Step = AddRec->getStepRecurrence(*SE); 851 if (!isLoopInvariant(Step, LoopNest)) 852 return false; 853 Loops.set(mapDstLoop(AddRec->getLoop())); 854 return checkDstSubscript(Start, LoopNest, Loops); 855 } 856 857 858 // Examines the subscript pair (the Src and Dst SCEVs) 859 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 860 // Collects the associated loops in a set. 861 DependenceAnalysis::Subscript::ClassificationKind 862 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 863 const SCEV *Dst, const Loop *DstLoopNest, 864 SmallBitVector &Loops) { 865 SmallBitVector SrcLoops(MaxLevels + 1); 866 SmallBitVector DstLoops(MaxLevels + 1); 867 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 868 return Subscript::NonLinear; 869 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 870 return Subscript::NonLinear; 871 Loops = SrcLoops; 872 Loops |= DstLoops; 873 unsigned N = Loops.count(); 874 if (N == 0) 875 return Subscript::ZIV; 876 if (N == 1) 877 return Subscript::SIV; 878 if (N == 2 && (SrcLoops.count() == 0 || 879 DstLoops.count() == 0 || 880 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 881 return Subscript::RDIV; 882 return Subscript::MIV; 883 } 884 885 886 // A wrapper around SCEV::isKnownPredicate. 887 // Looks for cases where we're interested in comparing for equality. 888 // If both X and Y have been identically sign or zero extended, 889 // it strips off the (confusing) extensions before invoking 890 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 891 // will be similarly updated. 892 // 893 // If SCEV::isKnownPredicate can't prove the predicate, 894 // we try simple subtraction, which seems to help in some cases 895 // involving symbolics. 896 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, 897 const SCEV *X, 898 const SCEV *Y) const { 899 if (Pred == CmpInst::ICMP_EQ || 900 Pred == CmpInst::ICMP_NE) { 901 if ((isa<SCEVSignExtendExpr>(X) && 902 isa<SCEVSignExtendExpr>(Y)) || 903 (isa<SCEVZeroExtendExpr>(X) && 904 isa<SCEVZeroExtendExpr>(Y))) { 905 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 906 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 907 const SCEV *Xop = CX->getOperand(); 908 const SCEV *Yop = CY->getOperand(); 909 if (Xop->getType() == Yop->getType()) { 910 X = Xop; 911 Y = Yop; 912 } 913 } 914 } 915 if (SE->isKnownPredicate(Pred, X, Y)) 916 return true; 917 // If SE->isKnownPredicate can't prove the condition, 918 // we try the brute-force approach of subtracting 919 // and testing the difference. 920 // By testing with SE->isKnownPredicate first, we avoid 921 // the possibility of overflow when the arguments are constants. 922 const SCEV *Delta = SE->getMinusSCEV(X, Y); 923 switch (Pred) { 924 case CmpInst::ICMP_EQ: 925 return Delta->isZero(); 926 case CmpInst::ICMP_NE: 927 return SE->isKnownNonZero(Delta); 928 case CmpInst::ICMP_SGE: 929 return SE->isKnownNonNegative(Delta); 930 case CmpInst::ICMP_SLE: 931 return SE->isKnownNonPositive(Delta); 932 case CmpInst::ICMP_SGT: 933 return SE->isKnownPositive(Delta); 934 case CmpInst::ICMP_SLT: 935 return SE->isKnownNegative(Delta); 936 default: 937 llvm_unreachable("unexpected predicate in isKnownPredicate"); 938 } 939 } 940 941 942 // All subscripts are all the same type. 943 // Loop bound may be smaller (e.g., a char). 944 // Should zero extend loop bound, since it's always >= 0. 945 // This routine collects upper bound and extends if needed. 946 // Return null if no bound available. 947 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, 948 Type *T) const { 949 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 950 const SCEV *UB = SE->getBackedgeTakenCount(L); 951 return SE->getNoopOrZeroExtend(UB, T); 952 } 953 return nullptr; 954 } 955 956 957 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 958 // If the cast fails, returns NULL. 959 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, 960 Type *T 961 ) const { 962 if (const SCEV *UB = collectUpperBound(L, T)) 963 return dyn_cast<SCEVConstant>(UB); 964 return nullptr; 965 } 966 967 968 // testZIV - 969 // When we have a pair of subscripts of the form [c1] and [c2], 970 // where c1 and c2 are both loop invariant, we attack it using 971 // the ZIV test. Basically, we test by comparing the two values, 972 // but there are actually three possible results: 973 // 1) the values are equal, so there's a dependence 974 // 2) the values are different, so there's no dependence 975 // 3) the values might be equal, so we have to assume a dependence. 976 // 977 // Return true if dependence disproved. 978 bool DependenceAnalysis::testZIV(const SCEV *Src, 979 const SCEV *Dst, 980 FullDependence &Result) const { 981 DEBUG(dbgs() << " src = " << *Src << "\n"); 982 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 983 ++ZIVapplications; 984 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 985 DEBUG(dbgs() << " provably dependent\n"); 986 return false; // provably dependent 987 } 988 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 989 DEBUG(dbgs() << " provably independent\n"); 990 ++ZIVindependence; 991 return true; // provably independent 992 } 993 DEBUG(dbgs() << " possibly dependent\n"); 994 Result.Consistent = false; 995 return false; // possibly dependent 996 } 997 998 999 // strongSIVtest - 1000 // From the paper, Practical Dependence Testing, Section 4.2.1 1001 // 1002 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1003 // where i is an induction variable, c1 and c2 are loop invariant, 1004 // and a is a constant, we can solve it exactly using the Strong SIV test. 1005 // 1006 // Can prove independence. Failing that, can compute distance (and direction). 1007 // In the presence of symbolic terms, we can sometimes make progress. 1008 // 1009 // If there's a dependence, 1010 // 1011 // c1 + a*i = c2 + a*i' 1012 // 1013 // The dependence distance is 1014 // 1015 // d = i' - i = (c1 - c2)/a 1016 // 1017 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1018 // loop's upper bound. If a dependence exists, the dependence direction is 1019 // defined as 1020 // 1021 // { < if d > 0 1022 // direction = { = if d = 0 1023 // { > if d < 0 1024 // 1025 // Return true if dependence disproved. 1026 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, 1027 const SCEV *SrcConst, 1028 const SCEV *DstConst, 1029 const Loop *CurLoop, 1030 unsigned Level, 1031 FullDependence &Result, 1032 Constraint &NewConstraint) const { 1033 DEBUG(dbgs() << "\tStrong SIV test\n"); 1034 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1035 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1036 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1037 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1038 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1039 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1040 ++StrongSIVapplications; 1041 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1042 Level--; 1043 1044 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1045 DEBUG(dbgs() << "\t Delta = " << *Delta); 1046 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1047 1048 // check that |Delta| < iteration count 1049 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1050 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1051 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1052 const SCEV *AbsDelta = 1053 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1054 const SCEV *AbsCoeff = 1055 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1056 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1057 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1058 // Distance greater than trip count - no dependence 1059 ++StrongSIVindependence; 1060 ++StrongSIVsuccesses; 1061 return true; 1062 } 1063 } 1064 1065 // Can we compute distance? 1066 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1067 APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); 1068 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); 1069 APInt Distance = ConstDelta; // these need to be initialized 1070 APInt Remainder = ConstDelta; 1071 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1072 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1073 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1074 // Make sure Coeff divides Delta exactly 1075 if (Remainder != 0) { 1076 // Coeff doesn't divide Distance, no dependence 1077 ++StrongSIVindependence; 1078 ++StrongSIVsuccesses; 1079 return true; 1080 } 1081 Result.DV[Level].Distance = SE->getConstant(Distance); 1082 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1083 if (Distance.sgt(0)) 1084 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1085 else if (Distance.slt(0)) 1086 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1087 else 1088 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1089 ++StrongSIVsuccesses; 1090 } 1091 else if (Delta->isZero()) { 1092 // since 0/X == 0 1093 Result.DV[Level].Distance = Delta; 1094 NewConstraint.setDistance(Delta, CurLoop); 1095 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1096 ++StrongSIVsuccesses; 1097 } 1098 else { 1099 if (Coeff->isOne()) { 1100 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1101 Result.DV[Level].Distance = Delta; // since X/1 == X 1102 NewConstraint.setDistance(Delta, CurLoop); 1103 } 1104 else { 1105 Result.Consistent = false; 1106 NewConstraint.setLine(Coeff, 1107 SE->getNegativeSCEV(Coeff), 1108 SE->getNegativeSCEV(Delta), CurLoop); 1109 } 1110 1111 // maybe we can get a useful direction 1112 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1113 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1114 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1115 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1116 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1117 // The double negatives above are confusing. 1118 // It helps to read !SE->isKnownNonZero(Delta) 1119 // as "Delta might be Zero" 1120 unsigned NewDirection = Dependence::DVEntry::NONE; 1121 if ((DeltaMaybePositive && CoeffMaybePositive) || 1122 (DeltaMaybeNegative && CoeffMaybeNegative)) 1123 NewDirection = Dependence::DVEntry::LT; 1124 if (DeltaMaybeZero) 1125 NewDirection |= Dependence::DVEntry::EQ; 1126 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1127 (DeltaMaybePositive && CoeffMaybeNegative)) 1128 NewDirection |= Dependence::DVEntry::GT; 1129 if (NewDirection < Result.DV[Level].Direction) 1130 ++StrongSIVsuccesses; 1131 Result.DV[Level].Direction &= NewDirection; 1132 } 1133 return false; 1134 } 1135 1136 1137 // weakCrossingSIVtest - 1138 // From the paper, Practical Dependence Testing, Section 4.2.2 1139 // 1140 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1141 // where i is an induction variable, c1 and c2 are loop invariant, 1142 // and a is a constant, we can solve it exactly using the 1143 // Weak-Crossing SIV test. 1144 // 1145 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1146 // the two lines, where i = i', yielding 1147 // 1148 // c1 + a*i = c2 - a*i 1149 // 2a*i = c2 - c1 1150 // i = (c2 - c1)/2a 1151 // 1152 // If i < 0, there is no dependence. 1153 // If i > upperbound, there is no dependence. 1154 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1155 // If i = upperbound, there's a dependence with distance = 0. 1156 // If i is integral, there's a dependence (all directions). 1157 // If the non-integer part = 1/2, there's a dependence (<> directions). 1158 // Otherwise, there's no dependence. 1159 // 1160 // Can prove independence. Failing that, 1161 // can sometimes refine the directions. 1162 // Can determine iteration for splitting. 1163 // 1164 // Return true if dependence disproved. 1165 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, 1166 const SCEV *SrcConst, 1167 const SCEV *DstConst, 1168 const Loop *CurLoop, 1169 unsigned Level, 1170 FullDependence &Result, 1171 Constraint &NewConstraint, 1172 const SCEV *&SplitIter) const { 1173 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1174 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1175 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1176 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1177 ++WeakCrossingSIVapplications; 1178 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1179 Level--; 1180 Result.Consistent = false; 1181 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1182 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1183 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1184 if (Delta->isZero()) { 1185 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1186 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1187 ++WeakCrossingSIVsuccesses; 1188 if (!Result.DV[Level].Direction) { 1189 ++WeakCrossingSIVindependence; 1190 return true; 1191 } 1192 Result.DV[Level].Distance = Delta; // = 0 1193 return false; 1194 } 1195 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1196 if (!ConstCoeff) 1197 return false; 1198 1199 Result.DV[Level].Splitable = true; 1200 if (SE->isKnownNegative(ConstCoeff)) { 1201 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1202 assert(ConstCoeff && 1203 "dynamic cast of negative of ConstCoeff should yield constant"); 1204 Delta = SE->getNegativeSCEV(Delta); 1205 } 1206 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1207 1208 // compute SplitIter for use by DependenceAnalysis::getSplitIteration() 1209 SplitIter = 1210 SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), 1211 Delta), 1212 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), 1213 ConstCoeff)); 1214 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1215 1216 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1217 if (!ConstDelta) 1218 return false; 1219 1220 // We're certain that ConstCoeff > 0; therefore, 1221 // if Delta < 0, then no dependence. 1222 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1223 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1224 if (SE->isKnownNegative(Delta)) { 1225 // No dependence, Delta < 0 1226 ++WeakCrossingSIVindependence; 1227 ++WeakCrossingSIVsuccesses; 1228 return true; 1229 } 1230 1231 // We're certain that Delta > 0 and ConstCoeff > 0. 1232 // Check Delta/(2*ConstCoeff) against upper loop bound 1233 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1234 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1235 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1236 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1237 ConstantTwo); 1238 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1239 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1240 // Delta too big, no dependence 1241 ++WeakCrossingSIVindependence; 1242 ++WeakCrossingSIVsuccesses; 1243 return true; 1244 } 1245 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1246 // i = i' = UB 1247 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1248 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1249 ++WeakCrossingSIVsuccesses; 1250 if (!Result.DV[Level].Direction) { 1251 ++WeakCrossingSIVindependence; 1252 return true; 1253 } 1254 Result.DV[Level].Splitable = false; 1255 Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); 1256 return false; 1257 } 1258 } 1259 1260 // check that Coeff divides Delta 1261 APInt APDelta = ConstDelta->getValue()->getValue(); 1262 APInt APCoeff = ConstCoeff->getValue()->getValue(); 1263 APInt Distance = APDelta; // these need to be initialzed 1264 APInt Remainder = APDelta; 1265 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1266 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1267 if (Remainder != 0) { 1268 // Coeff doesn't divide Delta, no dependence 1269 ++WeakCrossingSIVindependence; 1270 ++WeakCrossingSIVsuccesses; 1271 return true; 1272 } 1273 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1274 1275 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1276 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1277 Remainder = Distance.srem(Two); 1278 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1279 if (Remainder != 0) { 1280 // Equal direction isn't possible 1281 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1282 ++WeakCrossingSIVsuccesses; 1283 } 1284 return false; 1285 } 1286 1287 1288 // Kirch's algorithm, from 1289 // 1290 // Optimizing Supercompilers for Supercomputers 1291 // Michael Wolfe 1292 // MIT Press, 1989 1293 // 1294 // Program 2.1, page 29. 1295 // Computes the GCD of AM and BM. 1296 // Also finds a solution to the equation ax - by = gcd(a, b). 1297 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1298 static 1299 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1300 APInt &G, APInt &X, APInt &Y) { 1301 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1302 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1303 APInt G0 = AM.abs(); 1304 APInt G1 = BM.abs(); 1305 APInt Q = G0; // these need to be initialized 1306 APInt R = G0; 1307 APInt::sdivrem(G0, G1, Q, R); 1308 while (R != 0) { 1309 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1310 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1311 G0 = G1; G1 = R; 1312 APInt::sdivrem(G0, G1, Q, R); 1313 } 1314 G = G1; 1315 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1316 X = AM.slt(0) ? -A1 : A1; 1317 Y = BM.slt(0) ? B1 : -B1; 1318 1319 // make sure gcd divides Delta 1320 R = Delta.srem(G); 1321 if (R != 0) 1322 return true; // gcd doesn't divide Delta, no dependence 1323 Q = Delta.sdiv(G); 1324 X *= Q; 1325 Y *= Q; 1326 return false; 1327 } 1328 1329 1330 static 1331 APInt floorOfQuotient(APInt A, APInt B) { 1332 APInt Q = A; // these need to be initialized 1333 APInt R = A; 1334 APInt::sdivrem(A, B, Q, R); 1335 if (R == 0) 1336 return Q; 1337 if ((A.sgt(0) && B.sgt(0)) || 1338 (A.slt(0) && B.slt(0))) 1339 return Q; 1340 else 1341 return Q - 1; 1342 } 1343 1344 1345 static 1346 APInt ceilingOfQuotient(APInt A, APInt B) { 1347 APInt Q = A; // these need to be initialized 1348 APInt R = A; 1349 APInt::sdivrem(A, B, Q, R); 1350 if (R == 0) 1351 return Q; 1352 if ((A.sgt(0) && B.sgt(0)) || 1353 (A.slt(0) && B.slt(0))) 1354 return Q + 1; 1355 else 1356 return Q; 1357 } 1358 1359 1360 static 1361 APInt maxAPInt(APInt A, APInt B) { 1362 return A.sgt(B) ? A : B; 1363 } 1364 1365 1366 static 1367 APInt minAPInt(APInt A, APInt B) { 1368 return A.slt(B) ? A : B; 1369 } 1370 1371 1372 // exactSIVtest - 1373 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1374 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1375 // and a2 are constant, we can solve it exactly using an algorithm developed 1376 // by Banerjee and Wolfe. See Section 2.5.3 in 1377 // 1378 // Optimizing Supercompilers for Supercomputers 1379 // Michael Wolfe 1380 // MIT Press, 1989 1381 // 1382 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1383 // so use them if possible. They're also a bit better with symbolics and, 1384 // in the case of the strong SIV test, can compute Distances. 1385 // 1386 // Return true if dependence disproved. 1387 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, 1388 const SCEV *DstCoeff, 1389 const SCEV *SrcConst, 1390 const SCEV *DstConst, 1391 const Loop *CurLoop, 1392 unsigned Level, 1393 FullDependence &Result, 1394 Constraint &NewConstraint) const { 1395 DEBUG(dbgs() << "\tExact SIV test\n"); 1396 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1397 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1398 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1399 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1400 ++ExactSIVapplications; 1401 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1402 Level--; 1403 Result.Consistent = false; 1404 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1405 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1406 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1407 Delta, CurLoop); 1408 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1409 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1410 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1411 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1412 return false; 1413 1414 // find gcd 1415 APInt G, X, Y; 1416 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1417 APInt BM = ConstDstCoeff->getValue()->getValue(); 1418 unsigned Bits = AM.getBitWidth(); 1419 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1420 // gcd doesn't divide Delta, no dependence 1421 ++ExactSIVindependence; 1422 ++ExactSIVsuccesses; 1423 return true; 1424 } 1425 1426 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1427 1428 // since SCEV construction normalizes, LM = 0 1429 APInt UM(Bits, 1, true); 1430 bool UMvalid = false; 1431 // UM is perhaps unavailable, let's check 1432 if (const SCEVConstant *CUB = 1433 collectConstantUpperBound(CurLoop, Delta->getType())) { 1434 UM = CUB->getValue()->getValue(); 1435 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1436 UMvalid = true; 1437 } 1438 1439 APInt TU(APInt::getSignedMaxValue(Bits)); 1440 APInt TL(APInt::getSignedMinValue(Bits)); 1441 1442 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1443 APInt TMUL = BM.sdiv(G); 1444 if (TMUL.sgt(0)) { 1445 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1446 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1447 if (UMvalid) { 1448 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1449 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1450 } 1451 } 1452 else { 1453 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1454 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1455 if (UMvalid) { 1456 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1457 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1458 } 1459 } 1460 1461 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1462 TMUL = AM.sdiv(G); 1463 if (TMUL.sgt(0)) { 1464 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1465 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1466 if (UMvalid) { 1467 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1468 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1469 } 1470 } 1471 else { 1472 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1473 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1474 if (UMvalid) { 1475 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1476 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1477 } 1478 } 1479 if (TL.sgt(TU)) { 1480 ++ExactSIVindependence; 1481 ++ExactSIVsuccesses; 1482 return true; 1483 } 1484 1485 // explore directions 1486 unsigned NewDirection = Dependence::DVEntry::NONE; 1487 1488 // less than 1489 APInt SaveTU(TU); // save these 1490 APInt SaveTL(TL); 1491 DEBUG(dbgs() << "\t exploring LT direction\n"); 1492 TMUL = AM - BM; 1493 if (TMUL.sgt(0)) { 1494 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1495 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1496 } 1497 else { 1498 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1499 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1500 } 1501 if (TL.sle(TU)) { 1502 NewDirection |= Dependence::DVEntry::LT; 1503 ++ExactSIVsuccesses; 1504 } 1505 1506 // equal 1507 TU = SaveTU; // restore 1508 TL = SaveTL; 1509 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1510 if (TMUL.sgt(0)) { 1511 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1512 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1513 } 1514 else { 1515 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1516 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1517 } 1518 TMUL = BM - AM; 1519 if (TMUL.sgt(0)) { 1520 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1521 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1522 } 1523 else { 1524 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1525 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1526 } 1527 if (TL.sle(TU)) { 1528 NewDirection |= Dependence::DVEntry::EQ; 1529 ++ExactSIVsuccesses; 1530 } 1531 1532 // greater than 1533 TU = SaveTU; // restore 1534 TL = SaveTL; 1535 DEBUG(dbgs() << "\t exploring GT direction\n"); 1536 if (TMUL.sgt(0)) { 1537 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1538 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1539 } 1540 else { 1541 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1542 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1543 } 1544 if (TL.sle(TU)) { 1545 NewDirection |= Dependence::DVEntry::GT; 1546 ++ExactSIVsuccesses; 1547 } 1548 1549 // finished 1550 Result.DV[Level].Direction &= NewDirection; 1551 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1552 ++ExactSIVindependence; 1553 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1554 } 1555 1556 1557 1558 // Return true if the divisor evenly divides the dividend. 1559 static 1560 bool isRemainderZero(const SCEVConstant *Dividend, 1561 const SCEVConstant *Divisor) { 1562 APInt ConstDividend = Dividend->getValue()->getValue(); 1563 APInt ConstDivisor = Divisor->getValue()->getValue(); 1564 return ConstDividend.srem(ConstDivisor) == 0; 1565 } 1566 1567 1568 // weakZeroSrcSIVtest - 1569 // From the paper, Practical Dependence Testing, Section 4.2.2 1570 // 1571 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1572 // where i is an induction variable, c1 and c2 are loop invariant, 1573 // and a is a constant, we can solve it exactly using the 1574 // Weak-Zero SIV test. 1575 // 1576 // Given 1577 // 1578 // c1 = c2 + a*i 1579 // 1580 // we get 1581 // 1582 // (c1 - c2)/a = i 1583 // 1584 // If i is not an integer, there's no dependence. 1585 // If i < 0 or > UB, there's no dependence. 1586 // If i = 0, the direction is <= and peeling the 1587 // 1st iteration will break the dependence. 1588 // If i = UB, the direction is >= and peeling the 1589 // last iteration will break the dependence. 1590 // Otherwise, the direction is *. 1591 // 1592 // Can prove independence. Failing that, we can sometimes refine 1593 // the directions. Can sometimes show that first or last 1594 // iteration carries all the dependences (so worth peeling). 1595 // 1596 // (see also weakZeroDstSIVtest) 1597 // 1598 // Return true if dependence disproved. 1599 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1600 const SCEV *SrcConst, 1601 const SCEV *DstConst, 1602 const Loop *CurLoop, 1603 unsigned Level, 1604 FullDependence &Result, 1605 Constraint &NewConstraint) const { 1606 // For the WeakSIV test, it's possible the loop isn't common to 1607 // the Src and Dst loops. If it isn't, then there's no need to 1608 // record a direction. 1609 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1610 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1611 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1612 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1613 ++WeakZeroSIVapplications; 1614 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1615 Level--; 1616 Result.Consistent = false; 1617 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1618 NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), 1619 DstCoeff, Delta, CurLoop); 1620 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1621 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1622 if (Level < CommonLevels) { 1623 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1624 Result.DV[Level].PeelFirst = true; 1625 ++WeakZeroSIVsuccesses; 1626 } 1627 return false; // dependences caused by first iteration 1628 } 1629 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1630 if (!ConstCoeff) 1631 return false; 1632 const SCEV *AbsCoeff = 1633 SE->isKnownNegative(ConstCoeff) ? 1634 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1635 const SCEV *NewDelta = 1636 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1637 1638 // check that Delta/SrcCoeff < iteration count 1639 // really check NewDelta < count*AbsCoeff 1640 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1641 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1642 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1643 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1644 ++WeakZeroSIVindependence; 1645 ++WeakZeroSIVsuccesses; 1646 return true; 1647 } 1648 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1649 // dependences caused by last iteration 1650 if (Level < CommonLevels) { 1651 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1652 Result.DV[Level].PeelLast = true; 1653 ++WeakZeroSIVsuccesses; 1654 } 1655 return false; 1656 } 1657 } 1658 1659 // check that Delta/SrcCoeff >= 0 1660 // really check that NewDelta >= 0 1661 if (SE->isKnownNegative(NewDelta)) { 1662 // No dependence, newDelta < 0 1663 ++WeakZeroSIVindependence; 1664 ++WeakZeroSIVsuccesses; 1665 return true; 1666 } 1667 1668 // if SrcCoeff doesn't divide Delta, then no dependence 1669 if (isa<SCEVConstant>(Delta) && 1670 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1671 ++WeakZeroSIVindependence; 1672 ++WeakZeroSIVsuccesses; 1673 return true; 1674 } 1675 return false; 1676 } 1677 1678 1679 // weakZeroDstSIVtest - 1680 // From the paper, Practical Dependence Testing, Section 4.2.2 1681 // 1682 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1683 // where i is an induction variable, c1 and c2 are loop invariant, 1684 // and a is a constant, we can solve it exactly using the 1685 // Weak-Zero SIV test. 1686 // 1687 // Given 1688 // 1689 // c1 + a*i = c2 1690 // 1691 // we get 1692 // 1693 // i = (c2 - c1)/a 1694 // 1695 // If i is not an integer, there's no dependence. 1696 // If i < 0 or > UB, there's no dependence. 1697 // If i = 0, the direction is <= and peeling the 1698 // 1st iteration will break the dependence. 1699 // If i = UB, the direction is >= and peeling the 1700 // last iteration will break the dependence. 1701 // Otherwise, the direction is *. 1702 // 1703 // Can prove independence. Failing that, we can sometimes refine 1704 // the directions. Can sometimes show that first or last 1705 // iteration carries all the dependences (so worth peeling). 1706 // 1707 // (see also weakZeroSrcSIVtest) 1708 // 1709 // Return true if dependence disproved. 1710 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1711 const SCEV *SrcConst, 1712 const SCEV *DstConst, 1713 const Loop *CurLoop, 1714 unsigned Level, 1715 FullDependence &Result, 1716 Constraint &NewConstraint) const { 1717 // For the WeakSIV test, it's possible the loop isn't common to the 1718 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1719 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1720 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1721 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1722 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1723 ++WeakZeroSIVapplications; 1724 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1725 Level--; 1726 Result.Consistent = false; 1727 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1728 NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), 1729 Delta, CurLoop); 1730 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1731 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1732 if (Level < CommonLevels) { 1733 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1734 Result.DV[Level].PeelFirst = true; 1735 ++WeakZeroSIVsuccesses; 1736 } 1737 return false; // dependences caused by first iteration 1738 } 1739 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1740 if (!ConstCoeff) 1741 return false; 1742 const SCEV *AbsCoeff = 1743 SE->isKnownNegative(ConstCoeff) ? 1744 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1745 const SCEV *NewDelta = 1746 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1747 1748 // check that Delta/SrcCoeff < iteration count 1749 // really check NewDelta < count*AbsCoeff 1750 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1751 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1752 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1753 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1754 ++WeakZeroSIVindependence; 1755 ++WeakZeroSIVsuccesses; 1756 return true; 1757 } 1758 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1759 // dependences caused by last iteration 1760 if (Level < CommonLevels) { 1761 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1762 Result.DV[Level].PeelLast = true; 1763 ++WeakZeroSIVsuccesses; 1764 } 1765 return false; 1766 } 1767 } 1768 1769 // check that Delta/SrcCoeff >= 0 1770 // really check that NewDelta >= 0 1771 if (SE->isKnownNegative(NewDelta)) { 1772 // No dependence, newDelta < 0 1773 ++WeakZeroSIVindependence; 1774 ++WeakZeroSIVsuccesses; 1775 return true; 1776 } 1777 1778 // if SrcCoeff doesn't divide Delta, then no dependence 1779 if (isa<SCEVConstant>(Delta) && 1780 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1781 ++WeakZeroSIVindependence; 1782 ++WeakZeroSIVsuccesses; 1783 return true; 1784 } 1785 return false; 1786 } 1787 1788 1789 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1790 // Things of the form [c1 + a*i] and [c2 + b*j], 1791 // where i and j are induction variable, c1 and c2 are loop invariant, 1792 // and a and b are constants. 1793 // Returns true if any possible dependence is disproved. 1794 // Marks the result as inconsistent. 1795 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1796 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, 1797 const SCEV *DstCoeff, 1798 const SCEV *SrcConst, 1799 const SCEV *DstConst, 1800 const Loop *SrcLoop, 1801 const Loop *DstLoop, 1802 FullDependence &Result) const { 1803 DEBUG(dbgs() << "\tExact RDIV test\n"); 1804 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1805 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1806 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1807 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1808 ++ExactRDIVapplications; 1809 Result.Consistent = false; 1810 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1811 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1812 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1813 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1814 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1815 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1816 return false; 1817 1818 // find gcd 1819 APInt G, X, Y; 1820 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1821 APInt BM = ConstDstCoeff->getValue()->getValue(); 1822 unsigned Bits = AM.getBitWidth(); 1823 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1824 // gcd doesn't divide Delta, no dependence 1825 ++ExactRDIVindependence; 1826 return true; 1827 } 1828 1829 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1830 1831 // since SCEV construction seems to normalize, LM = 0 1832 APInt SrcUM(Bits, 1, true); 1833 bool SrcUMvalid = false; 1834 // SrcUM is perhaps unavailable, let's check 1835 if (const SCEVConstant *UpperBound = 1836 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1837 SrcUM = UpperBound->getValue()->getValue(); 1838 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1839 SrcUMvalid = true; 1840 } 1841 1842 APInt DstUM(Bits, 1, true); 1843 bool DstUMvalid = false; 1844 // UM is perhaps unavailable, let's check 1845 if (const SCEVConstant *UpperBound = 1846 collectConstantUpperBound(DstLoop, Delta->getType())) { 1847 DstUM = UpperBound->getValue()->getValue(); 1848 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1849 DstUMvalid = true; 1850 } 1851 1852 APInt TU(APInt::getSignedMaxValue(Bits)); 1853 APInt TL(APInt::getSignedMinValue(Bits)); 1854 1855 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1856 APInt TMUL = BM.sdiv(G); 1857 if (TMUL.sgt(0)) { 1858 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1859 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1860 if (SrcUMvalid) { 1861 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1862 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1863 } 1864 } 1865 else { 1866 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1867 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1868 if (SrcUMvalid) { 1869 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1870 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1871 } 1872 } 1873 1874 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1875 TMUL = AM.sdiv(G); 1876 if (TMUL.sgt(0)) { 1877 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1878 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1879 if (DstUMvalid) { 1880 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1881 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1882 } 1883 } 1884 else { 1885 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1886 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1887 if (DstUMvalid) { 1888 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1889 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1890 } 1891 } 1892 if (TL.sgt(TU)) 1893 ++ExactRDIVindependence; 1894 return TL.sgt(TU); 1895 } 1896 1897 1898 // symbolicRDIVtest - 1899 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1900 // introduce a special case of Banerjee's Inequalities (also called the 1901 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1902 // particularly cases with symbolics. Since it's only able to disprove 1903 // dependence (not compute distances or directions), we'll use it as a 1904 // fall back for the other tests. 1905 // 1906 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1907 // where i and j are induction variables and c1 and c2 are loop invariants, 1908 // we can use the symbolic tests to disprove some dependences, serving as a 1909 // backup for the RDIV test. Note that i and j can be the same variable, 1910 // letting this test serve as a backup for the various SIV tests. 1911 // 1912 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1913 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1914 // loop bounds for the i and j loops, respectively. So, ... 1915 // 1916 // c1 + a1*i = c2 + a2*j 1917 // a1*i - a2*j = c2 - c1 1918 // 1919 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1920 // range of the maximum and minimum possible values of a1*i - a2*j. 1921 // Considering the signs of a1 and a2, we have 4 possible cases: 1922 // 1923 // 1) If a1 >= 0 and a2 >= 0, then 1924 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1925 // -a2*N2 <= c2 - c1 <= a1*N1 1926 // 1927 // 2) If a1 >= 0 and a2 <= 0, then 1928 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1929 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1930 // 1931 // 3) If a1 <= 0 and a2 >= 0, then 1932 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1933 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1934 // 1935 // 4) If a1 <= 0 and a2 <= 0, then 1936 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1937 // a1*N1 <= c2 - c1 <= -a2*N2 1938 // 1939 // return true if dependence disproved 1940 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, 1941 const SCEV *A2, 1942 const SCEV *C1, 1943 const SCEV *C2, 1944 const Loop *Loop1, 1945 const Loop *Loop2) const { 1946 ++SymbolicRDIVapplications; 1947 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1948 DEBUG(dbgs() << "\t A1 = " << *A1); 1949 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1950 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1951 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1952 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1953 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1954 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1955 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1956 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1957 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1958 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1959 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1960 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1961 if (SE->isKnownNonNegative(A1)) { 1962 if (SE->isKnownNonNegative(A2)) { 1963 // A1 >= 0 && A2 >= 0 1964 if (N1) { 1965 // make sure that c2 - c1 <= a1*N1 1966 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1967 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1968 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1969 ++SymbolicRDIVindependence; 1970 return true; 1971 } 1972 } 1973 if (N2) { 1974 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1975 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1976 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1977 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1978 ++SymbolicRDIVindependence; 1979 return true; 1980 } 1981 } 1982 } 1983 else if (SE->isKnownNonPositive(A2)) { 1984 // a1 >= 0 && a2 <= 0 1985 if (N1 && N2) { 1986 // make sure that c2 - c1 <= a1*N1 - a2*N2 1987 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1988 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1989 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1990 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1991 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 1992 ++SymbolicRDIVindependence; 1993 return true; 1994 } 1995 } 1996 // make sure that 0 <= c2 - c1 1997 if (SE->isKnownNegative(C2_C1)) { 1998 ++SymbolicRDIVindependence; 1999 return true; 2000 } 2001 } 2002 } 2003 else if (SE->isKnownNonPositive(A1)) { 2004 if (SE->isKnownNonNegative(A2)) { 2005 // a1 <= 0 && a2 >= 0 2006 if (N1 && N2) { 2007 // make sure that a1*N1 - a2*N2 <= c2 - c1 2008 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2009 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2010 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2011 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2012 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2013 ++SymbolicRDIVindependence; 2014 return true; 2015 } 2016 } 2017 // make sure that c2 - c1 <= 0 2018 if (SE->isKnownPositive(C2_C1)) { 2019 ++SymbolicRDIVindependence; 2020 return true; 2021 } 2022 } 2023 else if (SE->isKnownNonPositive(A2)) { 2024 // a1 <= 0 && a2 <= 0 2025 if (N1) { 2026 // make sure that a1*N1 <= c2 - c1 2027 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2028 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2029 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2030 ++SymbolicRDIVindependence; 2031 return true; 2032 } 2033 } 2034 if (N2) { 2035 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2036 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2037 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2038 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2039 ++SymbolicRDIVindependence; 2040 return true; 2041 } 2042 } 2043 } 2044 } 2045 return false; 2046 } 2047 2048 2049 // testSIV - 2050 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2051 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2052 // a2 are constant, we attack it with an SIV test. While they can all be 2053 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2054 // they apply; they're cheaper and sometimes more precise. 2055 // 2056 // Return true if dependence disproved. 2057 bool DependenceAnalysis::testSIV(const SCEV *Src, 2058 const SCEV *Dst, 2059 unsigned &Level, 2060 FullDependence &Result, 2061 Constraint &NewConstraint, 2062 const SCEV *&SplitIter) const { 2063 DEBUG(dbgs() << " src = " << *Src << "\n"); 2064 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2065 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2066 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2067 if (SrcAddRec && DstAddRec) { 2068 const SCEV *SrcConst = SrcAddRec->getStart(); 2069 const SCEV *DstConst = DstAddRec->getStart(); 2070 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2071 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2072 const Loop *CurLoop = SrcAddRec->getLoop(); 2073 assert(CurLoop == DstAddRec->getLoop() && 2074 "both loops in SIV should be same"); 2075 Level = mapSrcLoop(CurLoop); 2076 bool disproven; 2077 if (SrcCoeff == DstCoeff) 2078 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2079 Level, Result, NewConstraint); 2080 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2081 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2082 Level, Result, NewConstraint, SplitIter); 2083 else 2084 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2085 Level, Result, NewConstraint); 2086 return disproven || 2087 gcdMIVtest(Src, Dst, Result) || 2088 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2089 } 2090 if (SrcAddRec) { 2091 const SCEV *SrcConst = SrcAddRec->getStart(); 2092 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2093 const SCEV *DstConst = Dst; 2094 const Loop *CurLoop = SrcAddRec->getLoop(); 2095 Level = mapSrcLoop(CurLoop); 2096 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2097 Level, Result, NewConstraint) || 2098 gcdMIVtest(Src, Dst, Result); 2099 } 2100 if (DstAddRec) { 2101 const SCEV *DstConst = DstAddRec->getStart(); 2102 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2103 const SCEV *SrcConst = Src; 2104 const Loop *CurLoop = DstAddRec->getLoop(); 2105 Level = mapDstLoop(CurLoop); 2106 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2107 CurLoop, Level, Result, NewConstraint) || 2108 gcdMIVtest(Src, Dst, Result); 2109 } 2110 llvm_unreachable("SIV test expected at least one AddRec"); 2111 return false; 2112 } 2113 2114 2115 // testRDIV - 2116 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2117 // where i and j are induction variables, c1 and c2 are loop invariant, 2118 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2119 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2120 // It doesn't make sense to talk about distance or direction in this case, 2121 // so there's no point in making special versions of the Strong SIV test or 2122 // the Weak-crossing SIV test. 2123 // 2124 // With minor algebra, this test can also be used for things like 2125 // [c1 + a1*i + a2*j][c2]. 2126 // 2127 // Return true if dependence disproved. 2128 bool DependenceAnalysis::testRDIV(const SCEV *Src, 2129 const SCEV *Dst, 2130 FullDependence &Result) const { 2131 // we have 3 possible situations here: 2132 // 1) [a*i + b] and [c*j + d] 2133 // 2) [a*i + c*j + b] and [d] 2134 // 3) [b] and [a*i + c*j + d] 2135 // We need to find what we've got and get organized 2136 2137 const SCEV *SrcConst, *DstConst; 2138 const SCEV *SrcCoeff, *DstCoeff; 2139 const Loop *SrcLoop, *DstLoop; 2140 2141 DEBUG(dbgs() << " src = " << *Src << "\n"); 2142 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2143 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2144 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2145 if (SrcAddRec && DstAddRec) { 2146 SrcConst = SrcAddRec->getStart(); 2147 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2148 SrcLoop = SrcAddRec->getLoop(); 2149 DstConst = DstAddRec->getStart(); 2150 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2151 DstLoop = DstAddRec->getLoop(); 2152 } 2153 else if (SrcAddRec) { 2154 if (const SCEVAddRecExpr *tmpAddRec = 2155 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2156 SrcConst = tmpAddRec->getStart(); 2157 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2158 SrcLoop = tmpAddRec->getLoop(); 2159 DstConst = Dst; 2160 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2161 DstLoop = SrcAddRec->getLoop(); 2162 } 2163 else 2164 llvm_unreachable("RDIV reached by surprising SCEVs"); 2165 } 2166 else if (DstAddRec) { 2167 if (const SCEVAddRecExpr *tmpAddRec = 2168 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2169 DstConst = tmpAddRec->getStart(); 2170 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2171 DstLoop = tmpAddRec->getLoop(); 2172 SrcConst = Src; 2173 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2174 SrcLoop = DstAddRec->getLoop(); 2175 } 2176 else 2177 llvm_unreachable("RDIV reached by surprising SCEVs"); 2178 } 2179 else 2180 llvm_unreachable("RDIV expected at least one AddRec"); 2181 return exactRDIVtest(SrcCoeff, DstCoeff, 2182 SrcConst, DstConst, 2183 SrcLoop, DstLoop, 2184 Result) || 2185 gcdMIVtest(Src, Dst, Result) || 2186 symbolicRDIVtest(SrcCoeff, DstCoeff, 2187 SrcConst, DstConst, 2188 SrcLoop, DstLoop); 2189 } 2190 2191 2192 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2193 // Return true if dependence disproved. 2194 // Can sometimes refine direction vectors. 2195 bool DependenceAnalysis::testMIV(const SCEV *Src, 2196 const SCEV *Dst, 2197 const SmallBitVector &Loops, 2198 FullDependence &Result) const { 2199 DEBUG(dbgs() << " src = " << *Src << "\n"); 2200 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2201 Result.Consistent = false; 2202 return gcdMIVtest(Src, Dst, Result) || 2203 banerjeeMIVtest(Src, Dst, Loops, Result); 2204 } 2205 2206 2207 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2208 // in this case 10. If there is no constant part, returns NULL. 2209 static 2210 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { 2211 for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { 2212 if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) 2213 return Constant; 2214 } 2215 return nullptr; 2216 } 2217 2218 2219 //===----------------------------------------------------------------------===// 2220 // gcdMIVtest - 2221 // Tests an MIV subscript pair for dependence. 2222 // Returns true if any possible dependence is disproved. 2223 // Marks the result as inconsistent. 2224 // Can sometimes disprove the equal direction for 1 or more loops, 2225 // as discussed in Michael Wolfe's book, 2226 // High Performance Compilers for Parallel Computing, page 235. 2227 // 2228 // We spend some effort (code!) to handle cases like 2229 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2230 // but M and N are just loop-invariant variables. 2231 // This should help us handle linearized subscripts; 2232 // also makes this test a useful backup to the various SIV tests. 2233 // 2234 // It occurs to me that the presence of loop-invariant variables 2235 // changes the nature of the test from "greatest common divisor" 2236 // to "a common divisor". 2237 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, 2238 const SCEV *Dst, 2239 FullDependence &Result) const { 2240 DEBUG(dbgs() << "starting gcd\n"); 2241 ++GCDapplications; 2242 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2243 APInt RunningGCD = APInt::getNullValue(BitWidth); 2244 2245 // Examine Src coefficients. 2246 // Compute running GCD and record source constant. 2247 // Because we're looking for the constant at the end of the chain, 2248 // we can't quit the loop just because the GCD == 1. 2249 const SCEV *Coefficients = Src; 2250 while (const SCEVAddRecExpr *AddRec = 2251 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2252 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2253 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2254 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2255 // If the coefficient is the product of a constant and other stuff, 2256 // we can use the constant in the GCD computation. 2257 Constant = getConstantPart(Product); 2258 if (!Constant) 2259 return false; 2260 APInt ConstCoeff = Constant->getValue()->getValue(); 2261 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2262 Coefficients = AddRec->getStart(); 2263 } 2264 const SCEV *SrcConst = Coefficients; 2265 2266 // Examine Dst coefficients. 2267 // Compute running GCD and record destination constant. 2268 // Because we're looking for the constant at the end of the chain, 2269 // we can't quit the loop just because the GCD == 1. 2270 Coefficients = Dst; 2271 while (const SCEVAddRecExpr *AddRec = 2272 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2273 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2274 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2275 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2276 // If the coefficient is the product of a constant and other stuff, 2277 // we can use the constant in the GCD computation. 2278 Constant = getConstantPart(Product); 2279 if (!Constant) 2280 return false; 2281 APInt ConstCoeff = Constant->getValue()->getValue(); 2282 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2283 Coefficients = AddRec->getStart(); 2284 } 2285 const SCEV *DstConst = Coefficients; 2286 2287 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2288 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2289 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2290 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2291 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2292 // If Delta is a sum of products, we may be able to make further progress. 2293 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2294 const SCEV *Operand = Sum->getOperand(Op); 2295 if (isa<SCEVConstant>(Operand)) { 2296 assert(!Constant && "Surprised to find multiple constants"); 2297 Constant = cast<SCEVConstant>(Operand); 2298 } 2299 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2300 // Search for constant operand to participate in GCD; 2301 // If none found; return false. 2302 const SCEVConstant *ConstOp = getConstantPart(Product); 2303 if (!ConstOp) 2304 return false; 2305 APInt ConstOpValue = ConstOp->getValue()->getValue(); 2306 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2307 ConstOpValue.abs()); 2308 } 2309 else 2310 return false; 2311 } 2312 } 2313 if (!Constant) 2314 return false; 2315 APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); 2316 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2317 if (ConstDelta == 0) 2318 return false; 2319 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2320 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2321 APInt Remainder = ConstDelta.srem(RunningGCD); 2322 if (Remainder != 0) { 2323 ++GCDindependence; 2324 return true; 2325 } 2326 2327 // Try to disprove equal directions. 2328 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2329 // the code above can't disprove the dependence because the GCD = 1. 2330 // So we consider what happen if i = i' and what happens if j = j'. 2331 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2332 // which is infeasible, so we can disallow the = direction for the i level. 2333 // Setting j = j' doesn't help matters, so we end up with a direction vector 2334 // of [<>, *] 2335 // 2336 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2337 // we need to remember that the constant part is 5 and the RunningGCD should 2338 // be initialized to ExtraGCD = 30. 2339 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2340 2341 bool Improved = false; 2342 Coefficients = Src; 2343 while (const SCEVAddRecExpr *AddRec = 2344 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2345 Coefficients = AddRec->getStart(); 2346 const Loop *CurLoop = AddRec->getLoop(); 2347 RunningGCD = ExtraGCD; 2348 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2349 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2350 const SCEV *Inner = Src; 2351 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2352 AddRec = cast<SCEVAddRecExpr>(Inner); 2353 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2354 if (CurLoop == AddRec->getLoop()) 2355 ; // SrcCoeff == Coeff 2356 else { 2357 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2358 // If the coefficient is the product of a constant and other stuff, 2359 // we can use the constant in the GCD computation. 2360 Constant = getConstantPart(Product); 2361 else 2362 Constant = cast<SCEVConstant>(Coeff); 2363 APInt ConstCoeff = Constant->getValue()->getValue(); 2364 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2365 } 2366 Inner = AddRec->getStart(); 2367 } 2368 Inner = Dst; 2369 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2370 AddRec = cast<SCEVAddRecExpr>(Inner); 2371 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2372 if (CurLoop == AddRec->getLoop()) 2373 DstCoeff = Coeff; 2374 else { 2375 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2376 // If the coefficient is the product of a constant and other stuff, 2377 // we can use the constant in the GCD computation. 2378 Constant = getConstantPart(Product); 2379 else 2380 Constant = cast<SCEVConstant>(Coeff); 2381 APInt ConstCoeff = Constant->getValue()->getValue(); 2382 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2383 } 2384 Inner = AddRec->getStart(); 2385 } 2386 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2387 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) 2388 // If the coefficient is the product of a constant and other stuff, 2389 // we can use the constant in the GCD computation. 2390 Constant = getConstantPart(Product); 2391 else if (isa<SCEVConstant>(Delta)) 2392 Constant = cast<SCEVConstant>(Delta); 2393 else { 2394 // The difference of the two coefficients might not be a product 2395 // or constant, in which case we give up on this direction. 2396 continue; 2397 } 2398 APInt ConstCoeff = Constant->getValue()->getValue(); 2399 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2400 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2401 if (RunningGCD != 0) { 2402 Remainder = ConstDelta.srem(RunningGCD); 2403 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2404 if (Remainder != 0) { 2405 unsigned Level = mapSrcLoop(CurLoop); 2406 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2407 Improved = true; 2408 } 2409 } 2410 } 2411 if (Improved) 2412 ++GCDsuccesses; 2413 DEBUG(dbgs() << "all done\n"); 2414 return false; 2415 } 2416 2417 2418 //===----------------------------------------------------------------------===// 2419 // banerjeeMIVtest - 2420 // Use Banerjee's Inequalities to test an MIV subscript pair. 2421 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2422 // Generally follows the discussion in Section 2.5.2 of 2423 // 2424 // Optimizing Supercompilers for Supercomputers 2425 // Michael Wolfe 2426 // 2427 // The inequalities given on page 25 are simplified in that loops are 2428 // normalized so that the lower bound is always 0 and the stride is always 1. 2429 // For example, Wolfe gives 2430 // 2431 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2432 // 2433 // where A_k is the coefficient of the kth index in the source subscript, 2434 // B_k is the coefficient of the kth index in the destination subscript, 2435 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2436 // index, and N_k is the stride of the kth index. Since all loops are normalized 2437 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2438 // equation to 2439 // 2440 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2441 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2442 // 2443 // Similar simplifications are possible for the other equations. 2444 // 2445 // When we can't determine the number of iterations for a loop, 2446 // we use NULL as an indicator for the worst case, infinity. 2447 // When computing the upper bound, NULL denotes +inf; 2448 // for the lower bound, NULL denotes -inf. 2449 // 2450 // Return true if dependence disproved. 2451 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, 2452 const SCEV *Dst, 2453 const SmallBitVector &Loops, 2454 FullDependence &Result) const { 2455 DEBUG(dbgs() << "starting Banerjee\n"); 2456 ++BanerjeeApplications; 2457 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2458 const SCEV *A0; 2459 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2460 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2461 const SCEV *B0; 2462 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2463 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2464 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2465 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2466 2467 // Compute bounds for all the * directions. 2468 DEBUG(dbgs() << "\tBounds[*]\n"); 2469 for (unsigned K = 1; K <= MaxLevels; ++K) { 2470 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2471 Bound[K].Direction = Dependence::DVEntry::ALL; 2472 Bound[K].DirSet = Dependence::DVEntry::NONE; 2473 findBoundsALL(A, B, Bound, K); 2474 #ifndef NDEBUG 2475 DEBUG(dbgs() << "\t " << K << '\t'); 2476 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2477 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2478 else 2479 DEBUG(dbgs() << "-inf\t"); 2480 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2481 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2482 else 2483 DEBUG(dbgs() << "+inf\n"); 2484 #endif 2485 } 2486 2487 // Test the *, *, *, ... case. 2488 bool Disproved = false; 2489 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2490 // Explore the direction vector hierarchy. 2491 unsigned DepthExpanded = 0; 2492 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2493 Loops, DepthExpanded, Delta); 2494 if (NewDeps > 0) { 2495 bool Improved = false; 2496 for (unsigned K = 1; K <= CommonLevels; ++K) { 2497 if (Loops[K]) { 2498 unsigned Old = Result.DV[K - 1].Direction; 2499 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2500 Improved |= Old != Result.DV[K - 1].Direction; 2501 if (!Result.DV[K - 1].Direction) { 2502 Improved = false; 2503 Disproved = true; 2504 break; 2505 } 2506 } 2507 } 2508 if (Improved) 2509 ++BanerjeeSuccesses; 2510 } 2511 else { 2512 ++BanerjeeIndependence; 2513 Disproved = true; 2514 } 2515 } 2516 else { 2517 ++BanerjeeIndependence; 2518 Disproved = true; 2519 } 2520 delete [] Bound; 2521 delete [] A; 2522 delete [] B; 2523 return Disproved; 2524 } 2525 2526 2527 // Hierarchically expands the direction vector 2528 // search space, combining the directions of discovered dependences 2529 // in the DirSet field of Bound. Returns the number of distinct 2530 // dependences discovered. If the dependence is disproved, 2531 // it will return 0. 2532 unsigned DependenceAnalysis::exploreDirections(unsigned Level, 2533 CoefficientInfo *A, 2534 CoefficientInfo *B, 2535 BoundInfo *Bound, 2536 const SmallBitVector &Loops, 2537 unsigned &DepthExpanded, 2538 const SCEV *Delta) const { 2539 if (Level > CommonLevels) { 2540 // record result 2541 DEBUG(dbgs() << "\t["); 2542 for (unsigned K = 1; K <= CommonLevels; ++K) { 2543 if (Loops[K]) { 2544 Bound[K].DirSet |= Bound[K].Direction; 2545 #ifndef NDEBUG 2546 switch (Bound[K].Direction) { 2547 case Dependence::DVEntry::LT: 2548 DEBUG(dbgs() << " <"); 2549 break; 2550 case Dependence::DVEntry::EQ: 2551 DEBUG(dbgs() << " ="); 2552 break; 2553 case Dependence::DVEntry::GT: 2554 DEBUG(dbgs() << " >"); 2555 break; 2556 case Dependence::DVEntry::ALL: 2557 DEBUG(dbgs() << " *"); 2558 break; 2559 default: 2560 llvm_unreachable("unexpected Bound[K].Direction"); 2561 } 2562 #endif 2563 } 2564 } 2565 DEBUG(dbgs() << " ]\n"); 2566 return 1; 2567 } 2568 if (Loops[Level]) { 2569 if (Level > DepthExpanded) { 2570 DepthExpanded = Level; 2571 // compute bounds for <, =, > at current level 2572 findBoundsLT(A, B, Bound, Level); 2573 findBoundsGT(A, B, Bound, Level); 2574 findBoundsEQ(A, B, Bound, Level); 2575 #ifndef NDEBUG 2576 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2577 DEBUG(dbgs() << "\t <\t"); 2578 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2579 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2580 else 2581 DEBUG(dbgs() << "-inf\t"); 2582 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2583 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2584 else 2585 DEBUG(dbgs() << "+inf\n"); 2586 DEBUG(dbgs() << "\t =\t"); 2587 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2588 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2589 else 2590 DEBUG(dbgs() << "-inf\t"); 2591 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2592 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2593 else 2594 DEBUG(dbgs() << "+inf\n"); 2595 DEBUG(dbgs() << "\t >\t"); 2596 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2597 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2598 else 2599 DEBUG(dbgs() << "-inf\t"); 2600 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2601 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2602 else 2603 DEBUG(dbgs() << "+inf\n"); 2604 #endif 2605 } 2606 2607 unsigned NewDeps = 0; 2608 2609 // test bounds for <, *, *, ... 2610 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2611 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2612 Loops, DepthExpanded, Delta); 2613 2614 // Test bounds for =, *, *, ... 2615 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2616 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2617 Loops, DepthExpanded, Delta); 2618 2619 // test bounds for >, *, *, ... 2620 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2621 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2622 Loops, DepthExpanded, Delta); 2623 2624 Bound[Level].Direction = Dependence::DVEntry::ALL; 2625 return NewDeps; 2626 } 2627 else 2628 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2629 } 2630 2631 2632 // Returns true iff the current bounds are plausible. 2633 bool DependenceAnalysis::testBounds(unsigned char DirKind, 2634 unsigned Level, 2635 BoundInfo *Bound, 2636 const SCEV *Delta) const { 2637 Bound[Level].Direction = DirKind; 2638 if (const SCEV *LowerBound = getLowerBound(Bound)) 2639 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2640 return false; 2641 if (const SCEV *UpperBound = getUpperBound(Bound)) 2642 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2643 return false; 2644 return true; 2645 } 2646 2647 2648 // Computes the upper and lower bounds for level K 2649 // using the * direction. Records them in Bound. 2650 // Wolfe gives the equations 2651 // 2652 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2653 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2654 // 2655 // Since we normalize loops, we can simplify these equations to 2656 // 2657 // LB^*_k = (A^-_k - B^+_k)U_k 2658 // UB^*_k = (A^+_k - B^-_k)U_k 2659 // 2660 // We must be careful to handle the case where the upper bound is unknown. 2661 // Note that the lower bound is always <= 0 2662 // and the upper bound is always >= 0. 2663 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, 2664 CoefficientInfo *B, 2665 BoundInfo *Bound, 2666 unsigned K) const { 2667 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2668 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2669 if (Bound[K].Iterations) { 2670 Bound[K].Lower[Dependence::DVEntry::ALL] = 2671 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2672 Bound[K].Iterations); 2673 Bound[K].Upper[Dependence::DVEntry::ALL] = 2674 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2675 Bound[K].Iterations); 2676 } 2677 else { 2678 // If the difference is 0, we won't need to know the number of iterations. 2679 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2680 Bound[K].Lower[Dependence::DVEntry::ALL] = 2681 SE->getConstant(A[K].Coeff->getType(), 0); 2682 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2683 Bound[K].Upper[Dependence::DVEntry::ALL] = 2684 SE->getConstant(A[K].Coeff->getType(), 0); 2685 } 2686 } 2687 2688 2689 // Computes the upper and lower bounds for level K 2690 // using the = direction. Records them in Bound. 2691 // Wolfe gives the equations 2692 // 2693 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2694 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2695 // 2696 // Since we normalize loops, we can simplify these equations to 2697 // 2698 // LB^=_k = (A_k - B_k)^- U_k 2699 // UB^=_k = (A_k - B_k)^+ U_k 2700 // 2701 // We must be careful to handle the case where the upper bound is unknown. 2702 // Note that the lower bound is always <= 0 2703 // and the upper bound is always >= 0. 2704 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, 2705 CoefficientInfo *B, 2706 BoundInfo *Bound, 2707 unsigned K) const { 2708 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2709 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2710 if (Bound[K].Iterations) { 2711 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2712 const SCEV *NegativePart = getNegativePart(Delta); 2713 Bound[K].Lower[Dependence::DVEntry::EQ] = 2714 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2715 const SCEV *PositivePart = getPositivePart(Delta); 2716 Bound[K].Upper[Dependence::DVEntry::EQ] = 2717 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2718 } 2719 else { 2720 // If the positive/negative part of the difference is 0, 2721 // we won't need to know the number of iterations. 2722 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2723 const SCEV *NegativePart = getNegativePart(Delta); 2724 if (NegativePart->isZero()) 2725 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2726 const SCEV *PositivePart = getPositivePart(Delta); 2727 if (PositivePart->isZero()) 2728 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2729 } 2730 } 2731 2732 2733 // Computes the upper and lower bounds for level K 2734 // using the < direction. Records them in Bound. 2735 // Wolfe gives the equations 2736 // 2737 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2738 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2739 // 2740 // Since we normalize loops, we can simplify these equations to 2741 // 2742 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2743 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2744 // 2745 // We must be careful to handle the case where the upper bound is unknown. 2746 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, 2747 CoefficientInfo *B, 2748 BoundInfo *Bound, 2749 unsigned K) const { 2750 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2751 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2752 if (Bound[K].Iterations) { 2753 const SCEV *Iter_1 = 2754 SE->getMinusSCEV(Bound[K].Iterations, 2755 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2756 const SCEV *NegPart = 2757 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2758 Bound[K].Lower[Dependence::DVEntry::LT] = 2759 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2760 const SCEV *PosPart = 2761 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2762 Bound[K].Upper[Dependence::DVEntry::LT] = 2763 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2764 } 2765 else { 2766 // If the positive/negative part of the difference is 0, 2767 // we won't need to know the number of iterations. 2768 const SCEV *NegPart = 2769 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2770 if (NegPart->isZero()) 2771 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2772 const SCEV *PosPart = 2773 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2774 if (PosPart->isZero()) 2775 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2776 } 2777 } 2778 2779 2780 // Computes the upper and lower bounds for level K 2781 // using the > direction. Records them in Bound. 2782 // Wolfe gives the equations 2783 // 2784 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2785 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2786 // 2787 // Since we normalize loops, we can simplify these equations to 2788 // 2789 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2790 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2791 // 2792 // We must be careful to handle the case where the upper bound is unknown. 2793 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, 2794 CoefficientInfo *B, 2795 BoundInfo *Bound, 2796 unsigned K) const { 2797 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2798 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2799 if (Bound[K].Iterations) { 2800 const SCEV *Iter_1 = 2801 SE->getMinusSCEV(Bound[K].Iterations, 2802 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2803 const SCEV *NegPart = 2804 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2805 Bound[K].Lower[Dependence::DVEntry::GT] = 2806 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2807 const SCEV *PosPart = 2808 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2809 Bound[K].Upper[Dependence::DVEntry::GT] = 2810 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2811 } 2812 else { 2813 // If the positive/negative part of the difference is 0, 2814 // we won't need to know the number of iterations. 2815 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2816 if (NegPart->isZero()) 2817 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2818 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2819 if (PosPart->isZero()) 2820 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2821 } 2822 } 2823 2824 2825 // X^+ = max(X, 0) 2826 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { 2827 return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); 2828 } 2829 2830 2831 // X^- = min(X, 0) 2832 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { 2833 return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); 2834 } 2835 2836 2837 // Walks through the subscript, 2838 // collecting each coefficient, the associated loop bounds, 2839 // and recording its positive and negative parts for later use. 2840 DependenceAnalysis::CoefficientInfo * 2841 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, 2842 bool SrcFlag, 2843 const SCEV *&Constant) const { 2844 const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); 2845 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2846 for (unsigned K = 1; K <= MaxLevels; ++K) { 2847 CI[K].Coeff = Zero; 2848 CI[K].PosPart = Zero; 2849 CI[K].NegPart = Zero; 2850 CI[K].Iterations = nullptr; 2851 } 2852 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2853 const Loop *L = AddRec->getLoop(); 2854 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2855 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2856 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2857 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2858 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2859 Subscript = AddRec->getStart(); 2860 } 2861 Constant = Subscript; 2862 #ifndef NDEBUG 2863 DEBUG(dbgs() << "\tCoefficient Info\n"); 2864 for (unsigned K = 1; K <= MaxLevels; ++K) { 2865 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2866 DEBUG(dbgs() << "\tPos Part = "); 2867 DEBUG(dbgs() << *CI[K].PosPart); 2868 DEBUG(dbgs() << "\tNeg Part = "); 2869 DEBUG(dbgs() << *CI[K].NegPart); 2870 DEBUG(dbgs() << "\tUpper Bound = "); 2871 if (CI[K].Iterations) 2872 DEBUG(dbgs() << *CI[K].Iterations); 2873 else 2874 DEBUG(dbgs() << "+inf"); 2875 DEBUG(dbgs() << '\n'); 2876 } 2877 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2878 #endif 2879 return CI; 2880 } 2881 2882 2883 // Looks through all the bounds info and 2884 // computes the lower bound given the current direction settings 2885 // at each level. If the lower bound for any level is -inf, 2886 // the result is -inf. 2887 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { 2888 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2889 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2890 if (Bound[K].Lower[Bound[K].Direction]) 2891 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2892 else 2893 Sum = nullptr; 2894 } 2895 return Sum; 2896 } 2897 2898 2899 // Looks through all the bounds info and 2900 // computes the upper bound given the current direction settings 2901 // at each level. If the upper bound at any level is +inf, 2902 // the result is +inf. 2903 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { 2904 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2905 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2906 if (Bound[K].Upper[Bound[K].Direction]) 2907 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2908 else 2909 Sum = nullptr; 2910 } 2911 return Sum; 2912 } 2913 2914 2915 //===----------------------------------------------------------------------===// 2916 // Constraint manipulation for Delta test. 2917 2918 // Given a linear SCEV, 2919 // return the coefficient (the step) 2920 // corresponding to the specified loop. 2921 // If there isn't one, return 0. 2922 // For example, given a*i + b*j + c*k, zeroing the coefficient 2923 // corresponding to the j loop would yield b. 2924 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, 2925 const Loop *TargetLoop) const { 2926 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2927 if (!AddRec) 2928 return SE->getConstant(Expr->getType(), 0); 2929 if (AddRec->getLoop() == TargetLoop) 2930 return AddRec->getStepRecurrence(*SE); 2931 return findCoefficient(AddRec->getStart(), TargetLoop); 2932 } 2933 2934 2935 // Given a linear SCEV, 2936 // return the SCEV given by zeroing out the coefficient 2937 // corresponding to the specified loop. 2938 // For example, given a*i + b*j + c*k, zeroing the coefficient 2939 // corresponding to the j loop would yield a*i + c*k. 2940 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, 2941 const Loop *TargetLoop) const { 2942 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2943 if (!AddRec) 2944 return Expr; // ignore 2945 if (AddRec->getLoop() == TargetLoop) 2946 return AddRec->getStart(); 2947 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2948 AddRec->getStepRecurrence(*SE), 2949 AddRec->getLoop(), 2950 AddRec->getNoWrapFlags()); 2951 } 2952 2953 2954 // Given a linear SCEV Expr, 2955 // return the SCEV given by adding some Value to the 2956 // coefficient corresponding to the specified TargetLoop. 2957 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2958 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2959 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, 2960 const Loop *TargetLoop, 2961 const SCEV *Value) const { 2962 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2963 if (!AddRec) // create a new addRec 2964 return SE->getAddRecExpr(Expr, 2965 Value, 2966 TargetLoop, 2967 SCEV::FlagAnyWrap); // Worst case, with no info. 2968 if (AddRec->getLoop() == TargetLoop) { 2969 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2970 if (Sum->isZero()) 2971 return AddRec->getStart(); 2972 return SE->getAddRecExpr(AddRec->getStart(), 2973 Sum, 2974 AddRec->getLoop(), 2975 AddRec->getNoWrapFlags()); 2976 } 2977 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2978 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2979 return SE->getAddRecExpr( 2980 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2981 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 2982 AddRec->getNoWrapFlags()); 2983 } 2984 2985 2986 // Review the constraints, looking for opportunities 2987 // to simplify a subscript pair (Src and Dst). 2988 // Return true if some simplification occurs. 2989 // If the simplification isn't exact (that is, if it is conservative 2990 // in terms of dependence), set consistent to false. 2991 // Corresponds to Figure 5 from the paper 2992 // 2993 // Practical Dependence Testing 2994 // Goff, Kennedy, Tseng 2995 // PLDI 1991 2996 bool DependenceAnalysis::propagate(const SCEV *&Src, 2997 const SCEV *&Dst, 2998 SmallBitVector &Loops, 2999 SmallVectorImpl<Constraint> &Constraints, 3000 bool &Consistent) { 3001 bool Result = false; 3002 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 3003 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3004 DEBUG(Constraints[LI].dump(dbgs())); 3005 if (Constraints[LI].isDistance()) 3006 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3007 else if (Constraints[LI].isLine()) 3008 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3009 else if (Constraints[LI].isPoint()) 3010 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3011 } 3012 return Result; 3013 } 3014 3015 3016 // Attempt to propagate a distance 3017 // constraint into a subscript pair (Src and Dst). 3018 // Return true if some simplification occurs. 3019 // If the simplification isn't exact (that is, if it is conservative 3020 // in terms of dependence), set consistent to false. 3021 bool DependenceAnalysis::propagateDistance(const SCEV *&Src, 3022 const SCEV *&Dst, 3023 Constraint &CurConstraint, 3024 bool &Consistent) { 3025 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3026 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3027 const SCEV *A_K = findCoefficient(Src, CurLoop); 3028 if (A_K->isZero()) 3029 return false; 3030 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3031 Src = SE->getMinusSCEV(Src, DA_K); 3032 Src = zeroCoefficient(Src, CurLoop); 3033 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3034 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3035 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3036 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3037 if (!findCoefficient(Dst, CurLoop)->isZero()) 3038 Consistent = false; 3039 return true; 3040 } 3041 3042 3043 // Attempt to propagate a line 3044 // constraint into a subscript pair (Src and Dst). 3045 // Return true if some simplification occurs. 3046 // If the simplification isn't exact (that is, if it is conservative 3047 // in terms of dependence), set consistent to false. 3048 bool DependenceAnalysis::propagateLine(const SCEV *&Src, 3049 const SCEV *&Dst, 3050 Constraint &CurConstraint, 3051 bool &Consistent) { 3052 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3053 const SCEV *A = CurConstraint.getA(); 3054 const SCEV *B = CurConstraint.getB(); 3055 const SCEV *C = CurConstraint.getC(); 3056 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3057 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3058 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3059 if (A->isZero()) { 3060 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3061 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3062 if (!Bconst || !Cconst) return false; 3063 APInt Beta = Bconst->getValue()->getValue(); 3064 APInt Charlie = Cconst->getValue()->getValue(); 3065 APInt CdivB = Charlie.sdiv(Beta); 3066 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3067 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3068 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3069 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3070 Dst = zeroCoefficient(Dst, CurLoop); 3071 if (!findCoefficient(Src, CurLoop)->isZero()) 3072 Consistent = false; 3073 } 3074 else if (B->isZero()) { 3075 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3076 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3077 if (!Aconst || !Cconst) return false; 3078 APInt Alpha = Aconst->getValue()->getValue(); 3079 APInt Charlie = Cconst->getValue()->getValue(); 3080 APInt CdivA = Charlie.sdiv(Alpha); 3081 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3082 const SCEV *A_K = findCoefficient(Src, CurLoop); 3083 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3084 Src = zeroCoefficient(Src, CurLoop); 3085 if (!findCoefficient(Dst, CurLoop)->isZero()) 3086 Consistent = false; 3087 } 3088 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3089 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3090 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3091 if (!Aconst || !Cconst) return false; 3092 APInt Alpha = Aconst->getValue()->getValue(); 3093 APInt Charlie = Cconst->getValue()->getValue(); 3094 APInt CdivA = Charlie.sdiv(Alpha); 3095 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3096 const SCEV *A_K = findCoefficient(Src, CurLoop); 3097 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3098 Src = zeroCoefficient(Src, CurLoop); 3099 Dst = addToCoefficient(Dst, CurLoop, A_K); 3100 if (!findCoefficient(Dst, CurLoop)->isZero()) 3101 Consistent = false; 3102 } 3103 else { 3104 // paper is incorrect here, or perhaps just misleading 3105 const SCEV *A_K = findCoefficient(Src, CurLoop); 3106 Src = SE->getMulExpr(Src, A); 3107 Dst = SE->getMulExpr(Dst, A); 3108 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3109 Src = zeroCoefficient(Src, CurLoop); 3110 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3111 if (!findCoefficient(Dst, CurLoop)->isZero()) 3112 Consistent = false; 3113 } 3114 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3115 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3116 return true; 3117 } 3118 3119 3120 // Attempt to propagate a point 3121 // constraint into a subscript pair (Src and Dst). 3122 // Return true if some simplification occurs. 3123 bool DependenceAnalysis::propagatePoint(const SCEV *&Src, 3124 const SCEV *&Dst, 3125 Constraint &CurConstraint) { 3126 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3127 const SCEV *A_K = findCoefficient(Src, CurLoop); 3128 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3129 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3130 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3131 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3132 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3133 Src = zeroCoefficient(Src, CurLoop); 3134 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3135 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3136 Dst = zeroCoefficient(Dst, CurLoop); 3137 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3138 return true; 3139 } 3140 3141 3142 // Update direction vector entry based on the current constraint. 3143 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, 3144 const Constraint &CurConstraint 3145 ) const { 3146 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3147 DEBUG(CurConstraint.dump(dbgs())); 3148 if (CurConstraint.isAny()) 3149 ; // use defaults 3150 else if (CurConstraint.isDistance()) { 3151 // this one is consistent, the others aren't 3152 Level.Scalar = false; 3153 Level.Distance = CurConstraint.getD(); 3154 unsigned NewDirection = Dependence::DVEntry::NONE; 3155 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3156 NewDirection = Dependence::DVEntry::EQ; 3157 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3158 NewDirection |= Dependence::DVEntry::LT; 3159 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3160 NewDirection |= Dependence::DVEntry::GT; 3161 Level.Direction &= NewDirection; 3162 } 3163 else if (CurConstraint.isLine()) { 3164 Level.Scalar = false; 3165 Level.Distance = nullptr; 3166 // direction should be accurate 3167 } 3168 else if (CurConstraint.isPoint()) { 3169 Level.Scalar = false; 3170 Level.Distance = nullptr; 3171 unsigned NewDirection = Dependence::DVEntry::NONE; 3172 if (!isKnownPredicate(CmpInst::ICMP_NE, 3173 CurConstraint.getY(), 3174 CurConstraint.getX())) 3175 // if X may be = Y 3176 NewDirection |= Dependence::DVEntry::EQ; 3177 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3178 CurConstraint.getY(), 3179 CurConstraint.getX())) 3180 // if Y may be > X 3181 NewDirection |= Dependence::DVEntry::LT; 3182 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3183 CurConstraint.getY(), 3184 CurConstraint.getX())) 3185 // if Y may be < X 3186 NewDirection |= Dependence::DVEntry::GT; 3187 Level.Direction &= NewDirection; 3188 } 3189 else 3190 llvm_unreachable("constraint has unexpected kind"); 3191 } 3192 3193 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3194 /// source and destination array references are recurrences on a nested loop, 3195 /// this function flattens the nested recurrences into separate recurrences 3196 /// for each loop level. 3197 bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, 3198 const SCEV *DstSCEV, 3199 SmallVectorImpl<Subscript> &Pair, 3200 const SCEV *ElementSize) { 3201 const SCEVUnknown *SrcBase = 3202 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); 3203 const SCEVUnknown *DstBase = 3204 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); 3205 3206 if (!SrcBase || !DstBase || SrcBase != DstBase) 3207 return false; 3208 3209 SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); 3210 DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); 3211 3212 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3213 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3214 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3215 return false; 3216 3217 // First step: collect parametric terms in both array references. 3218 SmallVector<const SCEV *, 4> Terms; 3219 SrcAR->collectParametricTerms(*SE, Terms); 3220 DstAR->collectParametricTerms(*SE, Terms); 3221 3222 // Second step: find subscript sizes. 3223 SmallVector<const SCEV *, 4> Sizes; 3224 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3225 3226 // Third step: compute the access functions for each subscript. 3227 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3228 SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); 3229 DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); 3230 3231 // Fail when there is only a subscript: that's a linearized access function. 3232 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3233 SrcSubscripts.size() != DstSubscripts.size()) 3234 return false; 3235 3236 int size = SrcSubscripts.size(); 3237 3238 DEBUG({ 3239 dbgs() << "\nSrcSubscripts: "; 3240 for (int i = 0; i < size; i++) 3241 dbgs() << *SrcSubscripts[i]; 3242 dbgs() << "\nDstSubscripts: "; 3243 for (int i = 0; i < size; i++) 3244 dbgs() << *DstSubscripts[i]; 3245 }); 3246 3247 // The delinearization transforms a single-subscript MIV dependence test into 3248 // a multi-subscript SIV dependence test that is easier to compute. So we 3249 // resize Pair to contain as many pairs of subscripts as the delinearization 3250 // has found, and then initialize the pairs following the delinearization. 3251 Pair.resize(size); 3252 for (int i = 0; i < size; ++i) { 3253 Pair[i].Src = SrcSubscripts[i]; 3254 Pair[i].Dst = DstSubscripts[i]; 3255 unifySubscriptType(&Pair[i]); 3256 3257 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3258 // delinearization has found, and add these constraints to the dependence 3259 // check to avoid memory accesses overflow from one dimension into another. 3260 // This is related to the problem of determining the existence of data 3261 // dependences in array accesses using a different number of subscripts: in 3262 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3263 } 3264 3265 return true; 3266 } 3267 3268 //===----------------------------------------------------------------------===// 3269 3270 #ifndef NDEBUG 3271 // For debugging purposes, dump a small bit vector to dbgs(). 3272 static void dumpSmallBitVector(SmallBitVector &BV) { 3273 dbgs() << "{"; 3274 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3275 dbgs() << VI; 3276 if (BV.find_next(VI) >= 0) 3277 dbgs() << ' '; 3278 } 3279 dbgs() << "}\n"; 3280 } 3281 #endif 3282 3283 3284 // depends - 3285 // Returns NULL if there is no dependence. 3286 // Otherwise, return a Dependence with as many details as possible. 3287 // Corresponds to Section 3.1 in the paper 3288 // 3289 // Practical Dependence Testing 3290 // Goff, Kennedy, Tseng 3291 // PLDI 1991 3292 // 3293 // Care is required to keep the routine below, getSplitIteration(), 3294 // up to date with respect to this routine. 3295 std::unique_ptr<Dependence> 3296 DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, 3297 bool PossiblyLoopIndependent) { 3298 if (Src == Dst) 3299 PossiblyLoopIndependent = false; 3300 3301 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3302 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3303 // if both instructions don't reference memory, there's no dependence 3304 return nullptr; 3305 3306 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3307 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3308 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3309 return make_unique<Dependence>(Src, Dst); 3310 } 3311 3312 Value *SrcPtr = getPointerOperand(Src); 3313 Value *DstPtr = getPointerOperand(Dst); 3314 3315 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3316 SrcPtr)) { 3317 case AliasAnalysis::MayAlias: 3318 case AliasAnalysis::PartialAlias: 3319 // cannot analyse objects if we don't understand their aliasing. 3320 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3321 return make_unique<Dependence>(Src, Dst); 3322 case AliasAnalysis::NoAlias: 3323 // If the objects noalias, they are distinct, accesses are independent. 3324 DEBUG(dbgs() << "no alias\n"); 3325 return nullptr; 3326 case AliasAnalysis::MustAlias: 3327 break; // The underlying objects alias; test accesses for dependence. 3328 } 3329 3330 // establish loop nesting levels 3331 establishNestingLevels(Src, Dst); 3332 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3333 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3334 3335 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3336 ++TotalArrayPairs; 3337 3338 // See if there are GEPs we can use. 3339 bool UsefulGEP = false; 3340 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3341 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3342 if (SrcGEP && DstGEP && 3343 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3344 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3345 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3346 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3347 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3348 3349 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3350 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3351 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3352 } 3353 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3354 SmallVector<Subscript, 4> Pair(Pairs); 3355 if (UsefulGEP) { 3356 DEBUG(dbgs() << " using GEPs\n"); 3357 unsigned P = 0; 3358 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3359 SrcEnd = SrcGEP->idx_end(), 3360 DstIdx = DstGEP->idx_begin(); 3361 SrcIdx != SrcEnd; 3362 ++SrcIdx, ++DstIdx, ++P) { 3363 Pair[P].Src = SE->getSCEV(*SrcIdx); 3364 Pair[P].Dst = SE->getSCEV(*DstIdx); 3365 unifySubscriptType(&Pair[P]); 3366 } 3367 } 3368 else { 3369 DEBUG(dbgs() << " ignoring GEPs\n"); 3370 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3371 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3372 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3373 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3374 Pair[0].Src = SrcSCEV; 3375 Pair[0].Dst = DstSCEV; 3376 } 3377 3378 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3379 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3380 DEBUG(dbgs() << " delinerized GEP\n"); 3381 Pairs = Pair.size(); 3382 } 3383 3384 for (unsigned P = 0; P < Pairs; ++P) { 3385 Pair[P].Loops.resize(MaxLevels + 1); 3386 Pair[P].GroupLoops.resize(MaxLevels + 1); 3387 Pair[P].Group.resize(Pairs); 3388 removeMatchingExtensions(&Pair[P]); 3389 Pair[P].Classification = 3390 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3391 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3392 Pair[P].Loops); 3393 Pair[P].GroupLoops = Pair[P].Loops; 3394 Pair[P].Group.set(P); 3395 DEBUG(dbgs() << " subscript " << P << "\n"); 3396 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3397 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3398 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3399 DEBUG(dbgs() << "\tloops = "); 3400 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3401 } 3402 3403 SmallBitVector Separable(Pairs); 3404 SmallBitVector Coupled(Pairs); 3405 3406 // Partition subscripts into separable and minimally-coupled groups 3407 // Algorithm in paper is algorithmically better; 3408 // this may be faster in practice. Check someday. 3409 // 3410 // Here's an example of how it works. Consider this code: 3411 // 3412 // for (i = ...) { 3413 // for (j = ...) { 3414 // for (k = ...) { 3415 // for (l = ...) { 3416 // for (m = ...) { 3417 // A[i][j][k][m] = ...; 3418 // ... = A[0][j][l][i + j]; 3419 // } 3420 // } 3421 // } 3422 // } 3423 // } 3424 // 3425 // There are 4 subscripts here: 3426 // 0 [i] and [0] 3427 // 1 [j] and [j] 3428 // 2 [k] and [l] 3429 // 3 [m] and [i + j] 3430 // 3431 // We've already classified each subscript pair as ZIV, SIV, etc., 3432 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3433 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3434 // and set Pair[P].Group = {P}. 3435 // 3436 // Src Dst Classification Loops GroupLoops Group 3437 // 0 [i] [0] SIV {1} {1} {0} 3438 // 1 [j] [j] SIV {2} {2} {1} 3439 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3440 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3441 // 3442 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3443 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3444 // 3445 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3446 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3447 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3448 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3449 // to either Separable or Coupled). 3450 // 3451 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3452 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3453 // so Pair[3].Group = {0, 1, 3} and Done = false. 3454 // 3455 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3456 // Since Done remains true, we add 2 to the set of Separable pairs. 3457 // 3458 // Finally, we consider 3. There's nothing to compare it with, 3459 // so Done remains true and we add it to the Coupled set. 3460 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3461 // 3462 // In the end, we've got 1 separable subscript and 1 coupled group. 3463 for (unsigned SI = 0; SI < Pairs; ++SI) { 3464 if (Pair[SI].Classification == Subscript::NonLinear) { 3465 // ignore these, but collect loops for later 3466 ++NonlinearSubscriptPairs; 3467 collectCommonLoops(Pair[SI].Src, 3468 LI->getLoopFor(Src->getParent()), 3469 Pair[SI].Loops); 3470 collectCommonLoops(Pair[SI].Dst, 3471 LI->getLoopFor(Dst->getParent()), 3472 Pair[SI].Loops); 3473 Result.Consistent = false; 3474 } else if (Pair[SI].Classification == Subscript::ZIV) { 3475 // always separable 3476 Separable.set(SI); 3477 } 3478 else { 3479 // SIV, RDIV, or MIV, so check for coupled group 3480 bool Done = true; 3481 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3482 SmallBitVector Intersection = Pair[SI].GroupLoops; 3483 Intersection &= Pair[SJ].GroupLoops; 3484 if (Intersection.any()) { 3485 // accumulate set of all the loops in group 3486 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3487 // accumulate set of all subscripts in group 3488 Pair[SJ].Group |= Pair[SI].Group; 3489 Done = false; 3490 } 3491 } 3492 if (Done) { 3493 if (Pair[SI].Group.count() == 1) { 3494 Separable.set(SI); 3495 ++SeparableSubscriptPairs; 3496 } 3497 else { 3498 Coupled.set(SI); 3499 ++CoupledSubscriptPairs; 3500 } 3501 } 3502 } 3503 } 3504 3505 DEBUG(dbgs() << " Separable = "); 3506 DEBUG(dumpSmallBitVector(Separable)); 3507 DEBUG(dbgs() << " Coupled = "); 3508 DEBUG(dumpSmallBitVector(Coupled)); 3509 3510 Constraint NewConstraint; 3511 NewConstraint.setAny(SE); 3512 3513 // test separable subscripts 3514 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3515 DEBUG(dbgs() << "testing subscript " << SI); 3516 switch (Pair[SI].Classification) { 3517 case Subscript::ZIV: 3518 DEBUG(dbgs() << ", ZIV\n"); 3519 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3520 return nullptr; 3521 break; 3522 case Subscript::SIV: { 3523 DEBUG(dbgs() << ", SIV\n"); 3524 unsigned Level; 3525 const SCEV *SplitIter = nullptr; 3526 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3527 SplitIter)) 3528 return nullptr; 3529 break; 3530 } 3531 case Subscript::RDIV: 3532 DEBUG(dbgs() << ", RDIV\n"); 3533 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3534 return nullptr; 3535 break; 3536 case Subscript::MIV: 3537 DEBUG(dbgs() << ", MIV\n"); 3538 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3539 return nullptr; 3540 break; 3541 default: 3542 llvm_unreachable("subscript has unexpected classification"); 3543 } 3544 } 3545 3546 if (Coupled.count()) { 3547 // test coupled subscript groups 3548 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3549 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3550 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3551 for (unsigned II = 0; II <= MaxLevels; ++II) 3552 Constraints[II].setAny(SE); 3553 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3554 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3555 SmallBitVector Group(Pair[SI].Group); 3556 SmallBitVector Sivs(Pairs); 3557 SmallBitVector Mivs(Pairs); 3558 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3559 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3560 DEBUG(dbgs() << SJ << " "); 3561 if (Pair[SJ].Classification == Subscript::SIV) 3562 Sivs.set(SJ); 3563 else 3564 Mivs.set(SJ); 3565 } 3566 DEBUG(dbgs() << "}\n"); 3567 while (Sivs.any()) { 3568 bool Changed = false; 3569 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3570 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3571 // SJ is an SIV subscript that's part of the current coupled group 3572 unsigned Level; 3573 const SCEV *SplitIter = nullptr; 3574 DEBUG(dbgs() << "SIV\n"); 3575 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3576 SplitIter)) 3577 return nullptr; 3578 ConstrainedLevels.set(Level); 3579 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3580 if (Constraints[Level].isEmpty()) { 3581 ++DeltaIndependence; 3582 return nullptr; 3583 } 3584 Changed = true; 3585 } 3586 Sivs.reset(SJ); 3587 } 3588 if (Changed) { 3589 // propagate, possibly creating new SIVs and ZIVs 3590 DEBUG(dbgs() << " propagating\n"); 3591 DEBUG(dbgs() << "\tMivs = "); 3592 DEBUG(dumpSmallBitVector(Mivs)); 3593 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3594 // SJ is an MIV subscript that's part of the current coupled group 3595 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3596 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3597 Constraints, Result.Consistent)) { 3598 DEBUG(dbgs() << "\t Changed\n"); 3599 ++DeltaPropagations; 3600 Pair[SJ].Classification = 3601 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3602 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3603 Pair[SJ].Loops); 3604 switch (Pair[SJ].Classification) { 3605 case Subscript::ZIV: 3606 DEBUG(dbgs() << "ZIV\n"); 3607 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3608 return nullptr; 3609 Mivs.reset(SJ); 3610 break; 3611 case Subscript::SIV: 3612 Sivs.set(SJ); 3613 Mivs.reset(SJ); 3614 break; 3615 case Subscript::RDIV: 3616 case Subscript::MIV: 3617 break; 3618 default: 3619 llvm_unreachable("bad subscript classification"); 3620 } 3621 } 3622 } 3623 } 3624 } 3625 3626 // test & propagate remaining RDIVs 3627 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3628 if (Pair[SJ].Classification == Subscript::RDIV) { 3629 DEBUG(dbgs() << "RDIV test\n"); 3630 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3631 return nullptr; 3632 // I don't yet understand how to propagate RDIV results 3633 Mivs.reset(SJ); 3634 } 3635 } 3636 3637 // test remaining MIVs 3638 // This code is temporary. 3639 // Better to somehow test all remaining subscripts simultaneously. 3640 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3641 if (Pair[SJ].Classification == Subscript::MIV) { 3642 DEBUG(dbgs() << "MIV test\n"); 3643 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3644 return nullptr; 3645 } 3646 else 3647 llvm_unreachable("expected only MIV subscripts at this point"); 3648 } 3649 3650 // update Result.DV from constraint vector 3651 DEBUG(dbgs() << " updating\n"); 3652 for (int SJ = ConstrainedLevels.find_first(); SJ >= 0; 3653 SJ = ConstrainedLevels.find_next(SJ)) { 3654 if (SJ > (int)CommonLevels) 3655 break; 3656 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3657 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3658 return nullptr; 3659 } 3660 } 3661 } 3662 3663 // Make sure the Scalar flags are set correctly. 3664 SmallBitVector CompleteLoops(MaxLevels + 1); 3665 for (unsigned SI = 0; SI < Pairs; ++SI) 3666 CompleteLoops |= Pair[SI].Loops; 3667 for (unsigned II = 1; II <= CommonLevels; ++II) 3668 if (CompleteLoops[II]) 3669 Result.DV[II - 1].Scalar = false; 3670 3671 if (PossiblyLoopIndependent) { 3672 // Make sure the LoopIndependent flag is set correctly. 3673 // All directions must include equal, otherwise no 3674 // loop-independent dependence is possible. 3675 for (unsigned II = 1; II <= CommonLevels; ++II) { 3676 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3677 Result.LoopIndependent = false; 3678 break; 3679 } 3680 } 3681 } 3682 else { 3683 // On the other hand, if all directions are equal and there's no 3684 // loop-independent dependence possible, then no dependence exists. 3685 bool AllEqual = true; 3686 for (unsigned II = 1; II <= CommonLevels; ++II) { 3687 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3688 AllEqual = false; 3689 break; 3690 } 3691 } 3692 if (AllEqual) 3693 return nullptr; 3694 } 3695 3696 auto Final = make_unique<FullDependence>(Result); 3697 Result.DV = nullptr; 3698 return std::move(Final); 3699 } 3700 3701 3702 3703 //===----------------------------------------------------------------------===// 3704 // getSplitIteration - 3705 // Rather than spend rarely-used space recording the splitting iteration 3706 // during the Weak-Crossing SIV test, we re-compute it on demand. 3707 // The re-computation is basically a repeat of the entire dependence test, 3708 // though simplified since we know that the dependence exists. 3709 // It's tedious, since we must go through all propagations, etc. 3710 // 3711 // Care is required to keep this code up to date with respect to the routine 3712 // above, depends(). 3713 // 3714 // Generally, the dependence analyzer will be used to build 3715 // a dependence graph for a function (basically a map from instructions 3716 // to dependences). Looking for cycles in the graph shows us loops 3717 // that cannot be trivially vectorized/parallelized. 3718 // 3719 // We can try to improve the situation by examining all the dependences 3720 // that make up the cycle, looking for ones we can break. 3721 // Sometimes, peeling the first or last iteration of a loop will break 3722 // dependences, and we've got flags for those possibilities. 3723 // Sometimes, splitting a loop at some other iteration will do the trick, 3724 // and we've got a flag for that case. Rather than waste the space to 3725 // record the exact iteration (since we rarely know), we provide 3726 // a method that calculates the iteration. It's a drag that it must work 3727 // from scratch, but wonderful in that it's possible. 3728 // 3729 // Here's an example: 3730 // 3731 // for (i = 0; i < 10; i++) 3732 // A[i] = ... 3733 // ... = A[11 - i] 3734 // 3735 // There's a loop-carried flow dependence from the store to the load, 3736 // found by the weak-crossing SIV test. The dependence will have a flag, 3737 // indicating that the dependence can be broken by splitting the loop. 3738 // Calling getSplitIteration will return 5. 3739 // Splitting the loop breaks the dependence, like so: 3740 // 3741 // for (i = 0; i <= 5; i++) 3742 // A[i] = ... 3743 // ... = A[11 - i] 3744 // for (i = 6; i < 10; i++) 3745 // A[i] = ... 3746 // ... = A[11 - i] 3747 // 3748 // breaks the dependence and allows us to vectorize/parallelize 3749 // both loops. 3750 const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, 3751 unsigned SplitLevel) { 3752 assert(Dep.isSplitable(SplitLevel) && 3753 "Dep should be splitable at SplitLevel"); 3754 Instruction *Src = Dep.getSrc(); 3755 Instruction *Dst = Dep.getDst(); 3756 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3757 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3758 assert(isLoadOrStore(Src)); 3759 assert(isLoadOrStore(Dst)); 3760 Value *SrcPtr = getPointerOperand(Src); 3761 Value *DstPtr = getPointerOperand(Dst); 3762 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3763 SrcPtr) == AliasAnalysis::MustAlias); 3764 3765 // establish loop nesting levels 3766 establishNestingLevels(Src, Dst); 3767 3768 FullDependence Result(Src, Dst, false, CommonLevels); 3769 3770 // See if there are GEPs we can use. 3771 bool UsefulGEP = false; 3772 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3773 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3774 if (SrcGEP && DstGEP && 3775 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3776 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3777 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3778 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3779 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3780 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3781 } 3782 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3783 SmallVector<Subscript, 4> Pair(Pairs); 3784 if (UsefulGEP) { 3785 unsigned P = 0; 3786 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3787 SrcEnd = SrcGEP->idx_end(), 3788 DstIdx = DstGEP->idx_begin(); 3789 SrcIdx != SrcEnd; 3790 ++SrcIdx, ++DstIdx, ++P) { 3791 Pair[P].Src = SE->getSCEV(*SrcIdx); 3792 Pair[P].Dst = SE->getSCEV(*DstIdx); 3793 } 3794 } 3795 else { 3796 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3797 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3798 Pair[0].Src = SrcSCEV; 3799 Pair[0].Dst = DstSCEV; 3800 } 3801 3802 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3803 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3804 DEBUG(dbgs() << " delinerized GEP\n"); 3805 Pairs = Pair.size(); 3806 } 3807 3808 for (unsigned P = 0; P < Pairs; ++P) { 3809 Pair[P].Loops.resize(MaxLevels + 1); 3810 Pair[P].GroupLoops.resize(MaxLevels + 1); 3811 Pair[P].Group.resize(Pairs); 3812 removeMatchingExtensions(&Pair[P]); 3813 Pair[P].Classification = 3814 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3815 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3816 Pair[P].Loops); 3817 Pair[P].GroupLoops = Pair[P].Loops; 3818 Pair[P].Group.set(P); 3819 } 3820 3821 SmallBitVector Separable(Pairs); 3822 SmallBitVector Coupled(Pairs); 3823 3824 // partition subscripts into separable and minimally-coupled groups 3825 for (unsigned SI = 0; SI < Pairs; ++SI) { 3826 if (Pair[SI].Classification == Subscript::NonLinear) { 3827 // ignore these, but collect loops for later 3828 collectCommonLoops(Pair[SI].Src, 3829 LI->getLoopFor(Src->getParent()), 3830 Pair[SI].Loops); 3831 collectCommonLoops(Pair[SI].Dst, 3832 LI->getLoopFor(Dst->getParent()), 3833 Pair[SI].Loops); 3834 Result.Consistent = false; 3835 } 3836 else if (Pair[SI].Classification == Subscript::ZIV) 3837 Separable.set(SI); 3838 else { 3839 // SIV, RDIV, or MIV, so check for coupled group 3840 bool Done = true; 3841 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3842 SmallBitVector Intersection = Pair[SI].GroupLoops; 3843 Intersection &= Pair[SJ].GroupLoops; 3844 if (Intersection.any()) { 3845 // accumulate set of all the loops in group 3846 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3847 // accumulate set of all subscripts in group 3848 Pair[SJ].Group |= Pair[SI].Group; 3849 Done = false; 3850 } 3851 } 3852 if (Done) { 3853 if (Pair[SI].Group.count() == 1) 3854 Separable.set(SI); 3855 else 3856 Coupled.set(SI); 3857 } 3858 } 3859 } 3860 3861 Constraint NewConstraint; 3862 NewConstraint.setAny(SE); 3863 3864 // test separable subscripts 3865 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3866 switch (Pair[SI].Classification) { 3867 case Subscript::SIV: { 3868 unsigned Level; 3869 const SCEV *SplitIter = nullptr; 3870 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3871 Result, NewConstraint, SplitIter); 3872 if (Level == SplitLevel) { 3873 assert(SplitIter != nullptr); 3874 return SplitIter; 3875 } 3876 break; 3877 } 3878 case Subscript::ZIV: 3879 case Subscript::RDIV: 3880 case Subscript::MIV: 3881 break; 3882 default: 3883 llvm_unreachable("subscript has unexpected classification"); 3884 } 3885 } 3886 3887 if (Coupled.count()) { 3888 // test coupled subscript groups 3889 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3890 for (unsigned II = 0; II <= MaxLevels; ++II) 3891 Constraints[II].setAny(SE); 3892 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3893 SmallBitVector Group(Pair[SI].Group); 3894 SmallBitVector Sivs(Pairs); 3895 SmallBitVector Mivs(Pairs); 3896 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3897 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3898 if (Pair[SJ].Classification == Subscript::SIV) 3899 Sivs.set(SJ); 3900 else 3901 Mivs.set(SJ); 3902 } 3903 while (Sivs.any()) { 3904 bool Changed = false; 3905 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3906 // SJ is an SIV subscript that's part of the current coupled group 3907 unsigned Level; 3908 const SCEV *SplitIter = nullptr; 3909 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3910 Result, NewConstraint, SplitIter); 3911 if (Level == SplitLevel && SplitIter) 3912 return SplitIter; 3913 ConstrainedLevels.set(Level); 3914 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3915 Changed = true; 3916 Sivs.reset(SJ); 3917 } 3918 if (Changed) { 3919 // propagate, possibly creating new SIVs and ZIVs 3920 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3921 // SJ is an MIV subscript that's part of the current coupled group 3922 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3923 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3924 Pair[SJ].Classification = 3925 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3926 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3927 Pair[SJ].Loops); 3928 switch (Pair[SJ].Classification) { 3929 case Subscript::ZIV: 3930 Mivs.reset(SJ); 3931 break; 3932 case Subscript::SIV: 3933 Sivs.set(SJ); 3934 Mivs.reset(SJ); 3935 break; 3936 case Subscript::RDIV: 3937 case Subscript::MIV: 3938 break; 3939 default: 3940 llvm_unreachable("bad subscript classification"); 3941 } 3942 } 3943 } 3944 } 3945 } 3946 } 3947 } 3948 llvm_unreachable("somehow reached end of routine"); 3949 return nullptr; 3950 } 3951