1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/Analysis/AliasAnalysis.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/Analysis/ValueTracking.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 114 //===----------------------------------------------------------------------===// 115 // basics 116 117 DependenceAnalysis::Result 118 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 119 auto &AA = FAM.getResult<AAManager>(F); 120 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 121 auto &LI = FAM.getResult<LoopAnalysis>(F); 122 return DependenceInfo(&F, &AA, &SE, &LI); 123 } 124 125 char DependenceAnalysis::PassID; 126 127 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 128 "Dependence Analysis", true, true) 129 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 131 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 132 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 133 true, true) 134 135 char DependenceAnalysisWrapperPass::ID = 0; 136 137 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 138 return new DependenceAnalysisWrapperPass(); 139 } 140 141 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 142 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 143 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 144 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 145 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 146 return false; 147 } 148 149 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 150 151 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 152 153 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 154 AU.setPreservesAll(); 155 AU.addRequiredTransitive<AAResultsWrapperPass>(); 156 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 157 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 158 } 159 160 161 // Used to test the dependence analyzer. 162 // Looks through the function, noting loads and stores. 163 // Calls depends() on every possible pair and prints out the result. 164 // Ignores all other instructions. 165 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 166 auto *F = DA->getFunction(); 167 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 168 ++SrcI) { 169 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 170 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 171 DstI != DstE; ++DstI) { 172 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 173 OS << "da analyze - "; 174 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 175 D->dump(OS); 176 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 177 if (D->isSplitable(Level)) { 178 OS << "da analyze - split level = " << Level; 179 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 180 OS << "!\n"; 181 } 182 } 183 } 184 else 185 OS << "none!\n"; 186 } 187 } 188 } 189 } 190 } 191 192 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 193 const Module *) const { 194 dumpExampleDependence(OS, info.get()); 195 } 196 197 //===----------------------------------------------------------------------===// 198 // Dependence methods 199 200 // Returns true if this is an input dependence. 201 bool Dependence::isInput() const { 202 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 203 } 204 205 206 // Returns true if this is an output dependence. 207 bool Dependence::isOutput() const { 208 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 209 } 210 211 212 // Returns true if this is an flow (aka true) dependence. 213 bool Dependence::isFlow() const { 214 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 215 } 216 217 218 // Returns true if this is an anti dependence. 219 bool Dependence::isAnti() const { 220 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 221 } 222 223 224 // Returns true if a particular level is scalar; that is, 225 // if no subscript in the source or destination mention the induction 226 // variable associated with the loop at this level. 227 // Leave this out of line, so it will serve as a virtual method anchor 228 bool Dependence::isScalar(unsigned level) const { 229 return false; 230 } 231 232 233 //===----------------------------------------------------------------------===// 234 // FullDependence methods 235 236 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 237 bool PossiblyLoopIndependent, 238 unsigned CommonLevels) 239 : Dependence(Source, Destination), Levels(CommonLevels), 240 LoopIndependent(PossiblyLoopIndependent) { 241 Consistent = true; 242 if (CommonLevels) 243 DV = make_unique<DVEntry[]>(CommonLevels); 244 } 245 246 // The rest are simple getters that hide the implementation. 247 248 // getDirection - Returns the direction associated with a particular level. 249 unsigned FullDependence::getDirection(unsigned Level) const { 250 assert(0 < Level && Level <= Levels && "Level out of range"); 251 return DV[Level - 1].Direction; 252 } 253 254 255 // Returns the distance (or NULL) associated with a particular level. 256 const SCEV *FullDependence::getDistance(unsigned Level) const { 257 assert(0 < Level && Level <= Levels && "Level out of range"); 258 return DV[Level - 1].Distance; 259 } 260 261 262 // Returns true if a particular level is scalar; that is, 263 // if no subscript in the source or destination mention the induction 264 // variable associated with the loop at this level. 265 bool FullDependence::isScalar(unsigned Level) const { 266 assert(0 < Level && Level <= Levels && "Level out of range"); 267 return DV[Level - 1].Scalar; 268 } 269 270 271 // Returns true if peeling the first iteration from this loop 272 // will break this dependence. 273 bool FullDependence::isPeelFirst(unsigned Level) const { 274 assert(0 < Level && Level <= Levels && "Level out of range"); 275 return DV[Level - 1].PeelFirst; 276 } 277 278 279 // Returns true if peeling the last iteration from this loop 280 // will break this dependence. 281 bool FullDependence::isPeelLast(unsigned Level) const { 282 assert(0 < Level && Level <= Levels && "Level out of range"); 283 return DV[Level - 1].PeelLast; 284 } 285 286 287 // Returns true if splitting this loop will break the dependence. 288 bool FullDependence::isSplitable(unsigned Level) const { 289 assert(0 < Level && Level <= Levels && "Level out of range"); 290 return DV[Level - 1].Splitable; 291 } 292 293 294 //===----------------------------------------------------------------------===// 295 // DependenceInfo::Constraint methods 296 297 // If constraint is a point <X, Y>, returns X. 298 // Otherwise assert. 299 const SCEV *DependenceInfo::Constraint::getX() const { 300 assert(Kind == Point && "Kind should be Point"); 301 return A; 302 } 303 304 305 // If constraint is a point <X, Y>, returns Y. 306 // Otherwise assert. 307 const SCEV *DependenceInfo::Constraint::getY() const { 308 assert(Kind == Point && "Kind should be Point"); 309 return B; 310 } 311 312 313 // If constraint is a line AX + BY = C, returns A. 314 // Otherwise assert. 315 const SCEV *DependenceInfo::Constraint::getA() const { 316 assert((Kind == Line || Kind == Distance) && 317 "Kind should be Line (or Distance)"); 318 return A; 319 } 320 321 322 // If constraint is a line AX + BY = C, returns B. 323 // Otherwise assert. 324 const SCEV *DependenceInfo::Constraint::getB() const { 325 assert((Kind == Line || Kind == Distance) && 326 "Kind should be Line (or Distance)"); 327 return B; 328 } 329 330 331 // If constraint is a line AX + BY = C, returns C. 332 // Otherwise assert. 333 const SCEV *DependenceInfo::Constraint::getC() const { 334 assert((Kind == Line || Kind == Distance) && 335 "Kind should be Line (or Distance)"); 336 return C; 337 } 338 339 340 // If constraint is a distance, returns D. 341 // Otherwise assert. 342 const SCEV *DependenceInfo::Constraint::getD() const { 343 assert(Kind == Distance && "Kind should be Distance"); 344 return SE->getNegativeSCEV(C); 345 } 346 347 348 // Returns the loop associated with this constraint. 349 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 350 assert((Kind == Distance || Kind == Line || Kind == Point) && 351 "Kind should be Distance, Line, or Point"); 352 return AssociatedLoop; 353 } 354 355 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 356 const Loop *CurLoop) { 357 Kind = Point; 358 A = X; 359 B = Y; 360 AssociatedLoop = CurLoop; 361 } 362 363 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 364 const SCEV *CC, const Loop *CurLoop) { 365 Kind = Line; 366 A = AA; 367 B = BB; 368 C = CC; 369 AssociatedLoop = CurLoop; 370 } 371 372 void DependenceInfo::Constraint::setDistance(const SCEV *D, 373 const Loop *CurLoop) { 374 Kind = Distance; 375 A = SE->getOne(D->getType()); 376 B = SE->getNegativeSCEV(A); 377 C = SE->getNegativeSCEV(D); 378 AssociatedLoop = CurLoop; 379 } 380 381 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 382 383 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 384 SE = NewSE; 385 Kind = Any; 386 } 387 388 389 // For debugging purposes. Dumps the constraint out to OS. 390 void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 391 if (isEmpty()) 392 OS << " Empty\n"; 393 else if (isAny()) 394 OS << " Any\n"; 395 else if (isPoint()) 396 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 397 else if (isDistance()) 398 OS << " Distance is " << *getD() << 399 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 400 else if (isLine()) 401 OS << " Line is " << *getA() << "*X + " << 402 *getB() << "*Y = " << *getC() << "\n"; 403 else 404 llvm_unreachable("unknown constraint type in Constraint::dump"); 405 } 406 407 408 // Updates X with the intersection 409 // of the Constraints X and Y. Returns true if X has changed. 410 // Corresponds to Figure 4 from the paper 411 // 412 // Practical Dependence Testing 413 // Goff, Kennedy, Tseng 414 // PLDI 1991 415 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 416 ++DeltaApplications; 417 DEBUG(dbgs() << "\tintersect constraints\n"); 418 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 419 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 420 assert(!Y->isPoint() && "Y must not be a Point"); 421 if (X->isAny()) { 422 if (Y->isAny()) 423 return false; 424 *X = *Y; 425 return true; 426 } 427 if (X->isEmpty()) 428 return false; 429 if (Y->isEmpty()) { 430 X->setEmpty(); 431 return true; 432 } 433 434 if (X->isDistance() && Y->isDistance()) { 435 DEBUG(dbgs() << "\t intersect 2 distances\n"); 436 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 437 return false; 438 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 439 X->setEmpty(); 440 ++DeltaSuccesses; 441 return true; 442 } 443 // Hmmm, interesting situation. 444 // I guess if either is constant, keep it and ignore the other. 445 if (isa<SCEVConstant>(Y->getD())) { 446 *X = *Y; 447 return true; 448 } 449 return false; 450 } 451 452 // At this point, the pseudo-code in Figure 4 of the paper 453 // checks if (X->isPoint() && Y->isPoint()). 454 // This case can't occur in our implementation, 455 // since a Point can only arise as the result of intersecting 456 // two Line constraints, and the right-hand value, Y, is never 457 // the result of an intersection. 458 assert(!(X->isPoint() && Y->isPoint()) && 459 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 460 461 if (X->isLine() && Y->isLine()) { 462 DEBUG(dbgs() << "\t intersect 2 lines\n"); 463 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 464 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 465 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 466 // slopes are equal, so lines are parallel 467 DEBUG(dbgs() << "\t\tsame slope\n"); 468 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 469 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 470 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 471 return false; 472 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 473 X->setEmpty(); 474 ++DeltaSuccesses; 475 return true; 476 } 477 return false; 478 } 479 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 480 // slopes differ, so lines intersect 481 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 482 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 483 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 484 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 485 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 486 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 487 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 488 const SCEVConstant *C1A2_C2A1 = 489 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 490 const SCEVConstant *C1B2_C2B1 = 491 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 492 const SCEVConstant *A1B2_A2B1 = 493 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 494 const SCEVConstant *A2B1_A1B2 = 495 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 496 if (!C1B2_C2B1 || !C1A2_C2A1 || 497 !A1B2_A2B1 || !A2B1_A1B2) 498 return false; 499 APInt Xtop = C1B2_C2B1->getAPInt(); 500 APInt Xbot = A1B2_A2B1->getAPInt(); 501 APInt Ytop = C1A2_C2A1->getAPInt(); 502 APInt Ybot = A2B1_A1B2->getAPInt(); 503 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 504 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 505 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 506 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 507 APInt Xq = Xtop; // these need to be initialized, even 508 APInt Xr = Xtop; // though they're just going to be overwritten 509 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 510 APInt Yq = Ytop; 511 APInt Yr = Ytop; 512 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 513 if (Xr != 0 || Yr != 0) { 514 X->setEmpty(); 515 ++DeltaSuccesses; 516 return true; 517 } 518 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 519 if (Xq.slt(0) || Yq.slt(0)) { 520 X->setEmpty(); 521 ++DeltaSuccesses; 522 return true; 523 } 524 if (const SCEVConstant *CUB = 525 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 526 const APInt &UpperBound = CUB->getAPInt(); 527 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 528 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 529 X->setEmpty(); 530 ++DeltaSuccesses; 531 return true; 532 } 533 } 534 X->setPoint(SE->getConstant(Xq), 535 SE->getConstant(Yq), 536 X->getAssociatedLoop()); 537 ++DeltaSuccesses; 538 return true; 539 } 540 return false; 541 } 542 543 // if (X->isLine() && Y->isPoint()) This case can't occur. 544 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 545 546 if (X->isPoint() && Y->isLine()) { 547 DEBUG(dbgs() << "\t intersect Point and Line\n"); 548 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 549 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 550 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 551 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 552 return false; 553 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 554 X->setEmpty(); 555 ++DeltaSuccesses; 556 return true; 557 } 558 return false; 559 } 560 561 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 562 return false; 563 } 564 565 566 //===----------------------------------------------------------------------===// 567 // DependenceInfo methods 568 569 // For debugging purposes. Dumps a dependence to OS. 570 void Dependence::dump(raw_ostream &OS) const { 571 bool Splitable = false; 572 if (isConfused()) 573 OS << "confused"; 574 else { 575 if (isConsistent()) 576 OS << "consistent "; 577 if (isFlow()) 578 OS << "flow"; 579 else if (isOutput()) 580 OS << "output"; 581 else if (isAnti()) 582 OS << "anti"; 583 else if (isInput()) 584 OS << "input"; 585 unsigned Levels = getLevels(); 586 OS << " ["; 587 for (unsigned II = 1; II <= Levels; ++II) { 588 if (isSplitable(II)) 589 Splitable = true; 590 if (isPeelFirst(II)) 591 OS << 'p'; 592 const SCEV *Distance = getDistance(II); 593 if (Distance) 594 OS << *Distance; 595 else if (isScalar(II)) 596 OS << "S"; 597 else { 598 unsigned Direction = getDirection(II); 599 if (Direction == DVEntry::ALL) 600 OS << "*"; 601 else { 602 if (Direction & DVEntry::LT) 603 OS << "<"; 604 if (Direction & DVEntry::EQ) 605 OS << "="; 606 if (Direction & DVEntry::GT) 607 OS << ">"; 608 } 609 } 610 if (isPeelLast(II)) 611 OS << 'p'; 612 if (II < Levels) 613 OS << " "; 614 } 615 if (isLoopIndependent()) 616 OS << "|<"; 617 OS << "]"; 618 if (Splitable) 619 OS << " splitable"; 620 } 621 OS << "!\n"; 622 } 623 624 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 625 const DataLayout &DL, const Value *A, 626 const Value *B) { 627 const Value *AObj = GetUnderlyingObject(A, DL); 628 const Value *BObj = GetUnderlyingObject(B, DL); 629 return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()), 630 BObj, DL.getTypeStoreSize(BObj->getType())); 631 } 632 633 634 // Returns true if the load or store can be analyzed. Atomic and volatile 635 // operations have properties which this analysis does not understand. 636 static 637 bool isLoadOrStore(const Instruction *I) { 638 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 639 return LI->isUnordered(); 640 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 641 return SI->isUnordered(); 642 return false; 643 } 644 645 646 static 647 Value *getPointerOperand(Instruction *I) { 648 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 649 return LI->getPointerOperand(); 650 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 651 return SI->getPointerOperand(); 652 llvm_unreachable("Value is not load or store instruction"); 653 return nullptr; 654 } 655 656 657 // Examines the loop nesting of the Src and Dst 658 // instructions and establishes their shared loops. Sets the variables 659 // CommonLevels, SrcLevels, and MaxLevels. 660 // The source and destination instructions needn't be contained in the same 661 // loop. The routine establishNestingLevels finds the level of most deeply 662 // nested loop that contains them both, CommonLevels. An instruction that's 663 // not contained in a loop is at level = 0. MaxLevels is equal to the level 664 // of the source plus the level of the destination, minus CommonLevels. 665 // This lets us allocate vectors MaxLevels in length, with room for every 666 // distinct loop referenced in both the source and destination subscripts. 667 // The variable SrcLevels is the nesting depth of the source instruction. 668 // It's used to help calculate distinct loops referenced by the destination. 669 // Here's the map from loops to levels: 670 // 0 - unused 671 // 1 - outermost common loop 672 // ... - other common loops 673 // CommonLevels - innermost common loop 674 // ... - loops containing Src but not Dst 675 // SrcLevels - innermost loop containing Src but not Dst 676 // ... - loops containing Dst but not Src 677 // MaxLevels - innermost loops containing Dst but not Src 678 // Consider the follow code fragment: 679 // for (a = ...) { 680 // for (b = ...) { 681 // for (c = ...) { 682 // for (d = ...) { 683 // A[] = ...; 684 // } 685 // } 686 // for (e = ...) { 687 // for (f = ...) { 688 // for (g = ...) { 689 // ... = A[]; 690 // } 691 // } 692 // } 693 // } 694 // } 695 // If we're looking at the possibility of a dependence between the store 696 // to A (the Src) and the load from A (the Dst), we'll note that they 697 // have 2 loops in common, so CommonLevels will equal 2 and the direction 698 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 699 // A map from loop names to loop numbers would look like 700 // a - 1 701 // b - 2 = CommonLevels 702 // c - 3 703 // d - 4 = SrcLevels 704 // e - 5 705 // f - 6 706 // g - 7 = MaxLevels 707 void DependenceInfo::establishNestingLevels(const Instruction *Src, 708 const Instruction *Dst) { 709 const BasicBlock *SrcBlock = Src->getParent(); 710 const BasicBlock *DstBlock = Dst->getParent(); 711 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 712 unsigned DstLevel = LI->getLoopDepth(DstBlock); 713 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 714 const Loop *DstLoop = LI->getLoopFor(DstBlock); 715 SrcLevels = SrcLevel; 716 MaxLevels = SrcLevel + DstLevel; 717 while (SrcLevel > DstLevel) { 718 SrcLoop = SrcLoop->getParentLoop(); 719 SrcLevel--; 720 } 721 while (DstLevel > SrcLevel) { 722 DstLoop = DstLoop->getParentLoop(); 723 DstLevel--; 724 } 725 while (SrcLoop != DstLoop) { 726 SrcLoop = SrcLoop->getParentLoop(); 727 DstLoop = DstLoop->getParentLoop(); 728 SrcLevel--; 729 } 730 CommonLevels = SrcLevel; 731 MaxLevels -= CommonLevels; 732 } 733 734 735 // Given one of the loops containing the source, return 736 // its level index in our numbering scheme. 737 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 738 return SrcLoop->getLoopDepth(); 739 } 740 741 742 // Given one of the loops containing the destination, 743 // return its level index in our numbering scheme. 744 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 745 unsigned D = DstLoop->getLoopDepth(); 746 if (D > CommonLevels) 747 return D - CommonLevels + SrcLevels; 748 else 749 return D; 750 } 751 752 753 // Returns true if Expression is loop invariant in LoopNest. 754 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 755 const Loop *LoopNest) const { 756 if (!LoopNest) 757 return true; 758 return SE->isLoopInvariant(Expression, LoopNest) && 759 isLoopInvariant(Expression, LoopNest->getParentLoop()); 760 } 761 762 763 764 // Finds the set of loops from the LoopNest that 765 // have a level <= CommonLevels and are referred to by the SCEV Expression. 766 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 767 const Loop *LoopNest, 768 SmallBitVector &Loops) const { 769 while (LoopNest) { 770 unsigned Level = LoopNest->getLoopDepth(); 771 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 772 Loops.set(Level); 773 LoopNest = LoopNest->getParentLoop(); 774 } 775 } 776 777 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 778 779 unsigned widestWidthSeen = 0; 780 Type *widestType; 781 782 // Go through each pair and find the widest bit to which we need 783 // to extend all of them. 784 for (Subscript *Pair : Pairs) { 785 const SCEV *Src = Pair->Src; 786 const SCEV *Dst = Pair->Dst; 787 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 788 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 789 if (SrcTy == nullptr || DstTy == nullptr) { 790 assert(SrcTy == DstTy && "This function only unify integer types and " 791 "expect Src and Dst share the same type " 792 "otherwise."); 793 continue; 794 } 795 if (SrcTy->getBitWidth() > widestWidthSeen) { 796 widestWidthSeen = SrcTy->getBitWidth(); 797 widestType = SrcTy; 798 } 799 if (DstTy->getBitWidth() > widestWidthSeen) { 800 widestWidthSeen = DstTy->getBitWidth(); 801 widestType = DstTy; 802 } 803 } 804 805 806 assert(widestWidthSeen > 0); 807 808 // Now extend each pair to the widest seen. 809 for (Subscript *Pair : Pairs) { 810 const SCEV *Src = Pair->Src; 811 const SCEV *Dst = Pair->Dst; 812 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 813 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 814 if (SrcTy == nullptr || DstTy == nullptr) { 815 assert(SrcTy == DstTy && "This function only unify integer types and " 816 "expect Src and Dst share the same type " 817 "otherwise."); 818 continue; 819 } 820 if (SrcTy->getBitWidth() < widestWidthSeen) 821 // Sign-extend Src to widestType 822 Pair->Src = SE->getSignExtendExpr(Src, widestType); 823 if (DstTy->getBitWidth() < widestWidthSeen) { 824 // Sign-extend Dst to widestType 825 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 826 } 827 } 828 } 829 830 // removeMatchingExtensions - Examines a subscript pair. 831 // If the source and destination are identically sign (or zero) 832 // extended, it strips off the extension in an effect to simplify 833 // the actual analysis. 834 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 835 const SCEV *Src = Pair->Src; 836 const SCEV *Dst = Pair->Dst; 837 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 838 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 839 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 840 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 841 const SCEV *SrcCastOp = SrcCast->getOperand(); 842 const SCEV *DstCastOp = DstCast->getOperand(); 843 if (SrcCastOp->getType() == DstCastOp->getType()) { 844 Pair->Src = SrcCastOp; 845 Pair->Dst = DstCastOp; 846 } 847 } 848 } 849 850 851 // Examine the scev and return true iff it's linear. 852 // Collect any loops mentioned in the set of "Loops". 853 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 854 SmallBitVector &Loops) { 855 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 856 if (!AddRec) 857 return isLoopInvariant(Src, LoopNest); 858 const SCEV *Start = AddRec->getStart(); 859 const SCEV *Step = AddRec->getStepRecurrence(*SE); 860 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 861 if (!isa<SCEVCouldNotCompute>(UB)) { 862 if (SE->getTypeSizeInBits(Start->getType()) < 863 SE->getTypeSizeInBits(UB->getType())) { 864 if (!AddRec->getNoWrapFlags()) 865 return false; 866 } 867 } 868 if (!isLoopInvariant(Step, LoopNest)) 869 return false; 870 Loops.set(mapSrcLoop(AddRec->getLoop())); 871 return checkSrcSubscript(Start, LoopNest, Loops); 872 } 873 874 875 876 // Examine the scev and return true iff it's linear. 877 // Collect any loops mentioned in the set of "Loops". 878 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 879 SmallBitVector &Loops) { 880 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 881 if (!AddRec) 882 return isLoopInvariant(Dst, LoopNest); 883 const SCEV *Start = AddRec->getStart(); 884 const SCEV *Step = AddRec->getStepRecurrence(*SE); 885 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 886 if (!isa<SCEVCouldNotCompute>(UB)) { 887 if (SE->getTypeSizeInBits(Start->getType()) < 888 SE->getTypeSizeInBits(UB->getType())) { 889 if (!AddRec->getNoWrapFlags()) 890 return false; 891 } 892 } 893 if (!isLoopInvariant(Step, LoopNest)) 894 return false; 895 Loops.set(mapDstLoop(AddRec->getLoop())); 896 return checkDstSubscript(Start, LoopNest, Loops); 897 } 898 899 900 // Examines the subscript pair (the Src and Dst SCEVs) 901 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 902 // Collects the associated loops in a set. 903 DependenceInfo::Subscript::ClassificationKind 904 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 905 const SCEV *Dst, const Loop *DstLoopNest, 906 SmallBitVector &Loops) { 907 SmallBitVector SrcLoops(MaxLevels + 1); 908 SmallBitVector DstLoops(MaxLevels + 1); 909 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 910 return Subscript::NonLinear; 911 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 912 return Subscript::NonLinear; 913 Loops = SrcLoops; 914 Loops |= DstLoops; 915 unsigned N = Loops.count(); 916 if (N == 0) 917 return Subscript::ZIV; 918 if (N == 1) 919 return Subscript::SIV; 920 if (N == 2 && (SrcLoops.count() == 0 || 921 DstLoops.count() == 0 || 922 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 923 return Subscript::RDIV; 924 return Subscript::MIV; 925 } 926 927 928 // A wrapper around SCEV::isKnownPredicate. 929 // Looks for cases where we're interested in comparing for equality. 930 // If both X and Y have been identically sign or zero extended, 931 // it strips off the (confusing) extensions before invoking 932 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 933 // will be similarly updated. 934 // 935 // If SCEV::isKnownPredicate can't prove the predicate, 936 // we try simple subtraction, which seems to help in some cases 937 // involving symbolics. 938 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 939 const SCEV *Y) const { 940 if (Pred == CmpInst::ICMP_EQ || 941 Pred == CmpInst::ICMP_NE) { 942 if ((isa<SCEVSignExtendExpr>(X) && 943 isa<SCEVSignExtendExpr>(Y)) || 944 (isa<SCEVZeroExtendExpr>(X) && 945 isa<SCEVZeroExtendExpr>(Y))) { 946 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 947 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 948 const SCEV *Xop = CX->getOperand(); 949 const SCEV *Yop = CY->getOperand(); 950 if (Xop->getType() == Yop->getType()) { 951 X = Xop; 952 Y = Yop; 953 } 954 } 955 } 956 if (SE->isKnownPredicate(Pred, X, Y)) 957 return true; 958 // If SE->isKnownPredicate can't prove the condition, 959 // we try the brute-force approach of subtracting 960 // and testing the difference. 961 // By testing with SE->isKnownPredicate first, we avoid 962 // the possibility of overflow when the arguments are constants. 963 const SCEV *Delta = SE->getMinusSCEV(X, Y); 964 switch (Pred) { 965 case CmpInst::ICMP_EQ: 966 return Delta->isZero(); 967 case CmpInst::ICMP_NE: 968 return SE->isKnownNonZero(Delta); 969 case CmpInst::ICMP_SGE: 970 return SE->isKnownNonNegative(Delta); 971 case CmpInst::ICMP_SLE: 972 return SE->isKnownNonPositive(Delta); 973 case CmpInst::ICMP_SGT: 974 return SE->isKnownPositive(Delta); 975 case CmpInst::ICMP_SLT: 976 return SE->isKnownNegative(Delta); 977 default: 978 llvm_unreachable("unexpected predicate in isKnownPredicate"); 979 } 980 } 981 982 983 // All subscripts are all the same type. 984 // Loop bound may be smaller (e.g., a char). 985 // Should zero extend loop bound, since it's always >= 0. 986 // This routine collects upper bound and extends or truncates if needed. 987 // Truncating is safe when subscripts are known not to wrap. Cases without 988 // nowrap flags should have been rejected earlier. 989 // Return null if no bound available. 990 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 991 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 992 const SCEV *UB = SE->getBackedgeTakenCount(L); 993 return SE->getTruncateOrZeroExtend(UB, T); 994 } 995 return nullptr; 996 } 997 998 999 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1000 // If the cast fails, returns NULL. 1001 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1002 Type *T) const { 1003 if (const SCEV *UB = collectUpperBound(L, T)) 1004 return dyn_cast<SCEVConstant>(UB); 1005 return nullptr; 1006 } 1007 1008 1009 // testZIV - 1010 // When we have a pair of subscripts of the form [c1] and [c2], 1011 // where c1 and c2 are both loop invariant, we attack it using 1012 // the ZIV test. Basically, we test by comparing the two values, 1013 // but there are actually three possible results: 1014 // 1) the values are equal, so there's a dependence 1015 // 2) the values are different, so there's no dependence 1016 // 3) the values might be equal, so we have to assume a dependence. 1017 // 1018 // Return true if dependence disproved. 1019 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1020 FullDependence &Result) const { 1021 DEBUG(dbgs() << " src = " << *Src << "\n"); 1022 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1023 ++ZIVapplications; 1024 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1025 DEBUG(dbgs() << " provably dependent\n"); 1026 return false; // provably dependent 1027 } 1028 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1029 DEBUG(dbgs() << " provably independent\n"); 1030 ++ZIVindependence; 1031 return true; // provably independent 1032 } 1033 DEBUG(dbgs() << " possibly dependent\n"); 1034 Result.Consistent = false; 1035 return false; // possibly dependent 1036 } 1037 1038 1039 // strongSIVtest - 1040 // From the paper, Practical Dependence Testing, Section 4.2.1 1041 // 1042 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1043 // where i is an induction variable, c1 and c2 are loop invariant, 1044 // and a is a constant, we can solve it exactly using the Strong SIV test. 1045 // 1046 // Can prove independence. Failing that, can compute distance (and direction). 1047 // In the presence of symbolic terms, we can sometimes make progress. 1048 // 1049 // If there's a dependence, 1050 // 1051 // c1 + a*i = c2 + a*i' 1052 // 1053 // The dependence distance is 1054 // 1055 // d = i' - i = (c1 - c2)/a 1056 // 1057 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1058 // loop's upper bound. If a dependence exists, the dependence direction is 1059 // defined as 1060 // 1061 // { < if d > 0 1062 // direction = { = if d = 0 1063 // { > if d < 0 1064 // 1065 // Return true if dependence disproved. 1066 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1067 const SCEV *DstConst, const Loop *CurLoop, 1068 unsigned Level, FullDependence &Result, 1069 Constraint &NewConstraint) const { 1070 DEBUG(dbgs() << "\tStrong SIV test\n"); 1071 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1072 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1073 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1074 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1075 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1076 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1077 ++StrongSIVapplications; 1078 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1079 Level--; 1080 1081 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1082 DEBUG(dbgs() << "\t Delta = " << *Delta); 1083 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1084 1085 // check that |Delta| < iteration count 1086 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1087 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1088 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1089 const SCEV *AbsDelta = 1090 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1091 const SCEV *AbsCoeff = 1092 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1093 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1094 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1095 // Distance greater than trip count - no dependence 1096 ++StrongSIVindependence; 1097 ++StrongSIVsuccesses; 1098 return true; 1099 } 1100 } 1101 1102 // Can we compute distance? 1103 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1104 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1105 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1106 APInt Distance = ConstDelta; // these need to be initialized 1107 APInt Remainder = ConstDelta; 1108 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1109 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1110 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1111 // Make sure Coeff divides Delta exactly 1112 if (Remainder != 0) { 1113 // Coeff doesn't divide Distance, no dependence 1114 ++StrongSIVindependence; 1115 ++StrongSIVsuccesses; 1116 return true; 1117 } 1118 Result.DV[Level].Distance = SE->getConstant(Distance); 1119 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1120 if (Distance.sgt(0)) 1121 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1122 else if (Distance.slt(0)) 1123 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1124 else 1125 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1126 ++StrongSIVsuccesses; 1127 } 1128 else if (Delta->isZero()) { 1129 // since 0/X == 0 1130 Result.DV[Level].Distance = Delta; 1131 NewConstraint.setDistance(Delta, CurLoop); 1132 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1133 ++StrongSIVsuccesses; 1134 } 1135 else { 1136 if (Coeff->isOne()) { 1137 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1138 Result.DV[Level].Distance = Delta; // since X/1 == X 1139 NewConstraint.setDistance(Delta, CurLoop); 1140 } 1141 else { 1142 Result.Consistent = false; 1143 NewConstraint.setLine(Coeff, 1144 SE->getNegativeSCEV(Coeff), 1145 SE->getNegativeSCEV(Delta), CurLoop); 1146 } 1147 1148 // maybe we can get a useful direction 1149 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1150 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1151 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1152 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1153 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1154 // The double negatives above are confusing. 1155 // It helps to read !SE->isKnownNonZero(Delta) 1156 // as "Delta might be Zero" 1157 unsigned NewDirection = Dependence::DVEntry::NONE; 1158 if ((DeltaMaybePositive && CoeffMaybePositive) || 1159 (DeltaMaybeNegative && CoeffMaybeNegative)) 1160 NewDirection = Dependence::DVEntry::LT; 1161 if (DeltaMaybeZero) 1162 NewDirection |= Dependence::DVEntry::EQ; 1163 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1164 (DeltaMaybePositive && CoeffMaybeNegative)) 1165 NewDirection |= Dependence::DVEntry::GT; 1166 if (NewDirection < Result.DV[Level].Direction) 1167 ++StrongSIVsuccesses; 1168 Result.DV[Level].Direction &= NewDirection; 1169 } 1170 return false; 1171 } 1172 1173 1174 // weakCrossingSIVtest - 1175 // From the paper, Practical Dependence Testing, Section 4.2.2 1176 // 1177 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1178 // where i is an induction variable, c1 and c2 are loop invariant, 1179 // and a is a constant, we can solve it exactly using the 1180 // Weak-Crossing SIV test. 1181 // 1182 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1183 // the two lines, where i = i', yielding 1184 // 1185 // c1 + a*i = c2 - a*i 1186 // 2a*i = c2 - c1 1187 // i = (c2 - c1)/2a 1188 // 1189 // If i < 0, there is no dependence. 1190 // If i > upperbound, there is no dependence. 1191 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1192 // If i = upperbound, there's a dependence with distance = 0. 1193 // If i is integral, there's a dependence (all directions). 1194 // If the non-integer part = 1/2, there's a dependence (<> directions). 1195 // Otherwise, there's no dependence. 1196 // 1197 // Can prove independence. Failing that, 1198 // can sometimes refine the directions. 1199 // Can determine iteration for splitting. 1200 // 1201 // Return true if dependence disproved. 1202 bool DependenceInfo::weakCrossingSIVtest( 1203 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1204 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1205 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1206 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1207 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1208 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1209 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1210 ++WeakCrossingSIVapplications; 1211 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1212 Level--; 1213 Result.Consistent = false; 1214 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1215 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1216 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1217 if (Delta->isZero()) { 1218 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1219 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1220 ++WeakCrossingSIVsuccesses; 1221 if (!Result.DV[Level].Direction) { 1222 ++WeakCrossingSIVindependence; 1223 return true; 1224 } 1225 Result.DV[Level].Distance = Delta; // = 0 1226 return false; 1227 } 1228 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1229 if (!ConstCoeff) 1230 return false; 1231 1232 Result.DV[Level].Splitable = true; 1233 if (SE->isKnownNegative(ConstCoeff)) { 1234 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1235 assert(ConstCoeff && 1236 "dynamic cast of negative of ConstCoeff should yield constant"); 1237 Delta = SE->getNegativeSCEV(Delta); 1238 } 1239 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1240 1241 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1242 SplitIter = SE->getUDivExpr( 1243 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1244 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1245 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1246 1247 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1248 if (!ConstDelta) 1249 return false; 1250 1251 // We're certain that ConstCoeff > 0; therefore, 1252 // if Delta < 0, then no dependence. 1253 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1254 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1255 if (SE->isKnownNegative(Delta)) { 1256 // No dependence, Delta < 0 1257 ++WeakCrossingSIVindependence; 1258 ++WeakCrossingSIVsuccesses; 1259 return true; 1260 } 1261 1262 // We're certain that Delta > 0 and ConstCoeff > 0. 1263 // Check Delta/(2*ConstCoeff) against upper loop bound 1264 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1265 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1266 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1267 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1268 ConstantTwo); 1269 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1270 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1271 // Delta too big, no dependence 1272 ++WeakCrossingSIVindependence; 1273 ++WeakCrossingSIVsuccesses; 1274 return true; 1275 } 1276 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1277 // i = i' = UB 1278 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1279 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1280 ++WeakCrossingSIVsuccesses; 1281 if (!Result.DV[Level].Direction) { 1282 ++WeakCrossingSIVindependence; 1283 return true; 1284 } 1285 Result.DV[Level].Splitable = false; 1286 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1287 return false; 1288 } 1289 } 1290 1291 // check that Coeff divides Delta 1292 APInt APDelta = ConstDelta->getAPInt(); 1293 APInt APCoeff = ConstCoeff->getAPInt(); 1294 APInt Distance = APDelta; // these need to be initialzed 1295 APInt Remainder = APDelta; 1296 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1297 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1298 if (Remainder != 0) { 1299 // Coeff doesn't divide Delta, no dependence 1300 ++WeakCrossingSIVindependence; 1301 ++WeakCrossingSIVsuccesses; 1302 return true; 1303 } 1304 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1305 1306 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1307 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1308 Remainder = Distance.srem(Two); 1309 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1310 if (Remainder != 0) { 1311 // Equal direction isn't possible 1312 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1313 ++WeakCrossingSIVsuccesses; 1314 } 1315 return false; 1316 } 1317 1318 1319 // Kirch's algorithm, from 1320 // 1321 // Optimizing Supercompilers for Supercomputers 1322 // Michael Wolfe 1323 // MIT Press, 1989 1324 // 1325 // Program 2.1, page 29. 1326 // Computes the GCD of AM and BM. 1327 // Also finds a solution to the equation ax - by = gcd(a, b). 1328 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1329 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1330 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1331 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1332 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1333 APInt G0 = AM.abs(); 1334 APInt G1 = BM.abs(); 1335 APInt Q = G0; // these need to be initialized 1336 APInt R = G0; 1337 APInt::sdivrem(G0, G1, Q, R); 1338 while (R != 0) { 1339 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1340 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1341 G0 = G1; G1 = R; 1342 APInt::sdivrem(G0, G1, Q, R); 1343 } 1344 G = G1; 1345 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1346 X = AM.slt(0) ? -A1 : A1; 1347 Y = BM.slt(0) ? B1 : -B1; 1348 1349 // make sure gcd divides Delta 1350 R = Delta.srem(G); 1351 if (R != 0) 1352 return true; // gcd doesn't divide Delta, no dependence 1353 Q = Delta.sdiv(G); 1354 X *= Q; 1355 Y *= Q; 1356 return false; 1357 } 1358 1359 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1360 APInt Q = A; // these need to be initialized 1361 APInt R = A; 1362 APInt::sdivrem(A, B, Q, R); 1363 if (R == 0) 1364 return Q; 1365 if ((A.sgt(0) && B.sgt(0)) || 1366 (A.slt(0) && B.slt(0))) 1367 return Q; 1368 else 1369 return Q - 1; 1370 } 1371 1372 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1373 APInt Q = A; // these need to be initialized 1374 APInt R = A; 1375 APInt::sdivrem(A, B, Q, R); 1376 if (R == 0) 1377 return Q; 1378 if ((A.sgt(0) && B.sgt(0)) || 1379 (A.slt(0) && B.slt(0))) 1380 return Q + 1; 1381 else 1382 return Q; 1383 } 1384 1385 1386 static 1387 APInt maxAPInt(APInt A, APInt B) { 1388 return A.sgt(B) ? A : B; 1389 } 1390 1391 1392 static 1393 APInt minAPInt(APInt A, APInt B) { 1394 return A.slt(B) ? A : B; 1395 } 1396 1397 1398 // exactSIVtest - 1399 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1400 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1401 // and a2 are constant, we can solve it exactly using an algorithm developed 1402 // by Banerjee and Wolfe. See Section 2.5.3 in 1403 // 1404 // Optimizing Supercompilers for Supercomputers 1405 // Michael Wolfe 1406 // MIT Press, 1989 1407 // 1408 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1409 // so use them if possible. They're also a bit better with symbolics and, 1410 // in the case of the strong SIV test, can compute Distances. 1411 // 1412 // Return true if dependence disproved. 1413 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1414 const SCEV *SrcConst, const SCEV *DstConst, 1415 const Loop *CurLoop, unsigned Level, 1416 FullDependence &Result, 1417 Constraint &NewConstraint) const { 1418 DEBUG(dbgs() << "\tExact SIV test\n"); 1419 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1420 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1421 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1422 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1423 ++ExactSIVapplications; 1424 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1425 Level--; 1426 Result.Consistent = false; 1427 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1428 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1429 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1430 Delta, CurLoop); 1431 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1432 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1433 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1434 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1435 return false; 1436 1437 // find gcd 1438 APInt G, X, Y; 1439 APInt AM = ConstSrcCoeff->getAPInt(); 1440 APInt BM = ConstDstCoeff->getAPInt(); 1441 unsigned Bits = AM.getBitWidth(); 1442 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1443 // gcd doesn't divide Delta, no dependence 1444 ++ExactSIVindependence; 1445 ++ExactSIVsuccesses; 1446 return true; 1447 } 1448 1449 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1450 1451 // since SCEV construction normalizes, LM = 0 1452 APInt UM(Bits, 1, true); 1453 bool UMvalid = false; 1454 // UM is perhaps unavailable, let's check 1455 if (const SCEVConstant *CUB = 1456 collectConstantUpperBound(CurLoop, Delta->getType())) { 1457 UM = CUB->getAPInt(); 1458 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1459 UMvalid = true; 1460 } 1461 1462 APInt TU(APInt::getSignedMaxValue(Bits)); 1463 APInt TL(APInt::getSignedMinValue(Bits)); 1464 1465 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1466 APInt TMUL = BM.sdiv(G); 1467 if (TMUL.sgt(0)) { 1468 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1469 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1470 if (UMvalid) { 1471 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1472 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1473 } 1474 } 1475 else { 1476 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1477 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1478 if (UMvalid) { 1479 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1480 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1481 } 1482 } 1483 1484 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1485 TMUL = AM.sdiv(G); 1486 if (TMUL.sgt(0)) { 1487 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1488 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1489 if (UMvalid) { 1490 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1491 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1492 } 1493 } 1494 else { 1495 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1496 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1497 if (UMvalid) { 1498 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1499 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1500 } 1501 } 1502 if (TL.sgt(TU)) { 1503 ++ExactSIVindependence; 1504 ++ExactSIVsuccesses; 1505 return true; 1506 } 1507 1508 // explore directions 1509 unsigned NewDirection = Dependence::DVEntry::NONE; 1510 1511 // less than 1512 APInt SaveTU(TU); // save these 1513 APInt SaveTL(TL); 1514 DEBUG(dbgs() << "\t exploring LT direction\n"); 1515 TMUL = AM - BM; 1516 if (TMUL.sgt(0)) { 1517 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1518 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1519 } 1520 else { 1521 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1522 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1523 } 1524 if (TL.sle(TU)) { 1525 NewDirection |= Dependence::DVEntry::LT; 1526 ++ExactSIVsuccesses; 1527 } 1528 1529 // equal 1530 TU = SaveTU; // restore 1531 TL = SaveTL; 1532 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1533 if (TMUL.sgt(0)) { 1534 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1535 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1536 } 1537 else { 1538 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1539 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1540 } 1541 TMUL = BM - AM; 1542 if (TMUL.sgt(0)) { 1543 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1544 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1545 } 1546 else { 1547 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1548 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1549 } 1550 if (TL.sle(TU)) { 1551 NewDirection |= Dependence::DVEntry::EQ; 1552 ++ExactSIVsuccesses; 1553 } 1554 1555 // greater than 1556 TU = SaveTU; // restore 1557 TL = SaveTL; 1558 DEBUG(dbgs() << "\t exploring GT direction\n"); 1559 if (TMUL.sgt(0)) { 1560 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1561 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1562 } 1563 else { 1564 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1565 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1566 } 1567 if (TL.sle(TU)) { 1568 NewDirection |= Dependence::DVEntry::GT; 1569 ++ExactSIVsuccesses; 1570 } 1571 1572 // finished 1573 Result.DV[Level].Direction &= NewDirection; 1574 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1575 ++ExactSIVindependence; 1576 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1577 } 1578 1579 1580 1581 // Return true if the divisor evenly divides the dividend. 1582 static 1583 bool isRemainderZero(const SCEVConstant *Dividend, 1584 const SCEVConstant *Divisor) { 1585 const APInt &ConstDividend = Dividend->getAPInt(); 1586 const APInt &ConstDivisor = Divisor->getAPInt(); 1587 return ConstDividend.srem(ConstDivisor) == 0; 1588 } 1589 1590 1591 // weakZeroSrcSIVtest - 1592 // From the paper, Practical Dependence Testing, Section 4.2.2 1593 // 1594 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1595 // where i is an induction variable, c1 and c2 are loop invariant, 1596 // and a is a constant, we can solve it exactly using the 1597 // Weak-Zero SIV test. 1598 // 1599 // Given 1600 // 1601 // c1 = c2 + a*i 1602 // 1603 // we get 1604 // 1605 // (c1 - c2)/a = i 1606 // 1607 // If i is not an integer, there's no dependence. 1608 // If i < 0 or > UB, there's no dependence. 1609 // If i = 0, the direction is <= and peeling the 1610 // 1st iteration will break the dependence. 1611 // If i = UB, the direction is >= and peeling the 1612 // last iteration will break the dependence. 1613 // Otherwise, the direction is *. 1614 // 1615 // Can prove independence. Failing that, we can sometimes refine 1616 // the directions. Can sometimes show that first or last 1617 // iteration carries all the dependences (so worth peeling). 1618 // 1619 // (see also weakZeroDstSIVtest) 1620 // 1621 // Return true if dependence disproved. 1622 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1623 const SCEV *SrcConst, 1624 const SCEV *DstConst, 1625 const Loop *CurLoop, unsigned Level, 1626 FullDependence &Result, 1627 Constraint &NewConstraint) const { 1628 // For the WeakSIV test, it's possible the loop isn't common to 1629 // the Src and Dst loops. If it isn't, then there's no need to 1630 // record a direction. 1631 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1632 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1633 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1634 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1635 ++WeakZeroSIVapplications; 1636 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1637 Level--; 1638 Result.Consistent = false; 1639 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1640 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1641 CurLoop); 1642 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1643 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1644 if (Level < CommonLevels) { 1645 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1646 Result.DV[Level].PeelFirst = true; 1647 ++WeakZeroSIVsuccesses; 1648 } 1649 return false; // dependences caused by first iteration 1650 } 1651 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1652 if (!ConstCoeff) 1653 return false; 1654 const SCEV *AbsCoeff = 1655 SE->isKnownNegative(ConstCoeff) ? 1656 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1657 const SCEV *NewDelta = 1658 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1659 1660 // check that Delta/SrcCoeff < iteration count 1661 // really check NewDelta < count*AbsCoeff 1662 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1663 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1664 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1665 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1666 ++WeakZeroSIVindependence; 1667 ++WeakZeroSIVsuccesses; 1668 return true; 1669 } 1670 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1671 // dependences caused by last iteration 1672 if (Level < CommonLevels) { 1673 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1674 Result.DV[Level].PeelLast = true; 1675 ++WeakZeroSIVsuccesses; 1676 } 1677 return false; 1678 } 1679 } 1680 1681 // check that Delta/SrcCoeff >= 0 1682 // really check that NewDelta >= 0 1683 if (SE->isKnownNegative(NewDelta)) { 1684 // No dependence, newDelta < 0 1685 ++WeakZeroSIVindependence; 1686 ++WeakZeroSIVsuccesses; 1687 return true; 1688 } 1689 1690 // if SrcCoeff doesn't divide Delta, then no dependence 1691 if (isa<SCEVConstant>(Delta) && 1692 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1693 ++WeakZeroSIVindependence; 1694 ++WeakZeroSIVsuccesses; 1695 return true; 1696 } 1697 return false; 1698 } 1699 1700 1701 // weakZeroDstSIVtest - 1702 // From the paper, Practical Dependence Testing, Section 4.2.2 1703 // 1704 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1705 // where i is an induction variable, c1 and c2 are loop invariant, 1706 // and a is a constant, we can solve it exactly using the 1707 // Weak-Zero SIV test. 1708 // 1709 // Given 1710 // 1711 // c1 + a*i = c2 1712 // 1713 // we get 1714 // 1715 // i = (c2 - c1)/a 1716 // 1717 // If i is not an integer, there's no dependence. 1718 // If i < 0 or > UB, there's no dependence. 1719 // If i = 0, the direction is <= and peeling the 1720 // 1st iteration will break the dependence. 1721 // If i = UB, the direction is >= and peeling the 1722 // last iteration will break the dependence. 1723 // Otherwise, the direction is *. 1724 // 1725 // Can prove independence. Failing that, we can sometimes refine 1726 // the directions. Can sometimes show that first or last 1727 // iteration carries all the dependences (so worth peeling). 1728 // 1729 // (see also weakZeroSrcSIVtest) 1730 // 1731 // Return true if dependence disproved. 1732 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1733 const SCEV *SrcConst, 1734 const SCEV *DstConst, 1735 const Loop *CurLoop, unsigned Level, 1736 FullDependence &Result, 1737 Constraint &NewConstraint) const { 1738 // For the WeakSIV test, it's possible the loop isn't common to the 1739 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1740 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1741 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1742 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1743 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1744 ++WeakZeroSIVapplications; 1745 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1746 Level--; 1747 Result.Consistent = false; 1748 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1749 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1750 CurLoop); 1751 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1752 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1753 if (Level < CommonLevels) { 1754 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1755 Result.DV[Level].PeelFirst = true; 1756 ++WeakZeroSIVsuccesses; 1757 } 1758 return false; // dependences caused by first iteration 1759 } 1760 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1761 if (!ConstCoeff) 1762 return false; 1763 const SCEV *AbsCoeff = 1764 SE->isKnownNegative(ConstCoeff) ? 1765 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1766 const SCEV *NewDelta = 1767 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1768 1769 // check that Delta/SrcCoeff < iteration count 1770 // really check NewDelta < count*AbsCoeff 1771 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1772 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1773 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1774 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1775 ++WeakZeroSIVindependence; 1776 ++WeakZeroSIVsuccesses; 1777 return true; 1778 } 1779 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1780 // dependences caused by last iteration 1781 if (Level < CommonLevels) { 1782 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1783 Result.DV[Level].PeelLast = true; 1784 ++WeakZeroSIVsuccesses; 1785 } 1786 return false; 1787 } 1788 } 1789 1790 // check that Delta/SrcCoeff >= 0 1791 // really check that NewDelta >= 0 1792 if (SE->isKnownNegative(NewDelta)) { 1793 // No dependence, newDelta < 0 1794 ++WeakZeroSIVindependence; 1795 ++WeakZeroSIVsuccesses; 1796 return true; 1797 } 1798 1799 // if SrcCoeff doesn't divide Delta, then no dependence 1800 if (isa<SCEVConstant>(Delta) && 1801 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1802 ++WeakZeroSIVindependence; 1803 ++WeakZeroSIVsuccesses; 1804 return true; 1805 } 1806 return false; 1807 } 1808 1809 1810 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1811 // Things of the form [c1 + a*i] and [c2 + b*j], 1812 // where i and j are induction variable, c1 and c2 are loop invariant, 1813 // and a and b are constants. 1814 // Returns true if any possible dependence is disproved. 1815 // Marks the result as inconsistent. 1816 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1817 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1818 const SCEV *SrcConst, const SCEV *DstConst, 1819 const Loop *SrcLoop, const Loop *DstLoop, 1820 FullDependence &Result) const { 1821 DEBUG(dbgs() << "\tExact RDIV test\n"); 1822 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1823 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1824 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1825 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1826 ++ExactRDIVapplications; 1827 Result.Consistent = false; 1828 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1829 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1830 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1831 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1832 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1833 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1834 return false; 1835 1836 // find gcd 1837 APInt G, X, Y; 1838 APInt AM = ConstSrcCoeff->getAPInt(); 1839 APInt BM = ConstDstCoeff->getAPInt(); 1840 unsigned Bits = AM.getBitWidth(); 1841 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1842 // gcd doesn't divide Delta, no dependence 1843 ++ExactRDIVindependence; 1844 return true; 1845 } 1846 1847 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1848 1849 // since SCEV construction seems to normalize, LM = 0 1850 APInt SrcUM(Bits, 1, true); 1851 bool SrcUMvalid = false; 1852 // SrcUM is perhaps unavailable, let's check 1853 if (const SCEVConstant *UpperBound = 1854 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1855 SrcUM = UpperBound->getAPInt(); 1856 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1857 SrcUMvalid = true; 1858 } 1859 1860 APInt DstUM(Bits, 1, true); 1861 bool DstUMvalid = false; 1862 // UM is perhaps unavailable, let's check 1863 if (const SCEVConstant *UpperBound = 1864 collectConstantUpperBound(DstLoop, Delta->getType())) { 1865 DstUM = UpperBound->getAPInt(); 1866 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1867 DstUMvalid = true; 1868 } 1869 1870 APInt TU(APInt::getSignedMaxValue(Bits)); 1871 APInt TL(APInt::getSignedMinValue(Bits)); 1872 1873 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1874 APInt TMUL = BM.sdiv(G); 1875 if (TMUL.sgt(0)) { 1876 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1877 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1878 if (SrcUMvalid) { 1879 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1880 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1881 } 1882 } 1883 else { 1884 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1885 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1886 if (SrcUMvalid) { 1887 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1888 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1889 } 1890 } 1891 1892 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1893 TMUL = AM.sdiv(G); 1894 if (TMUL.sgt(0)) { 1895 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1896 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1897 if (DstUMvalid) { 1898 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1899 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1900 } 1901 } 1902 else { 1903 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1904 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1905 if (DstUMvalid) { 1906 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1907 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1908 } 1909 } 1910 if (TL.sgt(TU)) 1911 ++ExactRDIVindependence; 1912 return TL.sgt(TU); 1913 } 1914 1915 1916 // symbolicRDIVtest - 1917 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1918 // introduce a special case of Banerjee's Inequalities (also called the 1919 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1920 // particularly cases with symbolics. Since it's only able to disprove 1921 // dependence (not compute distances or directions), we'll use it as a 1922 // fall back for the other tests. 1923 // 1924 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1925 // where i and j are induction variables and c1 and c2 are loop invariants, 1926 // we can use the symbolic tests to disprove some dependences, serving as a 1927 // backup for the RDIV test. Note that i and j can be the same variable, 1928 // letting this test serve as a backup for the various SIV tests. 1929 // 1930 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1931 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1932 // loop bounds for the i and j loops, respectively. So, ... 1933 // 1934 // c1 + a1*i = c2 + a2*j 1935 // a1*i - a2*j = c2 - c1 1936 // 1937 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1938 // range of the maximum and minimum possible values of a1*i - a2*j. 1939 // Considering the signs of a1 and a2, we have 4 possible cases: 1940 // 1941 // 1) If a1 >= 0 and a2 >= 0, then 1942 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1943 // -a2*N2 <= c2 - c1 <= a1*N1 1944 // 1945 // 2) If a1 >= 0 and a2 <= 0, then 1946 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1947 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1948 // 1949 // 3) If a1 <= 0 and a2 >= 0, then 1950 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1951 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1952 // 1953 // 4) If a1 <= 0 and a2 <= 0, then 1954 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1955 // a1*N1 <= c2 - c1 <= -a2*N2 1956 // 1957 // return true if dependence disproved 1958 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 1959 const SCEV *C1, const SCEV *C2, 1960 const Loop *Loop1, 1961 const Loop *Loop2) const { 1962 ++SymbolicRDIVapplications; 1963 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1964 DEBUG(dbgs() << "\t A1 = " << *A1); 1965 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1966 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1967 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1968 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1969 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1970 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1971 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1972 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1973 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1974 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1975 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1976 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1977 if (SE->isKnownNonNegative(A1)) { 1978 if (SE->isKnownNonNegative(A2)) { 1979 // A1 >= 0 && A2 >= 0 1980 if (N1) { 1981 // make sure that c2 - c1 <= a1*N1 1982 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1983 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1984 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1985 ++SymbolicRDIVindependence; 1986 return true; 1987 } 1988 } 1989 if (N2) { 1990 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1991 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1992 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1993 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1994 ++SymbolicRDIVindependence; 1995 return true; 1996 } 1997 } 1998 } 1999 else if (SE->isKnownNonPositive(A2)) { 2000 // a1 >= 0 && a2 <= 0 2001 if (N1 && N2) { 2002 // make sure that c2 - c1 <= a1*N1 - a2*N2 2003 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2004 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2005 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2006 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2007 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2008 ++SymbolicRDIVindependence; 2009 return true; 2010 } 2011 } 2012 // make sure that 0 <= c2 - c1 2013 if (SE->isKnownNegative(C2_C1)) { 2014 ++SymbolicRDIVindependence; 2015 return true; 2016 } 2017 } 2018 } 2019 else if (SE->isKnownNonPositive(A1)) { 2020 if (SE->isKnownNonNegative(A2)) { 2021 // a1 <= 0 && a2 >= 0 2022 if (N1 && N2) { 2023 // make sure that a1*N1 - a2*N2 <= c2 - c1 2024 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2025 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2026 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2027 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2028 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2029 ++SymbolicRDIVindependence; 2030 return true; 2031 } 2032 } 2033 // make sure that c2 - c1 <= 0 2034 if (SE->isKnownPositive(C2_C1)) { 2035 ++SymbolicRDIVindependence; 2036 return true; 2037 } 2038 } 2039 else if (SE->isKnownNonPositive(A2)) { 2040 // a1 <= 0 && a2 <= 0 2041 if (N1) { 2042 // make sure that a1*N1 <= c2 - c1 2043 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2044 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2045 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2046 ++SymbolicRDIVindependence; 2047 return true; 2048 } 2049 } 2050 if (N2) { 2051 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2052 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2053 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2054 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2055 ++SymbolicRDIVindependence; 2056 return true; 2057 } 2058 } 2059 } 2060 } 2061 return false; 2062 } 2063 2064 2065 // testSIV - 2066 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2067 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2068 // a2 are constant, we attack it with an SIV test. While they can all be 2069 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2070 // they apply; they're cheaper and sometimes more precise. 2071 // 2072 // Return true if dependence disproved. 2073 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2074 FullDependence &Result, Constraint &NewConstraint, 2075 const SCEV *&SplitIter) const { 2076 DEBUG(dbgs() << " src = " << *Src << "\n"); 2077 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2078 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2079 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2080 if (SrcAddRec && DstAddRec) { 2081 const SCEV *SrcConst = SrcAddRec->getStart(); 2082 const SCEV *DstConst = DstAddRec->getStart(); 2083 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2084 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2085 const Loop *CurLoop = SrcAddRec->getLoop(); 2086 assert(CurLoop == DstAddRec->getLoop() && 2087 "both loops in SIV should be same"); 2088 Level = mapSrcLoop(CurLoop); 2089 bool disproven; 2090 if (SrcCoeff == DstCoeff) 2091 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2092 Level, Result, NewConstraint); 2093 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2094 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2095 Level, Result, NewConstraint, SplitIter); 2096 else 2097 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2098 Level, Result, NewConstraint); 2099 return disproven || 2100 gcdMIVtest(Src, Dst, Result) || 2101 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2102 } 2103 if (SrcAddRec) { 2104 const SCEV *SrcConst = SrcAddRec->getStart(); 2105 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2106 const SCEV *DstConst = Dst; 2107 const Loop *CurLoop = SrcAddRec->getLoop(); 2108 Level = mapSrcLoop(CurLoop); 2109 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2110 Level, Result, NewConstraint) || 2111 gcdMIVtest(Src, Dst, Result); 2112 } 2113 if (DstAddRec) { 2114 const SCEV *DstConst = DstAddRec->getStart(); 2115 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2116 const SCEV *SrcConst = Src; 2117 const Loop *CurLoop = DstAddRec->getLoop(); 2118 Level = mapDstLoop(CurLoop); 2119 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2120 CurLoop, Level, Result, NewConstraint) || 2121 gcdMIVtest(Src, Dst, Result); 2122 } 2123 llvm_unreachable("SIV test expected at least one AddRec"); 2124 return false; 2125 } 2126 2127 2128 // testRDIV - 2129 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2130 // where i and j are induction variables, c1 and c2 are loop invariant, 2131 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2132 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2133 // It doesn't make sense to talk about distance or direction in this case, 2134 // so there's no point in making special versions of the Strong SIV test or 2135 // the Weak-crossing SIV test. 2136 // 2137 // With minor algebra, this test can also be used for things like 2138 // [c1 + a1*i + a2*j][c2]. 2139 // 2140 // Return true if dependence disproved. 2141 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2142 FullDependence &Result) const { 2143 // we have 3 possible situations here: 2144 // 1) [a*i + b] and [c*j + d] 2145 // 2) [a*i + c*j + b] and [d] 2146 // 3) [b] and [a*i + c*j + d] 2147 // We need to find what we've got and get organized 2148 2149 const SCEV *SrcConst, *DstConst; 2150 const SCEV *SrcCoeff, *DstCoeff; 2151 const Loop *SrcLoop, *DstLoop; 2152 2153 DEBUG(dbgs() << " src = " << *Src << "\n"); 2154 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2155 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2156 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2157 if (SrcAddRec && DstAddRec) { 2158 SrcConst = SrcAddRec->getStart(); 2159 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2160 SrcLoop = SrcAddRec->getLoop(); 2161 DstConst = DstAddRec->getStart(); 2162 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2163 DstLoop = DstAddRec->getLoop(); 2164 } 2165 else if (SrcAddRec) { 2166 if (const SCEVAddRecExpr *tmpAddRec = 2167 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2168 SrcConst = tmpAddRec->getStart(); 2169 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2170 SrcLoop = tmpAddRec->getLoop(); 2171 DstConst = Dst; 2172 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2173 DstLoop = SrcAddRec->getLoop(); 2174 } 2175 else 2176 llvm_unreachable("RDIV reached by surprising SCEVs"); 2177 } 2178 else if (DstAddRec) { 2179 if (const SCEVAddRecExpr *tmpAddRec = 2180 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2181 DstConst = tmpAddRec->getStart(); 2182 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2183 DstLoop = tmpAddRec->getLoop(); 2184 SrcConst = Src; 2185 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2186 SrcLoop = DstAddRec->getLoop(); 2187 } 2188 else 2189 llvm_unreachable("RDIV reached by surprising SCEVs"); 2190 } 2191 else 2192 llvm_unreachable("RDIV expected at least one AddRec"); 2193 return exactRDIVtest(SrcCoeff, DstCoeff, 2194 SrcConst, DstConst, 2195 SrcLoop, DstLoop, 2196 Result) || 2197 gcdMIVtest(Src, Dst, Result) || 2198 symbolicRDIVtest(SrcCoeff, DstCoeff, 2199 SrcConst, DstConst, 2200 SrcLoop, DstLoop); 2201 } 2202 2203 2204 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2205 // Return true if dependence disproved. 2206 // Can sometimes refine direction vectors. 2207 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2208 const SmallBitVector &Loops, 2209 FullDependence &Result) const { 2210 DEBUG(dbgs() << " src = " << *Src << "\n"); 2211 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2212 Result.Consistent = false; 2213 return gcdMIVtest(Src, Dst, Result) || 2214 banerjeeMIVtest(Src, Dst, Loops, Result); 2215 } 2216 2217 2218 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2219 // in this case 10. If there is no constant part, returns NULL. 2220 static 2221 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2222 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2223 return Constant; 2224 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2225 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2226 return Constant; 2227 return nullptr; 2228 } 2229 2230 2231 //===----------------------------------------------------------------------===// 2232 // gcdMIVtest - 2233 // Tests an MIV subscript pair for dependence. 2234 // Returns true if any possible dependence is disproved. 2235 // Marks the result as inconsistent. 2236 // Can sometimes disprove the equal direction for 1 or more loops, 2237 // as discussed in Michael Wolfe's book, 2238 // High Performance Compilers for Parallel Computing, page 235. 2239 // 2240 // We spend some effort (code!) to handle cases like 2241 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2242 // but M and N are just loop-invariant variables. 2243 // This should help us handle linearized subscripts; 2244 // also makes this test a useful backup to the various SIV tests. 2245 // 2246 // It occurs to me that the presence of loop-invariant variables 2247 // changes the nature of the test from "greatest common divisor" 2248 // to "a common divisor". 2249 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2250 FullDependence &Result) const { 2251 DEBUG(dbgs() << "starting gcd\n"); 2252 ++GCDapplications; 2253 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2254 APInt RunningGCD = APInt::getNullValue(BitWidth); 2255 2256 // Examine Src coefficients. 2257 // Compute running GCD and record source constant. 2258 // Because we're looking for the constant at the end of the chain, 2259 // we can't quit the loop just because the GCD == 1. 2260 const SCEV *Coefficients = Src; 2261 while (const SCEVAddRecExpr *AddRec = 2262 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2263 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2264 // If the coefficient is the product of a constant and other stuff, 2265 // we can use the constant in the GCD computation. 2266 const auto *Constant = getConstantPart(Coeff); 2267 if (!Constant) 2268 return false; 2269 APInt ConstCoeff = Constant->getAPInt(); 2270 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2271 Coefficients = AddRec->getStart(); 2272 } 2273 const SCEV *SrcConst = Coefficients; 2274 2275 // Examine Dst coefficients. 2276 // Compute running GCD and record destination constant. 2277 // Because we're looking for the constant at the end of the chain, 2278 // we can't quit the loop just because the GCD == 1. 2279 Coefficients = Dst; 2280 while (const SCEVAddRecExpr *AddRec = 2281 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2282 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2283 // If the coefficient is the product of a constant and other stuff, 2284 // we can use the constant in the GCD computation. 2285 const auto *Constant = getConstantPart(Coeff); 2286 if (!Constant) 2287 return false; 2288 APInt ConstCoeff = Constant->getAPInt(); 2289 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2290 Coefficients = AddRec->getStart(); 2291 } 2292 const SCEV *DstConst = Coefficients; 2293 2294 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2295 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2296 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2297 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2298 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2299 // If Delta is a sum of products, we may be able to make further progress. 2300 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2301 const SCEV *Operand = Sum->getOperand(Op); 2302 if (isa<SCEVConstant>(Operand)) { 2303 assert(!Constant && "Surprised to find multiple constants"); 2304 Constant = cast<SCEVConstant>(Operand); 2305 } 2306 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2307 // Search for constant operand to participate in GCD; 2308 // If none found; return false. 2309 const SCEVConstant *ConstOp = getConstantPart(Product); 2310 if (!ConstOp) 2311 return false; 2312 APInt ConstOpValue = ConstOp->getAPInt(); 2313 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2314 ConstOpValue.abs()); 2315 } 2316 else 2317 return false; 2318 } 2319 } 2320 if (!Constant) 2321 return false; 2322 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2323 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2324 if (ConstDelta == 0) 2325 return false; 2326 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2327 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2328 APInt Remainder = ConstDelta.srem(RunningGCD); 2329 if (Remainder != 0) { 2330 ++GCDindependence; 2331 return true; 2332 } 2333 2334 // Try to disprove equal directions. 2335 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2336 // the code above can't disprove the dependence because the GCD = 1. 2337 // So we consider what happen if i = i' and what happens if j = j'. 2338 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2339 // which is infeasible, so we can disallow the = direction for the i level. 2340 // Setting j = j' doesn't help matters, so we end up with a direction vector 2341 // of [<>, *] 2342 // 2343 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2344 // we need to remember that the constant part is 5 and the RunningGCD should 2345 // be initialized to ExtraGCD = 30. 2346 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2347 2348 bool Improved = false; 2349 Coefficients = Src; 2350 while (const SCEVAddRecExpr *AddRec = 2351 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2352 Coefficients = AddRec->getStart(); 2353 const Loop *CurLoop = AddRec->getLoop(); 2354 RunningGCD = ExtraGCD; 2355 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2356 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2357 const SCEV *Inner = Src; 2358 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2359 AddRec = cast<SCEVAddRecExpr>(Inner); 2360 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2361 if (CurLoop == AddRec->getLoop()) 2362 ; // SrcCoeff == Coeff 2363 else { 2364 // If the coefficient is the product of a constant and other stuff, 2365 // we can use the constant in the GCD computation. 2366 Constant = getConstantPart(Coeff); 2367 if (!Constant) 2368 return false; 2369 APInt ConstCoeff = Constant->getAPInt(); 2370 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2371 } 2372 Inner = AddRec->getStart(); 2373 } 2374 Inner = Dst; 2375 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2376 AddRec = cast<SCEVAddRecExpr>(Inner); 2377 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2378 if (CurLoop == AddRec->getLoop()) 2379 DstCoeff = Coeff; 2380 else { 2381 // If the coefficient is the product of a constant and other stuff, 2382 // we can use the constant in the GCD computation. 2383 Constant = getConstantPart(Coeff); 2384 if (!Constant) 2385 return false; 2386 APInt ConstCoeff = Constant->getAPInt(); 2387 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2388 } 2389 Inner = AddRec->getStart(); 2390 } 2391 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2392 // If the coefficient is the product of a constant and other stuff, 2393 // we can use the constant in the GCD computation. 2394 Constant = getConstantPart(Delta); 2395 if (!Constant) 2396 // The difference of the two coefficients might not be a product 2397 // or constant, in which case we give up on this direction. 2398 continue; 2399 APInt ConstCoeff = Constant->getAPInt(); 2400 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2401 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2402 if (RunningGCD != 0) { 2403 Remainder = ConstDelta.srem(RunningGCD); 2404 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2405 if (Remainder != 0) { 2406 unsigned Level = mapSrcLoop(CurLoop); 2407 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2408 Improved = true; 2409 } 2410 } 2411 } 2412 if (Improved) 2413 ++GCDsuccesses; 2414 DEBUG(dbgs() << "all done\n"); 2415 return false; 2416 } 2417 2418 2419 //===----------------------------------------------------------------------===// 2420 // banerjeeMIVtest - 2421 // Use Banerjee's Inequalities to test an MIV subscript pair. 2422 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2423 // Generally follows the discussion in Section 2.5.2 of 2424 // 2425 // Optimizing Supercompilers for Supercomputers 2426 // Michael Wolfe 2427 // 2428 // The inequalities given on page 25 are simplified in that loops are 2429 // normalized so that the lower bound is always 0 and the stride is always 1. 2430 // For example, Wolfe gives 2431 // 2432 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2433 // 2434 // where A_k is the coefficient of the kth index in the source subscript, 2435 // B_k is the coefficient of the kth index in the destination subscript, 2436 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2437 // index, and N_k is the stride of the kth index. Since all loops are normalized 2438 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2439 // equation to 2440 // 2441 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2442 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2443 // 2444 // Similar simplifications are possible for the other equations. 2445 // 2446 // When we can't determine the number of iterations for a loop, 2447 // we use NULL as an indicator for the worst case, infinity. 2448 // When computing the upper bound, NULL denotes +inf; 2449 // for the lower bound, NULL denotes -inf. 2450 // 2451 // Return true if dependence disproved. 2452 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2453 const SmallBitVector &Loops, 2454 FullDependence &Result) const { 2455 DEBUG(dbgs() << "starting Banerjee\n"); 2456 ++BanerjeeApplications; 2457 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2458 const SCEV *A0; 2459 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2460 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2461 const SCEV *B0; 2462 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2463 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2464 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2465 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2466 2467 // Compute bounds for all the * directions. 2468 DEBUG(dbgs() << "\tBounds[*]\n"); 2469 for (unsigned K = 1; K <= MaxLevels; ++K) { 2470 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2471 Bound[K].Direction = Dependence::DVEntry::ALL; 2472 Bound[K].DirSet = Dependence::DVEntry::NONE; 2473 findBoundsALL(A, B, Bound, K); 2474 #ifndef NDEBUG 2475 DEBUG(dbgs() << "\t " << K << '\t'); 2476 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2477 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2478 else 2479 DEBUG(dbgs() << "-inf\t"); 2480 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2481 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2482 else 2483 DEBUG(dbgs() << "+inf\n"); 2484 #endif 2485 } 2486 2487 // Test the *, *, *, ... case. 2488 bool Disproved = false; 2489 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2490 // Explore the direction vector hierarchy. 2491 unsigned DepthExpanded = 0; 2492 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2493 Loops, DepthExpanded, Delta); 2494 if (NewDeps > 0) { 2495 bool Improved = false; 2496 for (unsigned K = 1; K <= CommonLevels; ++K) { 2497 if (Loops[K]) { 2498 unsigned Old = Result.DV[K - 1].Direction; 2499 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2500 Improved |= Old != Result.DV[K - 1].Direction; 2501 if (!Result.DV[K - 1].Direction) { 2502 Improved = false; 2503 Disproved = true; 2504 break; 2505 } 2506 } 2507 } 2508 if (Improved) 2509 ++BanerjeeSuccesses; 2510 } 2511 else { 2512 ++BanerjeeIndependence; 2513 Disproved = true; 2514 } 2515 } 2516 else { 2517 ++BanerjeeIndependence; 2518 Disproved = true; 2519 } 2520 delete [] Bound; 2521 delete [] A; 2522 delete [] B; 2523 return Disproved; 2524 } 2525 2526 2527 // Hierarchically expands the direction vector 2528 // search space, combining the directions of discovered dependences 2529 // in the DirSet field of Bound. Returns the number of distinct 2530 // dependences discovered. If the dependence is disproved, 2531 // it will return 0. 2532 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2533 CoefficientInfo *B, BoundInfo *Bound, 2534 const SmallBitVector &Loops, 2535 unsigned &DepthExpanded, 2536 const SCEV *Delta) const { 2537 if (Level > CommonLevels) { 2538 // record result 2539 DEBUG(dbgs() << "\t["); 2540 for (unsigned K = 1; K <= CommonLevels; ++K) { 2541 if (Loops[K]) { 2542 Bound[K].DirSet |= Bound[K].Direction; 2543 #ifndef NDEBUG 2544 switch (Bound[K].Direction) { 2545 case Dependence::DVEntry::LT: 2546 DEBUG(dbgs() << " <"); 2547 break; 2548 case Dependence::DVEntry::EQ: 2549 DEBUG(dbgs() << " ="); 2550 break; 2551 case Dependence::DVEntry::GT: 2552 DEBUG(dbgs() << " >"); 2553 break; 2554 case Dependence::DVEntry::ALL: 2555 DEBUG(dbgs() << " *"); 2556 break; 2557 default: 2558 llvm_unreachable("unexpected Bound[K].Direction"); 2559 } 2560 #endif 2561 } 2562 } 2563 DEBUG(dbgs() << " ]\n"); 2564 return 1; 2565 } 2566 if (Loops[Level]) { 2567 if (Level > DepthExpanded) { 2568 DepthExpanded = Level; 2569 // compute bounds for <, =, > at current level 2570 findBoundsLT(A, B, Bound, Level); 2571 findBoundsGT(A, B, Bound, Level); 2572 findBoundsEQ(A, B, Bound, Level); 2573 #ifndef NDEBUG 2574 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2575 DEBUG(dbgs() << "\t <\t"); 2576 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2577 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2578 else 2579 DEBUG(dbgs() << "-inf\t"); 2580 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2581 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2582 else 2583 DEBUG(dbgs() << "+inf\n"); 2584 DEBUG(dbgs() << "\t =\t"); 2585 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2586 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2587 else 2588 DEBUG(dbgs() << "-inf\t"); 2589 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2590 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2591 else 2592 DEBUG(dbgs() << "+inf\n"); 2593 DEBUG(dbgs() << "\t >\t"); 2594 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2595 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2596 else 2597 DEBUG(dbgs() << "-inf\t"); 2598 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2599 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2600 else 2601 DEBUG(dbgs() << "+inf\n"); 2602 #endif 2603 } 2604 2605 unsigned NewDeps = 0; 2606 2607 // test bounds for <, *, *, ... 2608 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2609 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2610 Loops, DepthExpanded, Delta); 2611 2612 // Test bounds for =, *, *, ... 2613 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2614 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2615 Loops, DepthExpanded, Delta); 2616 2617 // test bounds for >, *, *, ... 2618 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2619 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2620 Loops, DepthExpanded, Delta); 2621 2622 Bound[Level].Direction = Dependence::DVEntry::ALL; 2623 return NewDeps; 2624 } 2625 else 2626 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2627 } 2628 2629 2630 // Returns true iff the current bounds are plausible. 2631 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2632 BoundInfo *Bound, const SCEV *Delta) const { 2633 Bound[Level].Direction = DirKind; 2634 if (const SCEV *LowerBound = getLowerBound(Bound)) 2635 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2636 return false; 2637 if (const SCEV *UpperBound = getUpperBound(Bound)) 2638 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2639 return false; 2640 return true; 2641 } 2642 2643 2644 // Computes the upper and lower bounds for level K 2645 // using the * direction. Records them in Bound. 2646 // Wolfe gives the equations 2647 // 2648 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2649 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2650 // 2651 // Since we normalize loops, we can simplify these equations to 2652 // 2653 // LB^*_k = (A^-_k - B^+_k)U_k 2654 // UB^*_k = (A^+_k - B^-_k)U_k 2655 // 2656 // We must be careful to handle the case where the upper bound is unknown. 2657 // Note that the lower bound is always <= 0 2658 // and the upper bound is always >= 0. 2659 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2660 BoundInfo *Bound, unsigned K) const { 2661 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2662 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2663 if (Bound[K].Iterations) { 2664 Bound[K].Lower[Dependence::DVEntry::ALL] = 2665 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2666 Bound[K].Iterations); 2667 Bound[K].Upper[Dependence::DVEntry::ALL] = 2668 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2669 Bound[K].Iterations); 2670 } 2671 else { 2672 // If the difference is 0, we won't need to know the number of iterations. 2673 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2674 Bound[K].Lower[Dependence::DVEntry::ALL] = 2675 SE->getZero(A[K].Coeff->getType()); 2676 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2677 Bound[K].Upper[Dependence::DVEntry::ALL] = 2678 SE->getZero(A[K].Coeff->getType()); 2679 } 2680 } 2681 2682 2683 // Computes the upper and lower bounds for level K 2684 // using the = direction. Records them in Bound. 2685 // Wolfe gives the equations 2686 // 2687 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2688 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2689 // 2690 // Since we normalize loops, we can simplify these equations to 2691 // 2692 // LB^=_k = (A_k - B_k)^- U_k 2693 // UB^=_k = (A_k - B_k)^+ U_k 2694 // 2695 // We must be careful to handle the case where the upper bound is unknown. 2696 // Note that the lower bound is always <= 0 2697 // and the upper bound is always >= 0. 2698 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2699 BoundInfo *Bound, unsigned K) const { 2700 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2701 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2702 if (Bound[K].Iterations) { 2703 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2704 const SCEV *NegativePart = getNegativePart(Delta); 2705 Bound[K].Lower[Dependence::DVEntry::EQ] = 2706 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2707 const SCEV *PositivePart = getPositivePart(Delta); 2708 Bound[K].Upper[Dependence::DVEntry::EQ] = 2709 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2710 } 2711 else { 2712 // If the positive/negative part of the difference is 0, 2713 // we won't need to know the number of iterations. 2714 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2715 const SCEV *NegativePart = getNegativePart(Delta); 2716 if (NegativePart->isZero()) 2717 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2718 const SCEV *PositivePart = getPositivePart(Delta); 2719 if (PositivePart->isZero()) 2720 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2721 } 2722 } 2723 2724 2725 // Computes the upper and lower bounds for level K 2726 // using the < direction. Records them in Bound. 2727 // Wolfe gives the equations 2728 // 2729 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2730 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2731 // 2732 // Since we normalize loops, we can simplify these equations to 2733 // 2734 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2735 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2736 // 2737 // We must be careful to handle the case where the upper bound is unknown. 2738 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2739 BoundInfo *Bound, unsigned K) const { 2740 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2741 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2742 if (Bound[K].Iterations) { 2743 const SCEV *Iter_1 = SE->getMinusSCEV( 2744 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2745 const SCEV *NegPart = 2746 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2747 Bound[K].Lower[Dependence::DVEntry::LT] = 2748 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2749 const SCEV *PosPart = 2750 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2751 Bound[K].Upper[Dependence::DVEntry::LT] = 2752 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2753 } 2754 else { 2755 // If the positive/negative part of the difference is 0, 2756 // we won't need to know the number of iterations. 2757 const SCEV *NegPart = 2758 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2759 if (NegPart->isZero()) 2760 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2761 const SCEV *PosPart = 2762 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2763 if (PosPart->isZero()) 2764 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2765 } 2766 } 2767 2768 2769 // Computes the upper and lower bounds for level K 2770 // using the > direction. Records them in Bound. 2771 // Wolfe gives the equations 2772 // 2773 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2774 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2775 // 2776 // Since we normalize loops, we can simplify these equations to 2777 // 2778 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2779 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2780 // 2781 // We must be careful to handle the case where the upper bound is unknown. 2782 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2783 BoundInfo *Bound, unsigned K) const { 2784 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2785 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2786 if (Bound[K].Iterations) { 2787 const SCEV *Iter_1 = SE->getMinusSCEV( 2788 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2789 const SCEV *NegPart = 2790 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2791 Bound[K].Lower[Dependence::DVEntry::GT] = 2792 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2793 const SCEV *PosPart = 2794 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2795 Bound[K].Upper[Dependence::DVEntry::GT] = 2796 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2797 } 2798 else { 2799 // If the positive/negative part of the difference is 0, 2800 // we won't need to know the number of iterations. 2801 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2802 if (NegPart->isZero()) 2803 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2804 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2805 if (PosPart->isZero()) 2806 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2807 } 2808 } 2809 2810 2811 // X^+ = max(X, 0) 2812 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2813 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2814 } 2815 2816 2817 // X^- = min(X, 0) 2818 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2819 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2820 } 2821 2822 2823 // Walks through the subscript, 2824 // collecting each coefficient, the associated loop bounds, 2825 // and recording its positive and negative parts for later use. 2826 DependenceInfo::CoefficientInfo * 2827 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2828 const SCEV *&Constant) const { 2829 const SCEV *Zero = SE->getZero(Subscript->getType()); 2830 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2831 for (unsigned K = 1; K <= MaxLevels; ++K) { 2832 CI[K].Coeff = Zero; 2833 CI[K].PosPart = Zero; 2834 CI[K].NegPart = Zero; 2835 CI[K].Iterations = nullptr; 2836 } 2837 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2838 const Loop *L = AddRec->getLoop(); 2839 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2840 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2841 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2842 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2843 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2844 Subscript = AddRec->getStart(); 2845 } 2846 Constant = Subscript; 2847 #ifndef NDEBUG 2848 DEBUG(dbgs() << "\tCoefficient Info\n"); 2849 for (unsigned K = 1; K <= MaxLevels; ++K) { 2850 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2851 DEBUG(dbgs() << "\tPos Part = "); 2852 DEBUG(dbgs() << *CI[K].PosPart); 2853 DEBUG(dbgs() << "\tNeg Part = "); 2854 DEBUG(dbgs() << *CI[K].NegPart); 2855 DEBUG(dbgs() << "\tUpper Bound = "); 2856 if (CI[K].Iterations) 2857 DEBUG(dbgs() << *CI[K].Iterations); 2858 else 2859 DEBUG(dbgs() << "+inf"); 2860 DEBUG(dbgs() << '\n'); 2861 } 2862 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2863 #endif 2864 return CI; 2865 } 2866 2867 2868 // Looks through all the bounds info and 2869 // computes the lower bound given the current direction settings 2870 // at each level. If the lower bound for any level is -inf, 2871 // the result is -inf. 2872 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2873 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2874 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2875 if (Bound[K].Lower[Bound[K].Direction]) 2876 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2877 else 2878 Sum = nullptr; 2879 } 2880 return Sum; 2881 } 2882 2883 2884 // Looks through all the bounds info and 2885 // computes the upper bound given the current direction settings 2886 // at each level. If the upper bound at any level is +inf, 2887 // the result is +inf. 2888 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2889 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2890 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2891 if (Bound[K].Upper[Bound[K].Direction]) 2892 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2893 else 2894 Sum = nullptr; 2895 } 2896 return Sum; 2897 } 2898 2899 2900 //===----------------------------------------------------------------------===// 2901 // Constraint manipulation for Delta test. 2902 2903 // Given a linear SCEV, 2904 // return the coefficient (the step) 2905 // corresponding to the specified loop. 2906 // If there isn't one, return 0. 2907 // For example, given a*i + b*j + c*k, finding the coefficient 2908 // corresponding to the j loop would yield b. 2909 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 2910 const Loop *TargetLoop) const { 2911 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2912 if (!AddRec) 2913 return SE->getZero(Expr->getType()); 2914 if (AddRec->getLoop() == TargetLoop) 2915 return AddRec->getStepRecurrence(*SE); 2916 return findCoefficient(AddRec->getStart(), TargetLoop); 2917 } 2918 2919 2920 // Given a linear SCEV, 2921 // return the SCEV given by zeroing out the coefficient 2922 // corresponding to the specified loop. 2923 // For example, given a*i + b*j + c*k, zeroing the coefficient 2924 // corresponding to the j loop would yield a*i + c*k. 2925 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 2926 const Loop *TargetLoop) const { 2927 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2928 if (!AddRec) 2929 return Expr; // ignore 2930 if (AddRec->getLoop() == TargetLoop) 2931 return AddRec->getStart(); 2932 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2933 AddRec->getStepRecurrence(*SE), 2934 AddRec->getLoop(), 2935 AddRec->getNoWrapFlags()); 2936 } 2937 2938 2939 // Given a linear SCEV Expr, 2940 // return the SCEV given by adding some Value to the 2941 // coefficient corresponding to the specified TargetLoop. 2942 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2943 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2944 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 2945 const Loop *TargetLoop, 2946 const SCEV *Value) const { 2947 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2948 if (!AddRec) // create a new addRec 2949 return SE->getAddRecExpr(Expr, 2950 Value, 2951 TargetLoop, 2952 SCEV::FlagAnyWrap); // Worst case, with no info. 2953 if (AddRec->getLoop() == TargetLoop) { 2954 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2955 if (Sum->isZero()) 2956 return AddRec->getStart(); 2957 return SE->getAddRecExpr(AddRec->getStart(), 2958 Sum, 2959 AddRec->getLoop(), 2960 AddRec->getNoWrapFlags()); 2961 } 2962 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2963 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2964 return SE->getAddRecExpr( 2965 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2966 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 2967 AddRec->getNoWrapFlags()); 2968 } 2969 2970 2971 // Review the constraints, looking for opportunities 2972 // to simplify a subscript pair (Src and Dst). 2973 // Return true if some simplification occurs. 2974 // If the simplification isn't exact (that is, if it is conservative 2975 // in terms of dependence), set consistent to false. 2976 // Corresponds to Figure 5 from the paper 2977 // 2978 // Practical Dependence Testing 2979 // Goff, Kennedy, Tseng 2980 // PLDI 1991 2981 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 2982 SmallBitVector &Loops, 2983 SmallVectorImpl<Constraint> &Constraints, 2984 bool &Consistent) { 2985 bool Result = false; 2986 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 2987 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 2988 DEBUG(Constraints[LI].dump(dbgs())); 2989 if (Constraints[LI].isDistance()) 2990 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 2991 else if (Constraints[LI].isLine()) 2992 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 2993 else if (Constraints[LI].isPoint()) 2994 Result |= propagatePoint(Src, Dst, Constraints[LI]); 2995 } 2996 return Result; 2997 } 2998 2999 3000 // Attempt to propagate a distance 3001 // constraint into a subscript pair (Src and Dst). 3002 // Return true if some simplification occurs. 3003 // If the simplification isn't exact (that is, if it is conservative 3004 // in terms of dependence), set consistent to false. 3005 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3006 Constraint &CurConstraint, 3007 bool &Consistent) { 3008 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3009 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3010 const SCEV *A_K = findCoefficient(Src, CurLoop); 3011 if (A_K->isZero()) 3012 return false; 3013 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3014 Src = SE->getMinusSCEV(Src, DA_K); 3015 Src = zeroCoefficient(Src, CurLoop); 3016 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3017 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3018 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3019 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3020 if (!findCoefficient(Dst, CurLoop)->isZero()) 3021 Consistent = false; 3022 return true; 3023 } 3024 3025 3026 // Attempt to propagate a line 3027 // constraint into a subscript pair (Src and Dst). 3028 // Return true if some simplification occurs. 3029 // If the simplification isn't exact (that is, if it is conservative 3030 // in terms of dependence), set consistent to false. 3031 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3032 Constraint &CurConstraint, 3033 bool &Consistent) { 3034 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3035 const SCEV *A = CurConstraint.getA(); 3036 const SCEV *B = CurConstraint.getB(); 3037 const SCEV *C = CurConstraint.getC(); 3038 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3039 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3040 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3041 if (A->isZero()) { 3042 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3043 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3044 if (!Bconst || !Cconst) return false; 3045 APInt Beta = Bconst->getAPInt(); 3046 APInt Charlie = Cconst->getAPInt(); 3047 APInt CdivB = Charlie.sdiv(Beta); 3048 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3049 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3050 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3051 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3052 Dst = zeroCoefficient(Dst, CurLoop); 3053 if (!findCoefficient(Src, CurLoop)->isZero()) 3054 Consistent = false; 3055 } 3056 else if (B->isZero()) { 3057 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3058 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3059 if (!Aconst || !Cconst) return false; 3060 APInt Alpha = Aconst->getAPInt(); 3061 APInt Charlie = Cconst->getAPInt(); 3062 APInt CdivA = Charlie.sdiv(Alpha); 3063 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3064 const SCEV *A_K = findCoefficient(Src, CurLoop); 3065 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3066 Src = zeroCoefficient(Src, CurLoop); 3067 if (!findCoefficient(Dst, CurLoop)->isZero()) 3068 Consistent = false; 3069 } 3070 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3071 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3072 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3073 if (!Aconst || !Cconst) return false; 3074 APInt Alpha = Aconst->getAPInt(); 3075 APInt Charlie = Cconst->getAPInt(); 3076 APInt CdivA = Charlie.sdiv(Alpha); 3077 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3078 const SCEV *A_K = findCoefficient(Src, CurLoop); 3079 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3080 Src = zeroCoefficient(Src, CurLoop); 3081 Dst = addToCoefficient(Dst, CurLoop, A_K); 3082 if (!findCoefficient(Dst, CurLoop)->isZero()) 3083 Consistent = false; 3084 } 3085 else { 3086 // paper is incorrect here, or perhaps just misleading 3087 const SCEV *A_K = findCoefficient(Src, CurLoop); 3088 Src = SE->getMulExpr(Src, A); 3089 Dst = SE->getMulExpr(Dst, A); 3090 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3091 Src = zeroCoefficient(Src, CurLoop); 3092 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3093 if (!findCoefficient(Dst, CurLoop)->isZero()) 3094 Consistent = false; 3095 } 3096 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3097 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3098 return true; 3099 } 3100 3101 3102 // Attempt to propagate a point 3103 // constraint into a subscript pair (Src and Dst). 3104 // Return true if some simplification occurs. 3105 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3106 Constraint &CurConstraint) { 3107 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3108 const SCEV *A_K = findCoefficient(Src, CurLoop); 3109 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3110 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3111 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3112 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3113 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3114 Src = zeroCoefficient(Src, CurLoop); 3115 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3116 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3117 Dst = zeroCoefficient(Dst, CurLoop); 3118 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3119 return true; 3120 } 3121 3122 3123 // Update direction vector entry based on the current constraint. 3124 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3125 const Constraint &CurConstraint) const { 3126 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3127 DEBUG(CurConstraint.dump(dbgs())); 3128 if (CurConstraint.isAny()) 3129 ; // use defaults 3130 else if (CurConstraint.isDistance()) { 3131 // this one is consistent, the others aren't 3132 Level.Scalar = false; 3133 Level.Distance = CurConstraint.getD(); 3134 unsigned NewDirection = Dependence::DVEntry::NONE; 3135 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3136 NewDirection = Dependence::DVEntry::EQ; 3137 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3138 NewDirection |= Dependence::DVEntry::LT; 3139 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3140 NewDirection |= Dependence::DVEntry::GT; 3141 Level.Direction &= NewDirection; 3142 } 3143 else if (CurConstraint.isLine()) { 3144 Level.Scalar = false; 3145 Level.Distance = nullptr; 3146 // direction should be accurate 3147 } 3148 else if (CurConstraint.isPoint()) { 3149 Level.Scalar = false; 3150 Level.Distance = nullptr; 3151 unsigned NewDirection = Dependence::DVEntry::NONE; 3152 if (!isKnownPredicate(CmpInst::ICMP_NE, 3153 CurConstraint.getY(), 3154 CurConstraint.getX())) 3155 // if X may be = Y 3156 NewDirection |= Dependence::DVEntry::EQ; 3157 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3158 CurConstraint.getY(), 3159 CurConstraint.getX())) 3160 // if Y may be > X 3161 NewDirection |= Dependence::DVEntry::LT; 3162 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3163 CurConstraint.getY(), 3164 CurConstraint.getX())) 3165 // if Y may be < X 3166 NewDirection |= Dependence::DVEntry::GT; 3167 Level.Direction &= NewDirection; 3168 } 3169 else 3170 llvm_unreachable("constraint has unexpected kind"); 3171 } 3172 3173 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3174 /// source and destination array references are recurrences on a nested loop, 3175 /// this function flattens the nested recurrences into separate recurrences 3176 /// for each loop level. 3177 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3178 SmallVectorImpl<Subscript> &Pair) { 3179 Value *SrcPtr = getPointerOperand(Src); 3180 Value *DstPtr = getPointerOperand(Dst); 3181 3182 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3183 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3184 3185 // Below code mimics the code in Delinearization.cpp 3186 const SCEV *SrcAccessFn = 3187 SE->getSCEVAtScope(SrcPtr, SrcLoop); 3188 const SCEV *DstAccessFn = 3189 SE->getSCEVAtScope(DstPtr, DstLoop); 3190 3191 const SCEVUnknown *SrcBase = 3192 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3193 const SCEVUnknown *DstBase = 3194 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3195 3196 if (!SrcBase || !DstBase || SrcBase != DstBase) 3197 return false; 3198 3199 const SCEV *ElementSize = SE->getElementSize(Src); 3200 if (ElementSize != SE->getElementSize(Dst)) 3201 return false; 3202 3203 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3204 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3205 3206 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3207 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3208 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3209 return false; 3210 3211 // First step: collect parametric terms in both array references. 3212 SmallVector<const SCEV *, 4> Terms; 3213 SE->collectParametricTerms(SrcAR, Terms); 3214 SE->collectParametricTerms(DstAR, Terms); 3215 3216 // Second step: find subscript sizes. 3217 SmallVector<const SCEV *, 4> Sizes; 3218 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3219 3220 // Third step: compute the access functions for each subscript. 3221 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3222 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3223 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3224 3225 // Fail when there is only a subscript: that's a linearized access function. 3226 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3227 SrcSubscripts.size() != DstSubscripts.size()) 3228 return false; 3229 3230 int size = SrcSubscripts.size(); 3231 3232 DEBUG({ 3233 dbgs() << "\nSrcSubscripts: "; 3234 for (int i = 0; i < size; i++) 3235 dbgs() << *SrcSubscripts[i]; 3236 dbgs() << "\nDstSubscripts: "; 3237 for (int i = 0; i < size; i++) 3238 dbgs() << *DstSubscripts[i]; 3239 }); 3240 3241 // The delinearization transforms a single-subscript MIV dependence test into 3242 // a multi-subscript SIV dependence test that is easier to compute. So we 3243 // resize Pair to contain as many pairs of subscripts as the delinearization 3244 // has found, and then initialize the pairs following the delinearization. 3245 Pair.resize(size); 3246 for (int i = 0; i < size; ++i) { 3247 Pair[i].Src = SrcSubscripts[i]; 3248 Pair[i].Dst = DstSubscripts[i]; 3249 unifySubscriptType(&Pair[i]); 3250 3251 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3252 // delinearization has found, and add these constraints to the dependence 3253 // check to avoid memory accesses overflow from one dimension into another. 3254 // This is related to the problem of determining the existence of data 3255 // dependences in array accesses using a different number of subscripts: in 3256 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3257 } 3258 3259 return true; 3260 } 3261 3262 //===----------------------------------------------------------------------===// 3263 3264 #ifndef NDEBUG 3265 // For debugging purposes, dump a small bit vector to dbgs(). 3266 static void dumpSmallBitVector(SmallBitVector &BV) { 3267 dbgs() << "{"; 3268 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3269 dbgs() << VI; 3270 if (BV.find_next(VI) >= 0) 3271 dbgs() << ' '; 3272 } 3273 dbgs() << "}\n"; 3274 } 3275 #endif 3276 3277 // depends - 3278 // Returns NULL if there is no dependence. 3279 // Otherwise, return a Dependence with as many details as possible. 3280 // Corresponds to Section 3.1 in the paper 3281 // 3282 // Practical Dependence Testing 3283 // Goff, Kennedy, Tseng 3284 // PLDI 1991 3285 // 3286 // Care is required to keep the routine below, getSplitIteration(), 3287 // up to date with respect to this routine. 3288 std::unique_ptr<Dependence> 3289 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3290 bool PossiblyLoopIndependent) { 3291 if (Src == Dst) 3292 PossiblyLoopIndependent = false; 3293 3294 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3295 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3296 // if both instructions don't reference memory, there's no dependence 3297 return nullptr; 3298 3299 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3300 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3301 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3302 return make_unique<Dependence>(Src, Dst); 3303 } 3304 3305 Value *SrcPtr = getPointerOperand(Src); 3306 Value *DstPtr = getPointerOperand(Dst); 3307 3308 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3309 SrcPtr)) { 3310 case MayAlias: 3311 case PartialAlias: 3312 // cannot analyse objects if we don't understand their aliasing. 3313 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3314 return make_unique<Dependence>(Src, Dst); 3315 case NoAlias: 3316 // If the objects noalias, they are distinct, accesses are independent. 3317 DEBUG(dbgs() << "no alias\n"); 3318 return nullptr; 3319 case MustAlias: 3320 break; // The underlying objects alias; test accesses for dependence. 3321 } 3322 3323 // establish loop nesting levels 3324 establishNestingLevels(Src, Dst); 3325 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3326 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3327 3328 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3329 ++TotalArrayPairs; 3330 3331 // See if there are GEPs we can use. 3332 bool UsefulGEP = false; 3333 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3334 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3335 if (SrcGEP && DstGEP && 3336 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3337 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3338 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3339 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3340 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3341 3342 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3343 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3344 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3345 } 3346 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3347 SmallVector<Subscript, 4> Pair(Pairs); 3348 if (UsefulGEP) { 3349 DEBUG(dbgs() << " using GEPs\n"); 3350 unsigned P = 0; 3351 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3352 SrcEnd = SrcGEP->idx_end(), 3353 DstIdx = DstGEP->idx_begin(); 3354 SrcIdx != SrcEnd; 3355 ++SrcIdx, ++DstIdx, ++P) { 3356 Pair[P].Src = SE->getSCEV(*SrcIdx); 3357 Pair[P].Dst = SE->getSCEV(*DstIdx); 3358 unifySubscriptType(&Pair[P]); 3359 } 3360 } 3361 else { 3362 DEBUG(dbgs() << " ignoring GEPs\n"); 3363 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3364 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3365 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3366 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3367 Pair[0].Src = SrcSCEV; 3368 Pair[0].Dst = DstSCEV; 3369 } 3370 3371 if (Delinearize && CommonLevels > 1) { 3372 if (tryDelinearize(Src, Dst, Pair)) { 3373 DEBUG(dbgs() << " delinerized GEP\n"); 3374 Pairs = Pair.size(); 3375 } 3376 } 3377 3378 for (unsigned P = 0; P < Pairs; ++P) { 3379 Pair[P].Loops.resize(MaxLevels + 1); 3380 Pair[P].GroupLoops.resize(MaxLevels + 1); 3381 Pair[P].Group.resize(Pairs); 3382 removeMatchingExtensions(&Pair[P]); 3383 Pair[P].Classification = 3384 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3385 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3386 Pair[P].Loops); 3387 Pair[P].GroupLoops = Pair[P].Loops; 3388 Pair[P].Group.set(P); 3389 DEBUG(dbgs() << " subscript " << P << "\n"); 3390 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3391 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3392 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3393 DEBUG(dbgs() << "\tloops = "); 3394 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3395 } 3396 3397 SmallBitVector Separable(Pairs); 3398 SmallBitVector Coupled(Pairs); 3399 3400 // Partition subscripts into separable and minimally-coupled groups 3401 // Algorithm in paper is algorithmically better; 3402 // this may be faster in practice. Check someday. 3403 // 3404 // Here's an example of how it works. Consider this code: 3405 // 3406 // for (i = ...) { 3407 // for (j = ...) { 3408 // for (k = ...) { 3409 // for (l = ...) { 3410 // for (m = ...) { 3411 // A[i][j][k][m] = ...; 3412 // ... = A[0][j][l][i + j]; 3413 // } 3414 // } 3415 // } 3416 // } 3417 // } 3418 // 3419 // There are 4 subscripts here: 3420 // 0 [i] and [0] 3421 // 1 [j] and [j] 3422 // 2 [k] and [l] 3423 // 3 [m] and [i + j] 3424 // 3425 // We've already classified each subscript pair as ZIV, SIV, etc., 3426 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3427 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3428 // and set Pair[P].Group = {P}. 3429 // 3430 // Src Dst Classification Loops GroupLoops Group 3431 // 0 [i] [0] SIV {1} {1} {0} 3432 // 1 [j] [j] SIV {2} {2} {1} 3433 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3434 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3435 // 3436 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3437 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3438 // 3439 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3440 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3441 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3442 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3443 // to either Separable or Coupled). 3444 // 3445 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3446 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3447 // so Pair[3].Group = {0, 1, 3} and Done = false. 3448 // 3449 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3450 // Since Done remains true, we add 2 to the set of Separable pairs. 3451 // 3452 // Finally, we consider 3. There's nothing to compare it with, 3453 // so Done remains true and we add it to the Coupled set. 3454 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3455 // 3456 // In the end, we've got 1 separable subscript and 1 coupled group. 3457 for (unsigned SI = 0; SI < Pairs; ++SI) { 3458 if (Pair[SI].Classification == Subscript::NonLinear) { 3459 // ignore these, but collect loops for later 3460 ++NonlinearSubscriptPairs; 3461 collectCommonLoops(Pair[SI].Src, 3462 LI->getLoopFor(Src->getParent()), 3463 Pair[SI].Loops); 3464 collectCommonLoops(Pair[SI].Dst, 3465 LI->getLoopFor(Dst->getParent()), 3466 Pair[SI].Loops); 3467 Result.Consistent = false; 3468 } else if (Pair[SI].Classification == Subscript::ZIV) { 3469 // always separable 3470 Separable.set(SI); 3471 } 3472 else { 3473 // SIV, RDIV, or MIV, so check for coupled group 3474 bool Done = true; 3475 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3476 SmallBitVector Intersection = Pair[SI].GroupLoops; 3477 Intersection &= Pair[SJ].GroupLoops; 3478 if (Intersection.any()) { 3479 // accumulate set of all the loops in group 3480 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3481 // accumulate set of all subscripts in group 3482 Pair[SJ].Group |= Pair[SI].Group; 3483 Done = false; 3484 } 3485 } 3486 if (Done) { 3487 if (Pair[SI].Group.count() == 1) { 3488 Separable.set(SI); 3489 ++SeparableSubscriptPairs; 3490 } 3491 else { 3492 Coupled.set(SI); 3493 ++CoupledSubscriptPairs; 3494 } 3495 } 3496 } 3497 } 3498 3499 DEBUG(dbgs() << " Separable = "); 3500 DEBUG(dumpSmallBitVector(Separable)); 3501 DEBUG(dbgs() << " Coupled = "); 3502 DEBUG(dumpSmallBitVector(Coupled)); 3503 3504 Constraint NewConstraint; 3505 NewConstraint.setAny(SE); 3506 3507 // test separable subscripts 3508 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3509 DEBUG(dbgs() << "testing subscript " << SI); 3510 switch (Pair[SI].Classification) { 3511 case Subscript::ZIV: 3512 DEBUG(dbgs() << ", ZIV\n"); 3513 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3514 return nullptr; 3515 break; 3516 case Subscript::SIV: { 3517 DEBUG(dbgs() << ", SIV\n"); 3518 unsigned Level; 3519 const SCEV *SplitIter = nullptr; 3520 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3521 SplitIter)) 3522 return nullptr; 3523 break; 3524 } 3525 case Subscript::RDIV: 3526 DEBUG(dbgs() << ", RDIV\n"); 3527 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3528 return nullptr; 3529 break; 3530 case Subscript::MIV: 3531 DEBUG(dbgs() << ", MIV\n"); 3532 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3533 return nullptr; 3534 break; 3535 default: 3536 llvm_unreachable("subscript has unexpected classification"); 3537 } 3538 } 3539 3540 if (Coupled.count()) { 3541 // test coupled subscript groups 3542 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3543 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3544 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3545 for (unsigned II = 0; II <= MaxLevels; ++II) 3546 Constraints[II].setAny(SE); 3547 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3548 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3549 SmallBitVector Group(Pair[SI].Group); 3550 SmallBitVector Sivs(Pairs); 3551 SmallBitVector Mivs(Pairs); 3552 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3553 SmallVector<Subscript *, 4> PairsInGroup; 3554 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3555 DEBUG(dbgs() << SJ << " "); 3556 if (Pair[SJ].Classification == Subscript::SIV) 3557 Sivs.set(SJ); 3558 else 3559 Mivs.set(SJ); 3560 PairsInGroup.push_back(&Pair[SJ]); 3561 } 3562 unifySubscriptType(PairsInGroup); 3563 DEBUG(dbgs() << "}\n"); 3564 while (Sivs.any()) { 3565 bool Changed = false; 3566 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3567 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3568 // SJ is an SIV subscript that's part of the current coupled group 3569 unsigned Level; 3570 const SCEV *SplitIter = nullptr; 3571 DEBUG(dbgs() << "SIV\n"); 3572 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3573 SplitIter)) 3574 return nullptr; 3575 ConstrainedLevels.set(Level); 3576 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3577 if (Constraints[Level].isEmpty()) { 3578 ++DeltaIndependence; 3579 return nullptr; 3580 } 3581 Changed = true; 3582 } 3583 Sivs.reset(SJ); 3584 } 3585 if (Changed) { 3586 // propagate, possibly creating new SIVs and ZIVs 3587 DEBUG(dbgs() << " propagating\n"); 3588 DEBUG(dbgs() << "\tMivs = "); 3589 DEBUG(dumpSmallBitVector(Mivs)); 3590 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3591 // SJ is an MIV subscript that's part of the current coupled group 3592 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3593 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3594 Constraints, Result.Consistent)) { 3595 DEBUG(dbgs() << "\t Changed\n"); 3596 ++DeltaPropagations; 3597 Pair[SJ].Classification = 3598 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3599 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3600 Pair[SJ].Loops); 3601 switch (Pair[SJ].Classification) { 3602 case Subscript::ZIV: 3603 DEBUG(dbgs() << "ZIV\n"); 3604 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3605 return nullptr; 3606 Mivs.reset(SJ); 3607 break; 3608 case Subscript::SIV: 3609 Sivs.set(SJ); 3610 Mivs.reset(SJ); 3611 break; 3612 case Subscript::RDIV: 3613 case Subscript::MIV: 3614 break; 3615 default: 3616 llvm_unreachable("bad subscript classification"); 3617 } 3618 } 3619 } 3620 } 3621 } 3622 3623 // test & propagate remaining RDIVs 3624 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3625 if (Pair[SJ].Classification == Subscript::RDIV) { 3626 DEBUG(dbgs() << "RDIV test\n"); 3627 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3628 return nullptr; 3629 // I don't yet understand how to propagate RDIV results 3630 Mivs.reset(SJ); 3631 } 3632 } 3633 3634 // test remaining MIVs 3635 // This code is temporary. 3636 // Better to somehow test all remaining subscripts simultaneously. 3637 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3638 if (Pair[SJ].Classification == Subscript::MIV) { 3639 DEBUG(dbgs() << "MIV test\n"); 3640 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3641 return nullptr; 3642 } 3643 else 3644 llvm_unreachable("expected only MIV subscripts at this point"); 3645 } 3646 3647 // update Result.DV from constraint vector 3648 DEBUG(dbgs() << " updating\n"); 3649 for (int SJ = ConstrainedLevels.find_first(); SJ >= 0; 3650 SJ = ConstrainedLevels.find_next(SJ)) { 3651 if (SJ > (int)CommonLevels) 3652 break; 3653 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3654 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3655 return nullptr; 3656 } 3657 } 3658 } 3659 3660 // Make sure the Scalar flags are set correctly. 3661 SmallBitVector CompleteLoops(MaxLevels + 1); 3662 for (unsigned SI = 0; SI < Pairs; ++SI) 3663 CompleteLoops |= Pair[SI].Loops; 3664 for (unsigned II = 1; II <= CommonLevels; ++II) 3665 if (CompleteLoops[II]) 3666 Result.DV[II - 1].Scalar = false; 3667 3668 if (PossiblyLoopIndependent) { 3669 // Make sure the LoopIndependent flag is set correctly. 3670 // All directions must include equal, otherwise no 3671 // loop-independent dependence is possible. 3672 for (unsigned II = 1; II <= CommonLevels; ++II) { 3673 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3674 Result.LoopIndependent = false; 3675 break; 3676 } 3677 } 3678 } 3679 else { 3680 // On the other hand, if all directions are equal and there's no 3681 // loop-independent dependence possible, then no dependence exists. 3682 bool AllEqual = true; 3683 for (unsigned II = 1; II <= CommonLevels; ++II) { 3684 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3685 AllEqual = false; 3686 break; 3687 } 3688 } 3689 if (AllEqual) 3690 return nullptr; 3691 } 3692 3693 return make_unique<FullDependence>(std::move(Result)); 3694 } 3695 3696 3697 3698 //===----------------------------------------------------------------------===// 3699 // getSplitIteration - 3700 // Rather than spend rarely-used space recording the splitting iteration 3701 // during the Weak-Crossing SIV test, we re-compute it on demand. 3702 // The re-computation is basically a repeat of the entire dependence test, 3703 // though simplified since we know that the dependence exists. 3704 // It's tedious, since we must go through all propagations, etc. 3705 // 3706 // Care is required to keep this code up to date with respect to the routine 3707 // above, depends(). 3708 // 3709 // Generally, the dependence analyzer will be used to build 3710 // a dependence graph for a function (basically a map from instructions 3711 // to dependences). Looking for cycles in the graph shows us loops 3712 // that cannot be trivially vectorized/parallelized. 3713 // 3714 // We can try to improve the situation by examining all the dependences 3715 // that make up the cycle, looking for ones we can break. 3716 // Sometimes, peeling the first or last iteration of a loop will break 3717 // dependences, and we've got flags for those possibilities. 3718 // Sometimes, splitting a loop at some other iteration will do the trick, 3719 // and we've got a flag for that case. Rather than waste the space to 3720 // record the exact iteration (since we rarely know), we provide 3721 // a method that calculates the iteration. It's a drag that it must work 3722 // from scratch, but wonderful in that it's possible. 3723 // 3724 // Here's an example: 3725 // 3726 // for (i = 0; i < 10; i++) 3727 // A[i] = ... 3728 // ... = A[11 - i] 3729 // 3730 // There's a loop-carried flow dependence from the store to the load, 3731 // found by the weak-crossing SIV test. The dependence will have a flag, 3732 // indicating that the dependence can be broken by splitting the loop. 3733 // Calling getSplitIteration will return 5. 3734 // Splitting the loop breaks the dependence, like so: 3735 // 3736 // for (i = 0; i <= 5; i++) 3737 // A[i] = ... 3738 // ... = A[11 - i] 3739 // for (i = 6; i < 10; i++) 3740 // A[i] = ... 3741 // ... = A[11 - i] 3742 // 3743 // breaks the dependence and allows us to vectorize/parallelize 3744 // both loops. 3745 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3746 unsigned SplitLevel) { 3747 assert(Dep.isSplitable(SplitLevel) && 3748 "Dep should be splitable at SplitLevel"); 3749 Instruction *Src = Dep.getSrc(); 3750 Instruction *Dst = Dep.getDst(); 3751 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3752 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3753 assert(isLoadOrStore(Src)); 3754 assert(isLoadOrStore(Dst)); 3755 Value *SrcPtr = getPointerOperand(Src); 3756 Value *DstPtr = getPointerOperand(Dst); 3757 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3758 SrcPtr) == MustAlias); 3759 3760 // establish loop nesting levels 3761 establishNestingLevels(Src, Dst); 3762 3763 FullDependence Result(Src, Dst, false, CommonLevels); 3764 3765 // See if there are GEPs we can use. 3766 bool UsefulGEP = false; 3767 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3768 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3769 if (SrcGEP && DstGEP && 3770 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3771 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3772 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3773 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3774 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3775 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3776 } 3777 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3778 SmallVector<Subscript, 4> Pair(Pairs); 3779 if (UsefulGEP) { 3780 unsigned P = 0; 3781 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3782 SrcEnd = SrcGEP->idx_end(), 3783 DstIdx = DstGEP->idx_begin(); 3784 SrcIdx != SrcEnd; 3785 ++SrcIdx, ++DstIdx, ++P) { 3786 Pair[P].Src = SE->getSCEV(*SrcIdx); 3787 Pair[P].Dst = SE->getSCEV(*DstIdx); 3788 } 3789 } 3790 else { 3791 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3792 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3793 Pair[0].Src = SrcSCEV; 3794 Pair[0].Dst = DstSCEV; 3795 } 3796 3797 if (Delinearize && CommonLevels > 1) { 3798 if (tryDelinearize(Src, Dst, Pair)) { 3799 DEBUG(dbgs() << " delinerized GEP\n"); 3800 Pairs = Pair.size(); 3801 } 3802 } 3803 3804 for (unsigned P = 0; P < Pairs; ++P) { 3805 Pair[P].Loops.resize(MaxLevels + 1); 3806 Pair[P].GroupLoops.resize(MaxLevels + 1); 3807 Pair[P].Group.resize(Pairs); 3808 removeMatchingExtensions(&Pair[P]); 3809 Pair[P].Classification = 3810 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3811 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3812 Pair[P].Loops); 3813 Pair[P].GroupLoops = Pair[P].Loops; 3814 Pair[P].Group.set(P); 3815 } 3816 3817 SmallBitVector Separable(Pairs); 3818 SmallBitVector Coupled(Pairs); 3819 3820 // partition subscripts into separable and minimally-coupled groups 3821 for (unsigned SI = 0; SI < Pairs; ++SI) { 3822 if (Pair[SI].Classification == Subscript::NonLinear) { 3823 // ignore these, but collect loops for later 3824 collectCommonLoops(Pair[SI].Src, 3825 LI->getLoopFor(Src->getParent()), 3826 Pair[SI].Loops); 3827 collectCommonLoops(Pair[SI].Dst, 3828 LI->getLoopFor(Dst->getParent()), 3829 Pair[SI].Loops); 3830 Result.Consistent = false; 3831 } 3832 else if (Pair[SI].Classification == Subscript::ZIV) 3833 Separable.set(SI); 3834 else { 3835 // SIV, RDIV, or MIV, so check for coupled group 3836 bool Done = true; 3837 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3838 SmallBitVector Intersection = Pair[SI].GroupLoops; 3839 Intersection &= Pair[SJ].GroupLoops; 3840 if (Intersection.any()) { 3841 // accumulate set of all the loops in group 3842 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3843 // accumulate set of all subscripts in group 3844 Pair[SJ].Group |= Pair[SI].Group; 3845 Done = false; 3846 } 3847 } 3848 if (Done) { 3849 if (Pair[SI].Group.count() == 1) 3850 Separable.set(SI); 3851 else 3852 Coupled.set(SI); 3853 } 3854 } 3855 } 3856 3857 Constraint NewConstraint; 3858 NewConstraint.setAny(SE); 3859 3860 // test separable subscripts 3861 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3862 switch (Pair[SI].Classification) { 3863 case Subscript::SIV: { 3864 unsigned Level; 3865 const SCEV *SplitIter = nullptr; 3866 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3867 Result, NewConstraint, SplitIter); 3868 if (Level == SplitLevel) { 3869 assert(SplitIter != nullptr); 3870 return SplitIter; 3871 } 3872 break; 3873 } 3874 case Subscript::ZIV: 3875 case Subscript::RDIV: 3876 case Subscript::MIV: 3877 break; 3878 default: 3879 llvm_unreachable("subscript has unexpected classification"); 3880 } 3881 } 3882 3883 if (Coupled.count()) { 3884 // test coupled subscript groups 3885 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3886 for (unsigned II = 0; II <= MaxLevels; ++II) 3887 Constraints[II].setAny(SE); 3888 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3889 SmallBitVector Group(Pair[SI].Group); 3890 SmallBitVector Sivs(Pairs); 3891 SmallBitVector Mivs(Pairs); 3892 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3893 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3894 if (Pair[SJ].Classification == Subscript::SIV) 3895 Sivs.set(SJ); 3896 else 3897 Mivs.set(SJ); 3898 } 3899 while (Sivs.any()) { 3900 bool Changed = false; 3901 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3902 // SJ is an SIV subscript that's part of the current coupled group 3903 unsigned Level; 3904 const SCEV *SplitIter = nullptr; 3905 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3906 Result, NewConstraint, SplitIter); 3907 if (Level == SplitLevel && SplitIter) 3908 return SplitIter; 3909 ConstrainedLevels.set(Level); 3910 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3911 Changed = true; 3912 Sivs.reset(SJ); 3913 } 3914 if (Changed) { 3915 // propagate, possibly creating new SIVs and ZIVs 3916 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3917 // SJ is an MIV subscript that's part of the current coupled group 3918 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3919 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3920 Pair[SJ].Classification = 3921 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3922 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3923 Pair[SJ].Loops); 3924 switch (Pair[SJ].Classification) { 3925 case Subscript::ZIV: 3926 Mivs.reset(SJ); 3927 break; 3928 case Subscript::SIV: 3929 Sivs.set(SJ); 3930 Mivs.reset(SJ); 3931 break; 3932 case Subscript::RDIV: 3933 case Subscript::MIV: 3934 break; 3935 default: 3936 llvm_unreachable("bad subscript classification"); 3937 } 3938 } 3939 } 3940 } 3941 } 3942 } 3943 } 3944 llvm_unreachable("somehow reached end of routine"); 3945 return nullptr; 3946 } 3947