1 //===-- LoopUtils.cpp - Loop Utility functions -------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines common loop utility functions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Utils/LoopUtils.h" 15 #include "llvm/ADT/ScopeExit.h" 16 #include "llvm/Analysis/AliasAnalysis.h" 17 #include "llvm/Analysis/BasicAliasAnalysis.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/InstructionSimplify.h" 20 #include "llvm/Analysis/LoopInfo.h" 21 #include "llvm/Analysis/LoopPass.h" 22 #include "llvm/Analysis/MustExecute.h" 23 #include "llvm/Analysis/ScalarEvolution.h" 24 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 25 #include "llvm/Analysis/ScalarEvolutionExpander.h" 26 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 27 #include "llvm/Analysis/TargetTransformInfo.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/IR/Dominators.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Module.h" 32 #include "llvm/IR/PatternMatch.h" 33 #include "llvm/IR/ValueHandle.h" 34 #include "llvm/Pass.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/KnownBits.h" 37 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 38 39 using namespace llvm; 40 using namespace llvm::PatternMatch; 41 42 #define DEBUG_TYPE "loop-utils" 43 44 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, 45 SmallPtrSetImpl<Instruction *> &Set) { 46 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) 47 if (!Set.count(dyn_cast<Instruction>(*Use))) 48 return false; 49 return true; 50 } 51 52 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { 53 switch (Kind) { 54 default: 55 break; 56 case RK_IntegerAdd: 57 case RK_IntegerMult: 58 case RK_IntegerOr: 59 case RK_IntegerAnd: 60 case RK_IntegerXor: 61 case RK_IntegerMinMax: 62 return true; 63 } 64 return false; 65 } 66 67 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { 68 return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); 69 } 70 71 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { 72 switch (Kind) { 73 default: 74 break; 75 case RK_IntegerAdd: 76 case RK_IntegerMult: 77 case RK_FloatAdd: 78 case RK_FloatMult: 79 return true; 80 } 81 return false; 82 } 83 84 /// Determines if Phi may have been type-promoted. If Phi has a single user 85 /// that ANDs the Phi with a type mask, return the user. RT is updated to 86 /// account for the narrower bit width represented by the mask, and the AND 87 /// instruction is added to CI. 88 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, 89 SmallPtrSetImpl<Instruction *> &Visited, 90 SmallPtrSetImpl<Instruction *> &CI) { 91 if (!Phi->hasOneUse()) 92 return Phi; 93 94 const APInt *M = nullptr; 95 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); 96 97 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT 98 // with a new integer type of the corresponding bit width. 99 if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { 100 int32_t Bits = (*M + 1).exactLogBase2(); 101 if (Bits > 0) { 102 RT = IntegerType::get(Phi->getContext(), Bits); 103 Visited.insert(Phi); 104 CI.insert(J); 105 return J; 106 } 107 } 108 return Phi; 109 } 110 111 /// Compute the minimal bit width needed to represent a reduction whose exit 112 /// instruction is given by Exit. 113 static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit, 114 DemandedBits *DB, 115 AssumptionCache *AC, 116 DominatorTree *DT) { 117 bool IsSigned = false; 118 const DataLayout &DL = Exit->getModule()->getDataLayout(); 119 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType()); 120 121 if (DB) { 122 // Use the demanded bits analysis to determine the bits that are live out 123 // of the exit instruction, rounding up to the nearest power of two. If the 124 // use of demanded bits results in a smaller bit width, we know the value 125 // must be positive (i.e., IsSigned = false), because if this were not the 126 // case, the sign bit would have been demanded. 127 auto Mask = DB->getDemandedBits(Exit); 128 MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros(); 129 } 130 131 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) { 132 // If demanded bits wasn't able to limit the bit width, we can try to use 133 // value tracking instead. This can be the case, for example, if the value 134 // may be negative. 135 auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT); 136 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType()); 137 MaxBitWidth = NumTypeBits - NumSignBits; 138 KnownBits Bits = computeKnownBits(Exit, DL); 139 if (!Bits.isNonNegative()) { 140 // If the value is not known to be non-negative, we set IsSigned to true, 141 // meaning that we will use sext instructions instead of zext 142 // instructions to restore the original type. 143 IsSigned = true; 144 if (!Bits.isNegative()) 145 // If the value is not known to be negative, we don't known what the 146 // upper bit is, and therefore, we don't know what kind of extend we 147 // will need. In this case, just increase the bit width by one bit and 148 // use sext. 149 ++MaxBitWidth; 150 } 151 } 152 if (!isPowerOf2_64(MaxBitWidth)) 153 MaxBitWidth = NextPowerOf2(MaxBitWidth); 154 155 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth), 156 IsSigned); 157 } 158 159 /// Collect cast instructions that can be ignored in the vectorizer's cost 160 /// model, given a reduction exit value and the minimal type in which the 161 /// reduction can be represented. 162 static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, 163 Type *RecurrenceType, 164 SmallPtrSetImpl<Instruction *> &Casts) { 165 166 SmallVector<Instruction *, 8> Worklist; 167 SmallPtrSet<Instruction *, 8> Visited; 168 Worklist.push_back(Exit); 169 170 while (!Worklist.empty()) { 171 Instruction *Val = Worklist.pop_back_val(); 172 Visited.insert(Val); 173 if (auto *Cast = dyn_cast<CastInst>(Val)) 174 if (Cast->getSrcTy() == RecurrenceType) { 175 // If the source type of a cast instruction is equal to the recurrence 176 // type, it will be eliminated, and should be ignored in the vectorizer 177 // cost model. 178 Casts.insert(Cast); 179 continue; 180 } 181 182 // Add all operands to the work list if they are loop-varying values that 183 // we haven't yet visited. 184 for (Value *O : cast<User>(Val)->operands()) 185 if (auto *I = dyn_cast<Instruction>(O)) 186 if (TheLoop->contains(I) && !Visited.count(I)) 187 Worklist.push_back(I); 188 } 189 } 190 191 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, 192 Loop *TheLoop, bool HasFunNoNaNAttr, 193 RecurrenceDescriptor &RedDes, 194 DemandedBits *DB, 195 AssumptionCache *AC, 196 DominatorTree *DT) { 197 if (Phi->getNumIncomingValues() != 2) 198 return false; 199 200 // Reduction variables are only found in the loop header block. 201 if (Phi->getParent() != TheLoop->getHeader()) 202 return false; 203 204 // Obtain the reduction start value from the value that comes from the loop 205 // preheader. 206 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); 207 208 // ExitInstruction is the single value which is used outside the loop. 209 // We only allow for a single reduction value to be used outside the loop. 210 // This includes users of the reduction, variables (which form a cycle 211 // which ends in the phi node). 212 Instruction *ExitInstruction = nullptr; 213 // Indicates that we found a reduction operation in our scan. 214 bool FoundReduxOp = false; 215 216 // We start with the PHI node and scan for all of the users of this 217 // instruction. All users must be instructions that can be used as reduction 218 // variables (such as ADD). We must have a single out-of-block user. The cycle 219 // must include the original PHI. 220 bool FoundStartPHI = false; 221 222 // To recognize min/max patterns formed by a icmp select sequence, we store 223 // the number of instruction we saw from the recognized min/max pattern, 224 // to make sure we only see exactly the two instructions. 225 unsigned NumCmpSelectPatternInst = 0; 226 InstDesc ReduxDesc(false, nullptr); 227 228 // Data used for determining if the recurrence has been type-promoted. 229 Type *RecurrenceType = Phi->getType(); 230 SmallPtrSet<Instruction *, 4> CastInsts; 231 Instruction *Start = Phi; 232 bool IsSigned = false; 233 234 SmallPtrSet<Instruction *, 8> VisitedInsts; 235 SmallVector<Instruction *, 8> Worklist; 236 237 // Return early if the recurrence kind does not match the type of Phi. If the 238 // recurrence kind is arithmetic, we attempt to look through AND operations 239 // resulting from the type promotion performed by InstCombine. Vector 240 // operations are not limited to the legal integer widths, so we may be able 241 // to evaluate the reduction in the narrower width. 242 if (RecurrenceType->isFloatingPointTy()) { 243 if (!isFloatingPointRecurrenceKind(Kind)) 244 return false; 245 } else { 246 if (!isIntegerRecurrenceKind(Kind)) 247 return false; 248 if (isArithmeticRecurrenceKind(Kind)) 249 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); 250 } 251 252 Worklist.push_back(Start); 253 VisitedInsts.insert(Start); 254 255 // A value in the reduction can be used: 256 // - By the reduction: 257 // - Reduction operation: 258 // - One use of reduction value (safe). 259 // - Multiple use of reduction value (not safe). 260 // - PHI: 261 // - All uses of the PHI must be the reduction (safe). 262 // - Otherwise, not safe. 263 // - By instructions outside of the loop (safe). 264 // * One value may have several outside users, but all outside 265 // uses must be of the same value. 266 // - By an instruction that is not part of the reduction (not safe). 267 // This is either: 268 // * An instruction type other than PHI or the reduction operation. 269 // * A PHI in the header other than the initial PHI. 270 while (!Worklist.empty()) { 271 Instruction *Cur = Worklist.back(); 272 Worklist.pop_back(); 273 274 // No Users. 275 // If the instruction has no users then this is a broken chain and can't be 276 // a reduction variable. 277 if (Cur->use_empty()) 278 return false; 279 280 bool IsAPhi = isa<PHINode>(Cur); 281 282 // A header PHI use other than the original PHI. 283 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) 284 return false; 285 286 // Reductions of instructions such as Div, and Sub is only possible if the 287 // LHS is the reduction variable. 288 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && 289 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && 290 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) 291 return false; 292 293 // Any reduction instruction must be of one of the allowed kinds. We ignore 294 // the starting value (the Phi or an AND instruction if the Phi has been 295 // type-promoted). 296 if (Cur != Start) { 297 ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); 298 if (!ReduxDesc.isRecurrence()) 299 return false; 300 } 301 302 // A reduction operation must only have one use of the reduction value. 303 if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && 304 hasMultipleUsesOf(Cur, VisitedInsts)) 305 return false; 306 307 // All inputs to a PHI node must be a reduction value. 308 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) 309 return false; 310 311 if (Kind == RK_IntegerMinMax && 312 (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur))) 313 ++NumCmpSelectPatternInst; 314 if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur))) 315 ++NumCmpSelectPatternInst; 316 317 // Check whether we found a reduction operator. 318 FoundReduxOp |= !IsAPhi && Cur != Start; 319 320 // Process users of current instruction. Push non-PHI nodes after PHI nodes 321 // onto the stack. This way we are going to have seen all inputs to PHI 322 // nodes once we get to them. 323 SmallVector<Instruction *, 8> NonPHIs; 324 SmallVector<Instruction *, 8> PHIs; 325 for (User *U : Cur->users()) { 326 Instruction *UI = cast<Instruction>(U); 327 328 // Check if we found the exit user. 329 BasicBlock *Parent = UI->getParent(); 330 if (!TheLoop->contains(Parent)) { 331 // If we already know this instruction is used externally, move on to 332 // the next user. 333 if (ExitInstruction == Cur) 334 continue; 335 336 // Exit if you find multiple values used outside or if the header phi 337 // node is being used. In this case the user uses the value of the 338 // previous iteration, in which case we would loose "VF-1" iterations of 339 // the reduction operation if we vectorize. 340 if (ExitInstruction != nullptr || Cur == Phi) 341 return false; 342 343 // The instruction used by an outside user must be the last instruction 344 // before we feed back to the reduction phi. Otherwise, we loose VF-1 345 // operations on the value. 346 if (!is_contained(Phi->operands(), Cur)) 347 return false; 348 349 ExitInstruction = Cur; 350 continue; 351 } 352 353 // Process instructions only once (termination). Each reduction cycle 354 // value must only be used once, except by phi nodes and min/max 355 // reductions which are represented as a cmp followed by a select. 356 InstDesc IgnoredVal(false, nullptr); 357 if (VisitedInsts.insert(UI).second) { 358 if (isa<PHINode>(UI)) 359 PHIs.push_back(UI); 360 else 361 NonPHIs.push_back(UI); 362 } else if (!isa<PHINode>(UI) && 363 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && 364 !isa<SelectInst>(UI)) || 365 !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) 366 return false; 367 368 // Remember that we completed the cycle. 369 if (UI == Phi) 370 FoundStartPHI = true; 371 } 372 Worklist.append(PHIs.begin(), PHIs.end()); 373 Worklist.append(NonPHIs.begin(), NonPHIs.end()); 374 } 375 376 // This means we have seen one but not the other instruction of the 377 // pattern or more than just a select and cmp. 378 if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && 379 NumCmpSelectPatternInst != 2) 380 return false; 381 382 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) 383 return false; 384 385 if (Start != Phi) { 386 // If the starting value is not the same as the phi node, we speculatively 387 // looked through an 'and' instruction when evaluating a potential 388 // arithmetic reduction to determine if it may have been type-promoted. 389 // 390 // We now compute the minimal bit width that is required to represent the 391 // reduction. If this is the same width that was indicated by the 'and', we 392 // can represent the reduction in the smaller type. The 'and' instruction 393 // will be eliminated since it will essentially be a cast instruction that 394 // can be ignore in the cost model. If we compute a different type than we 395 // did when evaluating the 'and', the 'and' will not be eliminated, and we 396 // will end up with different kinds of operations in the recurrence 397 // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is 398 // the case. 399 // 400 // The vectorizer relies on InstCombine to perform the actual 401 // type-shrinking. It does this by inserting instructions to truncate the 402 // exit value of the reduction to the width indicated by RecurrenceType and 403 // then extend this value back to the original width. If IsSigned is false, 404 // a 'zext' instruction will be generated; otherwise, a 'sext' will be 405 // used. 406 // 407 // TODO: We should not rely on InstCombine to rewrite the reduction in the 408 // smaller type. We should just generate a correctly typed expression 409 // to begin with. 410 Type *ComputedType; 411 std::tie(ComputedType, IsSigned) = 412 computeRecurrenceType(ExitInstruction, DB, AC, DT); 413 if (ComputedType != RecurrenceType) 414 return false; 415 416 // The recurrence expression will be represented in a narrower type. If 417 // there are any cast instructions that will be unnecessary, collect them 418 // in CastInsts. Note that the 'and' instruction was already included in 419 // this list. 420 // 421 // TODO: A better way to represent this may be to tag in some way all the 422 // instructions that are a part of the reduction. The vectorizer cost 423 // model could then apply the recurrence type to these instructions, 424 // without needing a white list of instructions to ignore. 425 collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); 426 } 427 428 // We found a reduction var if we have reached the original phi node and we 429 // only have a single instruction with out-of-loop users. 430 431 // The ExitInstruction(Instruction which is allowed to have out-of-loop users) 432 // is saved as part of the RecurrenceDescriptor. 433 434 // Save the description of this reduction variable. 435 RecurrenceDescriptor RD( 436 RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), 437 ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); 438 RedDes = RD; 439 440 return true; 441 } 442 443 /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction 444 /// pattern corresponding to a min(X, Y) or max(X, Y). 445 RecurrenceDescriptor::InstDesc 446 RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { 447 448 assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && 449 "Expect a select instruction"); 450 Instruction *Cmp = nullptr; 451 SelectInst *Select = nullptr; 452 453 // We must handle the select(cmp()) as a single instruction. Advance to the 454 // select. 455 if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { 456 if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin()))) 457 return InstDesc(false, I); 458 return InstDesc(Select, Prev.getMinMaxKind()); 459 } 460 461 // Only handle single use cases for now. 462 if (!(Select = dyn_cast<SelectInst>(I))) 463 return InstDesc(false, I); 464 if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && 465 !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) 466 return InstDesc(false, I); 467 if (!Cmp->hasOneUse()) 468 return InstDesc(false, I); 469 470 Value *CmpLeft; 471 Value *CmpRight; 472 473 // Look for a min/max pattern. 474 if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 475 return InstDesc(Select, MRK_UIntMin); 476 else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 477 return InstDesc(Select, MRK_UIntMax); 478 else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 479 return InstDesc(Select, MRK_SIntMax); 480 else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 481 return InstDesc(Select, MRK_SIntMin); 482 else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 483 return InstDesc(Select, MRK_FloatMin); 484 else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 485 return InstDesc(Select, MRK_FloatMax); 486 else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 487 return InstDesc(Select, MRK_FloatMin); 488 else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) 489 return InstDesc(Select, MRK_FloatMax); 490 491 return InstDesc(false, I); 492 } 493 494 RecurrenceDescriptor::InstDesc 495 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, 496 InstDesc &Prev, bool HasFunNoNaNAttr) { 497 bool FP = I->getType()->isFloatingPointTy(); 498 Instruction *UAI = Prev.getUnsafeAlgebraInst(); 499 if (!UAI && FP && !I->isFast()) 500 UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 501 502 switch (I->getOpcode()) { 503 default: 504 return InstDesc(false, I); 505 case Instruction::PHI: 506 return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); 507 case Instruction::Sub: 508 case Instruction::Add: 509 return InstDesc(Kind == RK_IntegerAdd, I); 510 case Instruction::Mul: 511 return InstDesc(Kind == RK_IntegerMult, I); 512 case Instruction::And: 513 return InstDesc(Kind == RK_IntegerAnd, I); 514 case Instruction::Or: 515 return InstDesc(Kind == RK_IntegerOr, I); 516 case Instruction::Xor: 517 return InstDesc(Kind == RK_IntegerXor, I); 518 case Instruction::FMul: 519 return InstDesc(Kind == RK_FloatMult, I, UAI); 520 case Instruction::FSub: 521 case Instruction::FAdd: 522 return InstDesc(Kind == RK_FloatAdd, I, UAI); 523 case Instruction::FCmp: 524 case Instruction::ICmp: 525 case Instruction::Select: 526 if (Kind != RK_IntegerMinMax && 527 (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) 528 return InstDesc(false, I); 529 return isMinMaxSelectCmpPattern(I, Prev); 530 } 531 } 532 533 bool RecurrenceDescriptor::hasMultipleUsesOf( 534 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) { 535 unsigned NumUses = 0; 536 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; 537 ++Use) { 538 if (Insts.count(dyn_cast<Instruction>(*Use))) 539 ++NumUses; 540 if (NumUses > 1) 541 return true; 542 } 543 544 return false; 545 } 546 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, 547 RecurrenceDescriptor &RedDes, 548 DemandedBits *DB, AssumptionCache *AC, 549 DominatorTree *DT) { 550 551 BasicBlock *Header = TheLoop->getHeader(); 552 Function &F = *Header->getParent(); 553 bool HasFunNoNaNAttr = 554 F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 555 556 if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 557 AC, DT)) { 558 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); 559 return true; 560 } 561 if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, 562 AC, DT)) { 563 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); 564 return true; 565 } 566 if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB, 567 AC, DT)) { 568 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); 569 return true; 570 } 571 if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 572 AC, DT)) { 573 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); 574 return true; 575 } 576 if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB, 577 AC, DT)) { 578 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); 579 return true; 580 } 581 if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes, 582 DB, AC, DT)) { 583 LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); 584 return true; 585 } 586 if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, 587 AC, DT)) { 588 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); 589 return true; 590 } 591 if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, 592 AC, DT)) { 593 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); 594 return true; 595 } 596 if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, 597 AC, DT)) { 598 LLVM_DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi 599 << "\n"); 600 return true; 601 } 602 // Not a reduction of known type. 603 return false; 604 } 605 606 bool RecurrenceDescriptor::isFirstOrderRecurrence( 607 PHINode *Phi, Loop *TheLoop, 608 DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) { 609 610 // Ensure the phi node is in the loop header and has two incoming values. 611 if (Phi->getParent() != TheLoop->getHeader() || 612 Phi->getNumIncomingValues() != 2) 613 return false; 614 615 // Ensure the loop has a preheader and a single latch block. The loop 616 // vectorizer will need the latch to set up the next iteration of the loop. 617 auto *Preheader = TheLoop->getLoopPreheader(); 618 auto *Latch = TheLoop->getLoopLatch(); 619 if (!Preheader || !Latch) 620 return false; 621 622 // Ensure the phi node's incoming blocks are the loop preheader and latch. 623 if (Phi->getBasicBlockIndex(Preheader) < 0 || 624 Phi->getBasicBlockIndex(Latch) < 0) 625 return false; 626 627 // Get the previous value. The previous value comes from the latch edge while 628 // the initial value comes form the preheader edge. 629 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); 630 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) || 631 SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. 632 return false; 633 634 // Ensure every user of the phi node is dominated by the previous value. 635 // The dominance requirement ensures the loop vectorizer will not need to 636 // vectorize the initial value prior to the first iteration of the loop. 637 // TODO: Consider extending this sinking to handle other kinds of instructions 638 // and expressions, beyond sinking a single cast past Previous. 639 if (Phi->hasOneUse()) { 640 auto *I = Phi->user_back(); 641 if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && 642 DT->dominates(Previous, I->user_back())) { 643 if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. 644 SinkAfter[I] = Previous; 645 return true; 646 } 647 } 648 649 for (User *U : Phi->users()) 650 if (auto *I = dyn_cast<Instruction>(U)) { 651 if (!DT->dominates(Previous, I)) 652 return false; 653 } 654 655 return true; 656 } 657 658 /// This function returns the identity element (or neutral element) for 659 /// the operation K. 660 Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, 661 Type *Tp) { 662 switch (K) { 663 case RK_IntegerXor: 664 case RK_IntegerAdd: 665 case RK_IntegerOr: 666 // Adding, Xoring, Oring zero to a number does not change it. 667 return ConstantInt::get(Tp, 0); 668 case RK_IntegerMult: 669 // Multiplying a number by 1 does not change it. 670 return ConstantInt::get(Tp, 1); 671 case RK_IntegerAnd: 672 // AND-ing a number with an all-1 value does not change it. 673 return ConstantInt::get(Tp, -1, true); 674 case RK_FloatMult: 675 // Multiplying a number by 1 does not change it. 676 return ConstantFP::get(Tp, 1.0L); 677 case RK_FloatAdd: 678 // Adding zero to a number does not change it. 679 return ConstantFP::get(Tp, 0.0L); 680 default: 681 llvm_unreachable("Unknown recurrence kind"); 682 } 683 } 684 685 /// This function translates the recurrence kind to an LLVM binary operator. 686 unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { 687 switch (Kind) { 688 case RK_IntegerAdd: 689 return Instruction::Add; 690 case RK_IntegerMult: 691 return Instruction::Mul; 692 case RK_IntegerOr: 693 return Instruction::Or; 694 case RK_IntegerAnd: 695 return Instruction::And; 696 case RK_IntegerXor: 697 return Instruction::Xor; 698 case RK_FloatMult: 699 return Instruction::FMul; 700 case RK_FloatAdd: 701 return Instruction::FAdd; 702 case RK_IntegerMinMax: 703 return Instruction::ICmp; 704 case RK_FloatMinMax: 705 return Instruction::FCmp; 706 default: 707 llvm_unreachable("Unknown recurrence operation"); 708 } 709 } 710 711 Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, 712 MinMaxRecurrenceKind RK, 713 Value *Left, Value *Right) { 714 CmpInst::Predicate P = CmpInst::ICMP_NE; 715 switch (RK) { 716 default: 717 llvm_unreachable("Unknown min/max recurrence kind"); 718 case MRK_UIntMin: 719 P = CmpInst::ICMP_ULT; 720 break; 721 case MRK_UIntMax: 722 P = CmpInst::ICMP_UGT; 723 break; 724 case MRK_SIntMin: 725 P = CmpInst::ICMP_SLT; 726 break; 727 case MRK_SIntMax: 728 P = CmpInst::ICMP_SGT; 729 break; 730 case MRK_FloatMin: 731 P = CmpInst::FCMP_OLT; 732 break; 733 case MRK_FloatMax: 734 P = CmpInst::FCMP_OGT; 735 break; 736 } 737 738 // We only match FP sequences that are 'fast', so we can unconditionally 739 // set it on any generated instructions. 740 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 741 FastMathFlags FMF; 742 FMF.setFast(); 743 Builder.setFastMathFlags(FMF); 744 745 Value *Cmp; 746 if (RK == MRK_FloatMin || RK == MRK_FloatMax) 747 Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); 748 else 749 Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); 750 751 Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); 752 return Select; 753 } 754 755 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, 756 const SCEV *Step, BinaryOperator *BOp, 757 SmallVectorImpl<Instruction *> *Casts) 758 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { 759 assert(IK != IK_NoInduction && "Not an induction"); 760 761 // Start value type should match the induction kind and the value 762 // itself should not be null. 763 assert(StartValue && "StartValue is null"); 764 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && 765 "StartValue is not a pointer for pointer induction"); 766 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && 767 "StartValue is not an integer for integer induction"); 768 769 // Check the Step Value. It should be non-zero integer value. 770 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && 771 "Step value is zero"); 772 773 assert((IK != IK_PtrInduction || getConstIntStepValue()) && 774 "Step value should be constant for pointer induction"); 775 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && 776 "StepValue is not an integer"); 777 778 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && 779 "StepValue is not FP for FpInduction"); 780 assert((IK != IK_FpInduction || (InductionBinOp && 781 (InductionBinOp->getOpcode() == Instruction::FAdd || 782 InductionBinOp->getOpcode() == Instruction::FSub))) && 783 "Binary opcode should be specified for FP induction"); 784 785 if (Casts) { 786 for (auto &Inst : *Casts) { 787 RedundantCasts.push_back(Inst); 788 } 789 } 790 } 791 792 int InductionDescriptor::getConsecutiveDirection() const { 793 ConstantInt *ConstStep = getConstIntStepValue(); 794 if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) 795 return ConstStep->getSExtValue(); 796 return 0; 797 } 798 799 ConstantInt *InductionDescriptor::getConstIntStepValue() const { 800 if (isa<SCEVConstant>(Step)) 801 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); 802 return nullptr; 803 } 804 805 Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, 806 ScalarEvolution *SE, 807 const DataLayout& DL) const { 808 809 SCEVExpander Exp(*SE, DL, "induction"); 810 assert(Index->getType() == Step->getType() && 811 "Index type does not match StepValue type"); 812 switch (IK) { 813 case IK_IntInduction: { 814 assert(Index->getType() == StartValue->getType() && 815 "Index type does not match StartValue type"); 816 817 // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution 818 // and calculate (Start + Index * Step) for all cases, without 819 // special handling for "isOne" and "isMinusOne". 820 // But in the real life the result code getting worse. We mix SCEV 821 // expressions and ADD/SUB operations and receive redundant 822 // intermediate values being calculated in different ways and 823 // Instcombine is unable to reduce them all. 824 825 if (getConstIntStepValue() && 826 getConstIntStepValue()->isMinusOne()) 827 return B.CreateSub(StartValue, Index); 828 if (getConstIntStepValue() && 829 getConstIntStepValue()->isOne()) 830 return B.CreateAdd(StartValue, Index); 831 const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), 832 SE->getMulExpr(Step, SE->getSCEV(Index))); 833 return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); 834 } 835 case IK_PtrInduction: { 836 assert(isa<SCEVConstant>(Step) && 837 "Expected constant step for pointer induction"); 838 const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); 839 Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); 840 return B.CreateGEP(nullptr, StartValue, Index); 841 } 842 case IK_FpInduction: { 843 assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); 844 assert(InductionBinOp && 845 (InductionBinOp->getOpcode() == Instruction::FAdd || 846 InductionBinOp->getOpcode() == Instruction::FSub) && 847 "Original bin op should be defined for FP induction"); 848 849 Value *StepValue = cast<SCEVUnknown>(Step)->getValue(); 850 851 // Floating point operations had to be 'fast' to enable the induction. 852 FastMathFlags Flags; 853 Flags.setFast(); 854 855 Value *MulExp = B.CreateFMul(StepValue, Index); 856 if (isa<Instruction>(MulExp)) 857 // We have to check, the MulExp may be a constant. 858 cast<Instruction>(MulExp)->setFastMathFlags(Flags); 859 860 Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue, 861 MulExp, "induction"); 862 if (isa<Instruction>(BOp)) 863 cast<Instruction>(BOp)->setFastMathFlags(Flags); 864 865 return BOp; 866 } 867 case IK_NoInduction: 868 return nullptr; 869 } 870 llvm_unreachable("invalid enum"); 871 } 872 873 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, 874 ScalarEvolution *SE, 875 InductionDescriptor &D) { 876 877 // Here we only handle FP induction variables. 878 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); 879 880 if (TheLoop->getHeader() != Phi->getParent()) 881 return false; 882 883 // The loop may have multiple entrances or multiple exits; we can analyze 884 // this phi if it has a unique entry value and a unique backedge value. 885 if (Phi->getNumIncomingValues() != 2) 886 return false; 887 Value *BEValue = nullptr, *StartValue = nullptr; 888 if (TheLoop->contains(Phi->getIncomingBlock(0))) { 889 BEValue = Phi->getIncomingValue(0); 890 StartValue = Phi->getIncomingValue(1); 891 } else { 892 assert(TheLoop->contains(Phi->getIncomingBlock(1)) && 893 "Unexpected Phi node in the loop"); 894 BEValue = Phi->getIncomingValue(1); 895 StartValue = Phi->getIncomingValue(0); 896 } 897 898 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); 899 if (!BOp) 900 return false; 901 902 Value *Addend = nullptr; 903 if (BOp->getOpcode() == Instruction::FAdd) { 904 if (BOp->getOperand(0) == Phi) 905 Addend = BOp->getOperand(1); 906 else if (BOp->getOperand(1) == Phi) 907 Addend = BOp->getOperand(0); 908 } else if (BOp->getOpcode() == Instruction::FSub) 909 if (BOp->getOperand(0) == Phi) 910 Addend = BOp->getOperand(1); 911 912 if (!Addend) 913 return false; 914 915 // The addend should be loop invariant 916 if (auto *I = dyn_cast<Instruction>(Addend)) 917 if (TheLoop->contains(I)) 918 return false; 919 920 // FP Step has unknown SCEV 921 const SCEV *Step = SE->getUnknown(Addend); 922 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); 923 return true; 924 } 925 926 /// This function is called when we suspect that the update-chain of a phi node 927 /// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, 928 /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime 929 /// predicate P under which the SCEV expression for the phi can be the 930 /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the 931 /// cast instructions that are involved in the update-chain of this induction. 932 /// A caller that adds the required runtime predicate can be free to drop these 933 /// cast instructions, and compute the phi using \p AR (instead of some scev 934 /// expression with casts). 935 /// 936 /// For example, without a predicate the scev expression can take the following 937 /// form: 938 /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) 939 /// 940 /// It corresponds to the following IR sequence: 941 /// %for.body: 942 /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] 943 /// %casted_phi = "ExtTrunc i64 %x" 944 /// %add = add i64 %casted_phi, %step 945 /// 946 /// where %x is given in \p PN, 947 /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, 948 /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of 949 /// several forms, for example, such as: 950 /// ExtTrunc1: %casted_phi = and %x, 2^n-1 951 /// or: 952 /// ExtTrunc2: %t = shl %x, m 953 /// %casted_phi = ashr %t, m 954 /// 955 /// If we are able to find such sequence, we return the instructions 956 /// we found, namely %casted_phi and the instructions on its use-def chain up 957 /// to the phi (not including the phi). 958 static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, 959 const SCEVUnknown *PhiScev, 960 const SCEVAddRecExpr *AR, 961 SmallVectorImpl<Instruction *> &CastInsts) { 962 963 assert(CastInsts.empty() && "CastInsts is expected to be empty."); 964 auto *PN = cast<PHINode>(PhiScev->getValue()); 965 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); 966 const Loop *L = AR->getLoop(); 967 968 // Find any cast instructions that participate in the def-use chain of 969 // PhiScev in the loop. 970 // FORNOW/TODO: We currently expect the def-use chain to include only 971 // two-operand instructions, where one of the operands is an invariant. 972 // createAddRecFromPHIWithCasts() currently does not support anything more 973 // involved than that, so we keep the search simple. This can be 974 // extended/generalized as needed. 975 976 auto getDef = [&](const Value *Val) -> Value * { 977 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val); 978 if (!BinOp) 979 return nullptr; 980 Value *Op0 = BinOp->getOperand(0); 981 Value *Op1 = BinOp->getOperand(1); 982 Value *Def = nullptr; 983 if (L->isLoopInvariant(Op0)) 984 Def = Op1; 985 else if (L->isLoopInvariant(Op1)) 986 Def = Op0; 987 return Def; 988 }; 989 990 // Look for the instruction that defines the induction via the 991 // loop backedge. 992 BasicBlock *Latch = L->getLoopLatch(); 993 if (!Latch) 994 return false; 995 Value *Val = PN->getIncomingValueForBlock(Latch); 996 if (!Val) 997 return false; 998 999 // Follow the def-use chain until the induction phi is reached. 1000 // If on the way we encounter a Value that has the same SCEV Expr as the 1001 // phi node, we can consider the instructions we visit from that point 1002 // as part of the cast-sequence that can be ignored. 1003 bool InCastSequence = false; 1004 auto *Inst = dyn_cast<Instruction>(Val); 1005 while (Val != PN) { 1006 // If we encountered a phi node other than PN, or if we left the loop, 1007 // we bail out. 1008 if (!Inst || !L->contains(Inst)) { 1009 return false; 1010 } 1011 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val)); 1012 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) 1013 InCastSequence = true; 1014 if (InCastSequence) { 1015 // Only the last instruction in the cast sequence is expected to have 1016 // uses outside the induction def-use chain. 1017 if (!CastInsts.empty()) 1018 if (!Inst->hasOneUse()) 1019 return false; 1020 CastInsts.push_back(Inst); 1021 } 1022 Val = getDef(Val); 1023 if (!Val) 1024 return false; 1025 Inst = dyn_cast<Instruction>(Val); 1026 } 1027 1028 return InCastSequence; 1029 } 1030 1031 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 1032 PredicatedScalarEvolution &PSE, 1033 InductionDescriptor &D, 1034 bool Assume) { 1035 Type *PhiTy = Phi->getType(); 1036 1037 // Handle integer and pointer inductions variables. 1038 // Now we handle also FP induction but not trying to make a 1039 // recurrent expression from the PHI node in-place. 1040 1041 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && 1042 !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) 1043 return false; 1044 1045 if (PhiTy->isFloatingPointTy()) 1046 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); 1047 1048 const SCEV *PhiScev = PSE.getSCEV(Phi); 1049 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 1050 1051 // We need this expression to be an AddRecExpr. 1052 if (Assume && !AR) 1053 AR = PSE.getAsAddRec(Phi); 1054 1055 if (!AR) { 1056 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 1057 return false; 1058 } 1059 1060 // Record any Cast instructions that participate in the induction update 1061 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev); 1062 // If we started from an UnknownSCEV, and managed to build an addRecurrence 1063 // only after enabling Assume with PSCEV, this means we may have encountered 1064 // cast instructions that required adding a runtime check in order to 1065 // guarantee the correctness of the AddRecurence respresentation of the 1066 // induction. 1067 if (PhiScev != AR && SymbolicPhi) { 1068 SmallVector<Instruction *, 2> Casts; 1069 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) 1070 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); 1071 } 1072 1073 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); 1074 } 1075 1076 bool InductionDescriptor::isInductionPHI( 1077 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, 1078 InductionDescriptor &D, const SCEV *Expr, 1079 SmallVectorImpl<Instruction *> *CastsToIgnore) { 1080 Type *PhiTy = Phi->getType(); 1081 // We only handle integer and pointer inductions variables. 1082 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) 1083 return false; 1084 1085 // Check that the PHI is consecutive. 1086 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); 1087 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 1088 1089 if (!AR) { 1090 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 1091 return false; 1092 } 1093 1094 if (AR->getLoop() != TheLoop) { 1095 // FIXME: We should treat this as a uniform. Unfortunately, we 1096 // don't currently know how to handled uniform PHIs. 1097 LLVM_DEBUG( 1098 dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); 1099 return false; 1100 } 1101 1102 Value *StartValue = 1103 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); 1104 const SCEV *Step = AR->getStepRecurrence(*SE); 1105 // Calculate the pointer stride and check if it is consecutive. 1106 // The stride may be a constant or a loop invariant integer value. 1107 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); 1108 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) 1109 return false; 1110 1111 if (PhiTy->isIntegerTy()) { 1112 D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr, 1113 CastsToIgnore); 1114 return true; 1115 } 1116 1117 assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); 1118 // Pointer induction should be a constant. 1119 if (!ConstStep) 1120 return false; 1121 1122 ConstantInt *CV = ConstStep->getValue(); 1123 Type *PointerElementType = PhiTy->getPointerElementType(); 1124 // The pointer stride cannot be determined if the pointer element type is not 1125 // sized. 1126 if (!PointerElementType->isSized()) 1127 return false; 1128 1129 const DataLayout &DL = Phi->getModule()->getDataLayout(); 1130 int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); 1131 if (!Size) 1132 return false; 1133 1134 int64_t CVSize = CV->getSExtValue(); 1135 if (CVSize % Size) 1136 return false; 1137 auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, 1138 true /* signed */); 1139 D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); 1140 return true; 1141 } 1142 1143 bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, 1144 bool PreserveLCSSA) { 1145 bool Changed = false; 1146 1147 // We re-use a vector for the in-loop predecesosrs. 1148 SmallVector<BasicBlock *, 4> InLoopPredecessors; 1149 1150 auto RewriteExit = [&](BasicBlock *BB) { 1151 assert(InLoopPredecessors.empty() && 1152 "Must start with an empty predecessors list!"); 1153 auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); }); 1154 1155 // See if there are any non-loop predecessors of this exit block and 1156 // keep track of the in-loop predecessors. 1157 bool IsDedicatedExit = true; 1158 for (auto *PredBB : predecessors(BB)) 1159 if (L->contains(PredBB)) { 1160 if (isa<IndirectBrInst>(PredBB->getTerminator())) 1161 // We cannot rewrite exiting edges from an indirectbr. 1162 return false; 1163 1164 InLoopPredecessors.push_back(PredBB); 1165 } else { 1166 IsDedicatedExit = false; 1167 } 1168 1169 assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!"); 1170 1171 // Nothing to do if this is already a dedicated exit. 1172 if (IsDedicatedExit) 1173 return false; 1174 1175 auto *NewExitBB = SplitBlockPredecessors( 1176 BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); 1177 1178 if (!NewExitBB) 1179 LLVM_DEBUG( 1180 dbgs() << "WARNING: Can't create a dedicated exit block for loop: " 1181 << *L << "\n"); 1182 else 1183 LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " 1184 << NewExitBB->getName() << "\n"); 1185 return true; 1186 }; 1187 1188 // Walk the exit blocks directly rather than building up a data structure for 1189 // them, but only visit each one once. 1190 SmallPtrSet<BasicBlock *, 4> Visited; 1191 for (auto *BB : L->blocks()) 1192 for (auto *SuccBB : successors(BB)) { 1193 // We're looking for exit blocks so skip in-loop successors. 1194 if (L->contains(SuccBB)) 1195 continue; 1196 1197 // Visit each exit block exactly once. 1198 if (!Visited.insert(SuccBB).second) 1199 continue; 1200 1201 Changed |= RewriteExit(SuccBB); 1202 } 1203 1204 return Changed; 1205 } 1206 1207 /// Returns the instructions that use values defined in the loop. 1208 SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) { 1209 SmallVector<Instruction *, 8> UsedOutside; 1210 1211 for (auto *Block : L->getBlocks()) 1212 // FIXME: I believe that this could use copy_if if the Inst reference could 1213 // be adapted into a pointer. 1214 for (auto &Inst : *Block) { 1215 auto Users = Inst.users(); 1216 if (any_of(Users, [&](User *U) { 1217 auto *Use = cast<Instruction>(U); 1218 return !L->contains(Use->getParent()); 1219 })) 1220 UsedOutside.push_back(&Inst); 1221 } 1222 1223 return UsedOutside; 1224 } 1225 1226 void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { 1227 // By definition, all loop passes need the LoopInfo analysis and the 1228 // Dominator tree it depends on. Because they all participate in the loop 1229 // pass manager, they must also preserve these. 1230 AU.addRequired<DominatorTreeWrapperPass>(); 1231 AU.addPreserved<DominatorTreeWrapperPass>(); 1232 AU.addRequired<LoopInfoWrapperPass>(); 1233 AU.addPreserved<LoopInfoWrapperPass>(); 1234 1235 // We must also preserve LoopSimplify and LCSSA. We locally access their IDs 1236 // here because users shouldn't directly get them from this header. 1237 extern char &LoopSimplifyID; 1238 extern char &LCSSAID; 1239 AU.addRequiredID(LoopSimplifyID); 1240 AU.addPreservedID(LoopSimplifyID); 1241 AU.addRequiredID(LCSSAID); 1242 AU.addPreservedID(LCSSAID); 1243 // This is used in the LPPassManager to perform LCSSA verification on passes 1244 // which preserve lcssa form 1245 AU.addRequired<LCSSAVerificationPass>(); 1246 AU.addPreserved<LCSSAVerificationPass>(); 1247 1248 // Loop passes are designed to run inside of a loop pass manager which means 1249 // that any function analyses they require must be required by the first loop 1250 // pass in the manager (so that it is computed before the loop pass manager 1251 // runs) and preserved by all loop pasess in the manager. To make this 1252 // reasonably robust, the set needed for most loop passes is maintained here. 1253 // If your loop pass requires an analysis not listed here, you will need to 1254 // carefully audit the loop pass manager nesting structure that results. 1255 AU.addRequired<AAResultsWrapperPass>(); 1256 AU.addPreserved<AAResultsWrapperPass>(); 1257 AU.addPreserved<BasicAAWrapperPass>(); 1258 AU.addPreserved<GlobalsAAWrapperPass>(); 1259 AU.addPreserved<SCEVAAWrapperPass>(); 1260 AU.addRequired<ScalarEvolutionWrapperPass>(); 1261 AU.addPreserved<ScalarEvolutionWrapperPass>(); 1262 } 1263 1264 /// Manually defined generic "LoopPass" dependency initialization. This is used 1265 /// to initialize the exact set of passes from above in \c 1266 /// getLoopAnalysisUsage. It can be used within a loop pass's initialization 1267 /// with: 1268 /// 1269 /// INITIALIZE_PASS_DEPENDENCY(LoopPass) 1270 /// 1271 /// As-if "LoopPass" were a pass. 1272 void llvm::initializeLoopPassPass(PassRegistry &Registry) { 1273 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1274 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1275 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 1276 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 1277 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 1278 INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) 1279 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 1280 INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) 1281 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 1282 } 1283 1284 /// Find string metadata for loop 1285 /// 1286 /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an 1287 /// operand or null otherwise. If the string metadata is not found return 1288 /// Optional's not-a-value. 1289 Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop, 1290 StringRef Name) { 1291 MDNode *LoopID = TheLoop->getLoopID(); 1292 // Return none if LoopID is false. 1293 if (!LoopID) 1294 return None; 1295 1296 // First operand should refer to the loop id itself. 1297 assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 1298 assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 1299 1300 // Iterate over LoopID operands and look for MDString Metadata 1301 for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { 1302 MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); 1303 if (!MD) 1304 continue; 1305 MDString *S = dyn_cast<MDString>(MD->getOperand(0)); 1306 if (!S) 1307 continue; 1308 // Return true if MDString holds expected MetaData. 1309 if (Name.equals(S->getString())) 1310 switch (MD->getNumOperands()) { 1311 case 1: 1312 return nullptr; 1313 case 2: 1314 return &MD->getOperand(1); 1315 default: 1316 llvm_unreachable("loop metadata has 0 or 1 operand"); 1317 } 1318 } 1319 return None; 1320 } 1321 1322 /// Does a BFS from a given node to all of its children inside a given loop. 1323 /// The returned vector of nodes includes the starting point. 1324 SmallVector<DomTreeNode *, 16> 1325 llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { 1326 SmallVector<DomTreeNode *, 16> Worklist; 1327 auto AddRegionToWorklist = [&](DomTreeNode *DTN) { 1328 // Only include subregions in the top level loop. 1329 BasicBlock *BB = DTN->getBlock(); 1330 if (CurLoop->contains(BB)) 1331 Worklist.push_back(DTN); 1332 }; 1333 1334 AddRegionToWorklist(N); 1335 1336 for (size_t I = 0; I < Worklist.size(); I++) 1337 for (DomTreeNode *Child : Worklist[I]->getChildren()) 1338 AddRegionToWorklist(Child); 1339 1340 return Worklist; 1341 } 1342 1343 void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, 1344 ScalarEvolution *SE = nullptr, 1345 LoopInfo *LI = nullptr) { 1346 assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); 1347 auto *Preheader = L->getLoopPreheader(); 1348 assert(Preheader && "Preheader should exist!"); 1349 1350 // Now that we know the removal is safe, remove the loop by changing the 1351 // branch from the preheader to go to the single exit block. 1352 // 1353 // Because we're deleting a large chunk of code at once, the sequence in which 1354 // we remove things is very important to avoid invalidation issues. 1355 1356 // Tell ScalarEvolution that the loop is deleted. Do this before 1357 // deleting the loop so that ScalarEvolution can look at the loop 1358 // to determine what it needs to clean up. 1359 if (SE) 1360 SE->forgetLoop(L); 1361 1362 auto *ExitBlock = L->getUniqueExitBlock(); 1363 assert(ExitBlock && "Should have a unique exit block!"); 1364 assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); 1365 1366 auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator()); 1367 assert(OldBr && "Preheader must end with a branch"); 1368 assert(OldBr->isUnconditional() && "Preheader must have a single successor"); 1369 // Connect the preheader to the exit block. Keep the old edge to the header 1370 // around to perform the dominator tree update in two separate steps 1371 // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge 1372 // preheader -> header. 1373 // 1374 // 1375 // 0. Preheader 1. Preheader 2. Preheader 1376 // | | | | 1377 // V | V | 1378 // Header <--\ | Header <--\ | Header <--\ 1379 // | | | | | | | | | | | 1380 // | V | | | V | | | V | 1381 // | Body --/ | | Body --/ | | Body --/ 1382 // V V V V V 1383 // Exit Exit Exit 1384 // 1385 // By doing this is two separate steps we can perform the dominator tree 1386 // update without using the batch update API. 1387 // 1388 // Even when the loop is never executed, we cannot remove the edge from the 1389 // source block to the exit block. Consider the case where the unexecuted loop 1390 // branches back to an outer loop. If we deleted the loop and removed the edge 1391 // coming to this inner loop, this will break the outer loop structure (by 1392 // deleting the backedge of the outer loop). If the outer loop is indeed a 1393 // non-loop, it will be deleted in a future iteration of loop deletion pass. 1394 IRBuilder<> Builder(OldBr); 1395 Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); 1396 // Remove the old branch. The conditional branch becomes a new terminator. 1397 OldBr->eraseFromParent(); 1398 1399 // Rewrite phis in the exit block to get their inputs from the Preheader 1400 // instead of the exiting block. 1401 for (PHINode &P : ExitBlock->phis()) { 1402 // Set the zero'th element of Phi to be from the preheader and remove all 1403 // other incoming values. Given the loop has dedicated exits, all other 1404 // incoming values must be from the exiting blocks. 1405 int PredIndex = 0; 1406 P.setIncomingBlock(PredIndex, Preheader); 1407 // Removes all incoming values from all other exiting blocks (including 1408 // duplicate values from an exiting block). 1409 // Nuke all entries except the zero'th entry which is the preheader entry. 1410 // NOTE! We need to remove Incoming Values in the reverse order as done 1411 // below, to keep the indices valid for deletion (removeIncomingValues 1412 // updates getNumIncomingValues and shifts all values down into the operand 1413 // being deleted). 1414 for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) 1415 P.removeIncomingValue(e - i, false); 1416 1417 assert((P.getNumIncomingValues() == 1 && 1418 P.getIncomingBlock(PredIndex) == Preheader) && 1419 "Should have exactly one value and that's from the preheader!"); 1420 } 1421 1422 // Disconnect the loop body by branching directly to its exit. 1423 Builder.SetInsertPoint(Preheader->getTerminator()); 1424 Builder.CreateBr(ExitBlock); 1425 // Remove the old branch. 1426 Preheader->getTerminator()->eraseFromParent(); 1427 1428 if (DT) { 1429 // Update the dominator tree by informing it about the new edge from the 1430 // preheader to the exit. 1431 DT->insertEdge(Preheader, ExitBlock); 1432 // Inform the dominator tree about the removed edge. 1433 DT->deleteEdge(Preheader, L->getHeader()); 1434 } 1435 1436 // Given LCSSA form is satisfied, we should not have users of instructions 1437 // within the dead loop outside of the loop. However, LCSSA doesn't take 1438 // unreachable uses into account. We handle them here. 1439 // We could do it after drop all references (in this case all users in the 1440 // loop will be already eliminated and we have less work to do but according 1441 // to API doc of User::dropAllReferences only valid operation after dropping 1442 // references, is deletion. So let's substitute all usages of 1443 // instruction from the loop with undef value of corresponding type first. 1444 for (auto *Block : L->blocks()) 1445 for (Instruction &I : *Block) { 1446 auto *Undef = UndefValue::get(I.getType()); 1447 for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) { 1448 Use &U = *UI; 1449 ++UI; 1450 if (auto *Usr = dyn_cast<Instruction>(U.getUser())) 1451 if (L->contains(Usr->getParent())) 1452 continue; 1453 // If we have a DT then we can check that uses outside a loop only in 1454 // unreachable block. 1455 if (DT) 1456 assert(!DT->isReachableFromEntry(U) && 1457 "Unexpected user in reachable block"); 1458 U.set(Undef); 1459 } 1460 } 1461 1462 // Remove the block from the reference counting scheme, so that we can 1463 // delete it freely later. 1464 for (auto *Block : L->blocks()) 1465 Block->dropAllReferences(); 1466 1467 if (LI) { 1468 // Erase the instructions and the blocks without having to worry 1469 // about ordering because we already dropped the references. 1470 // NOTE: This iteration is safe because erasing the block does not remove 1471 // its entry from the loop's block list. We do that in the next section. 1472 for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end(); 1473 LpI != LpE; ++LpI) 1474 (*LpI)->eraseFromParent(); 1475 1476 // Finally, the blocks from loopinfo. This has to happen late because 1477 // otherwise our loop iterators won't work. 1478 1479 SmallPtrSet<BasicBlock *, 8> blocks; 1480 blocks.insert(L->block_begin(), L->block_end()); 1481 for (BasicBlock *BB : blocks) 1482 LI->removeBlock(BB); 1483 1484 // The last step is to update LoopInfo now that we've eliminated this loop. 1485 LI->erase(L); 1486 } 1487 } 1488 1489 Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { 1490 // Only support loops with a unique exiting block, and a latch. 1491 if (!L->getExitingBlock()) 1492 return None; 1493 1494 // Get the branch weights for the loop's backedge. 1495 BranchInst *LatchBR = 1496 dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator()); 1497 if (!LatchBR || LatchBR->getNumSuccessors() != 2) 1498 return None; 1499 1500 assert((LatchBR->getSuccessor(0) == L->getHeader() || 1501 LatchBR->getSuccessor(1) == L->getHeader()) && 1502 "At least one edge out of the latch must go to the header"); 1503 1504 // To estimate the number of times the loop body was executed, we want to 1505 // know the number of times the backedge was taken, vs. the number of times 1506 // we exited the loop. 1507 uint64_t TrueVal, FalseVal; 1508 if (!LatchBR->extractProfMetadata(TrueVal, FalseVal)) 1509 return None; 1510 1511 if (!TrueVal || !FalseVal) 1512 return 0; 1513 1514 // Divide the count of the backedge by the count of the edge exiting the loop, 1515 // rounding to nearest. 1516 if (LatchBR->getSuccessor(0) == L->getHeader()) 1517 return (TrueVal + (FalseVal / 2)) / FalseVal; 1518 else 1519 return (FalseVal + (TrueVal / 2)) / TrueVal; 1520 } 1521 1522 /// Adds a 'fast' flag to floating point operations. 1523 static Value *addFastMathFlag(Value *V) { 1524 if (isa<FPMathOperator>(V)) { 1525 FastMathFlags Flags; 1526 Flags.setFast(); 1527 cast<Instruction>(V)->setFastMathFlags(Flags); 1528 } 1529 return V; 1530 } 1531 1532 // Helper to generate an ordered reduction. 1533 Value * 1534 llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, 1535 unsigned Op, 1536 RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, 1537 ArrayRef<Value *> RedOps) { 1538 unsigned VF = Src->getType()->getVectorNumElements(); 1539 1540 // Extract and apply reduction ops in ascending order: 1541 // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1] 1542 Value *Result = Acc; 1543 for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) { 1544 Value *Ext = 1545 Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx)); 1546 1547 if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 1548 Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext, 1549 "bin.rdx"); 1550 } else { 1551 assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && 1552 "Invalid min/max"); 1553 Result = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, Result, 1554 Ext); 1555 } 1556 1557 if (!RedOps.empty()) 1558 propagateIRFlags(Result, RedOps); 1559 } 1560 1561 return Result; 1562 } 1563 1564 // Helper to generate a log2 shuffle reduction. 1565 Value * 1566 llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, 1567 RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, 1568 ArrayRef<Value *> RedOps) { 1569 unsigned VF = Src->getType()->getVectorNumElements(); 1570 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles 1571 // and vector ops, reducing the set of values being computed by half each 1572 // round. 1573 assert(isPowerOf2_32(VF) && 1574 "Reduction emission only supported for pow2 vectors!"); 1575 Value *TmpVec = Src; 1576 SmallVector<Constant *, 32> ShuffleMask(VF, nullptr); 1577 for (unsigned i = VF; i != 1; i >>= 1) { 1578 // Move the upper half of the vector to the lower half. 1579 for (unsigned j = 0; j != i / 2; ++j) 1580 ShuffleMask[j] = Builder.getInt32(i / 2 + j); 1581 1582 // Fill the rest of the mask with undef. 1583 std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), 1584 UndefValue::get(Builder.getInt32Ty())); 1585 1586 Value *Shuf = Builder.CreateShuffleVector( 1587 TmpVec, UndefValue::get(TmpVec->getType()), 1588 ConstantVector::get(ShuffleMask), "rdx.shuf"); 1589 1590 if (Op != Instruction::ICmp && Op != Instruction::FCmp) { 1591 // Floating point operations had to be 'fast' to enable the reduction. 1592 TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, 1593 TmpVec, Shuf, "bin.rdx")); 1594 } else { 1595 assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && 1596 "Invalid min/max"); 1597 TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, 1598 Shuf); 1599 } 1600 if (!RedOps.empty()) 1601 propagateIRFlags(TmpVec, RedOps); 1602 } 1603 // The result is in the first element of the vector. 1604 return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); 1605 } 1606 1607 /// Create a simple vector reduction specified by an opcode and some 1608 /// flags (if generating min/max reductions). 1609 Value *llvm::createSimpleTargetReduction( 1610 IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, 1611 Value *Src, TargetTransformInfo::ReductionFlags Flags, 1612 ArrayRef<Value *> RedOps) { 1613 assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); 1614 1615 Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); 1616 std::function<Value*()> BuildFunc; 1617 using RD = RecurrenceDescriptor; 1618 RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; 1619 // TODO: Support creating ordered reductions. 1620 FastMathFlags FMFFast; 1621 FMFFast.setFast(); 1622 1623 switch (Opcode) { 1624 case Instruction::Add: 1625 BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; 1626 break; 1627 case Instruction::Mul: 1628 BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; 1629 break; 1630 case Instruction::And: 1631 BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; 1632 break; 1633 case Instruction::Or: 1634 BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; 1635 break; 1636 case Instruction::Xor: 1637 BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; 1638 break; 1639 case Instruction::FAdd: 1640 BuildFunc = [&]() { 1641 auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); 1642 cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); 1643 return Rdx; 1644 }; 1645 break; 1646 case Instruction::FMul: 1647 BuildFunc = [&]() { 1648 auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); 1649 cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); 1650 return Rdx; 1651 }; 1652 break; 1653 case Instruction::ICmp: 1654 if (Flags.IsMaxOp) { 1655 MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; 1656 BuildFunc = [&]() { 1657 return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); 1658 }; 1659 } else { 1660 MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; 1661 BuildFunc = [&]() { 1662 return Builder.CreateIntMinReduce(Src, Flags.IsSigned); 1663 }; 1664 } 1665 break; 1666 case Instruction::FCmp: 1667 if (Flags.IsMaxOp) { 1668 MinMaxKind = RD::MRK_FloatMax; 1669 BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; 1670 } else { 1671 MinMaxKind = RD::MRK_FloatMin; 1672 BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; 1673 } 1674 break; 1675 default: 1676 llvm_unreachable("Unhandled opcode"); 1677 break; 1678 } 1679 if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) 1680 return BuildFunc(); 1681 return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); 1682 } 1683 1684 /// Create a vector reduction using a given recurrence descriptor. 1685 Value *llvm::createTargetReduction(IRBuilder<> &B, 1686 const TargetTransformInfo *TTI, 1687 RecurrenceDescriptor &Desc, Value *Src, 1688 bool NoNaN) { 1689 // TODO: Support in-order reductions based on the recurrence descriptor. 1690 using RD = RecurrenceDescriptor; 1691 RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); 1692 TargetTransformInfo::ReductionFlags Flags; 1693 Flags.NoNaN = NoNaN; 1694 switch (RecKind) { 1695 case RD::RK_FloatAdd: 1696 return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); 1697 case RD::RK_FloatMult: 1698 return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); 1699 case RD::RK_IntegerAdd: 1700 return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); 1701 case RD::RK_IntegerMult: 1702 return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); 1703 case RD::RK_IntegerAnd: 1704 return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); 1705 case RD::RK_IntegerOr: 1706 return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); 1707 case RD::RK_IntegerXor: 1708 return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); 1709 case RD::RK_IntegerMinMax: { 1710 RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); 1711 Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); 1712 Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); 1713 return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); 1714 } 1715 case RD::RK_FloatMinMax: { 1716 Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; 1717 return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); 1718 } 1719 default: 1720 llvm_unreachable("Unhandled RecKind"); 1721 } 1722 } 1723 1724 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) { 1725 auto *VecOp = dyn_cast<Instruction>(I); 1726 if (!VecOp) 1727 return; 1728 auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0]) 1729 : dyn_cast<Instruction>(OpValue); 1730 if (!Intersection) 1731 return; 1732 const unsigned Opcode = Intersection->getOpcode(); 1733 VecOp->copyIRFlags(Intersection); 1734 for (auto *V : VL) { 1735 auto *Instr = dyn_cast<Instruction>(V); 1736 if (!Instr) 1737 continue; 1738 if (OpValue == nullptr || Opcode == Instr->getOpcode()) 1739 VecOp->andIRFlags(V); 1740 } 1741 } 1742