//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation.  This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible).  Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because they
// often interact, especially for C++ programs.  As such, iterating between
// SRoA, then Mem2Reg until we run out of things to promote works well.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

#define DEBUG_TYPE "scalarrepl"

STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumAdjusted,  "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");

namespace {
#define SROA SROA_
  struct SROA : public FunctionPass {
    SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT)
      : FunctionPass(ID), HasDomTree(hasDT) {
      if (T == -1)
        SRThreshold = 128;
      else
        SRThreshold = T;
      if (ST == -1)
        StructMemberThreshold = 32;
      else
        StructMemberThreshold = ST;
      if (AT == -1)
        ArrayElementThreshold = 8;
      else
        ArrayElementThreshold = AT;
      if (SLT == -1)
        // Do not limit the scalar integer load size if no threshold is given.
        ScalarLoadThreshold = -1;
      else
        ScalarLoadThreshold = SLT;
    }

    bool runOnFunction(Function &F) override;

    bool performScalarRepl(Function &F);
    bool performPromotion(Function &F);

  private:
    bool HasDomTree;

    /// DeadInsts - Keep track of instructions we have made dead, so that
    /// we can remove them after we are done working.
    SmallVector<Value*, 32> DeadInsts;

    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
    /// information about the uses.  All these fields are initialized to false
    /// and set to true when something is learned.
    struct AllocaInfo {
      /// The alloca to promote.
      AllocaInst *AI;

      /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
      /// looping and avoid redundant work.
      SmallPtrSet<PHINode*, 8> CheckedPHIs;

      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
      bool isUnsafe : 1;

      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
      bool isMemCpySrc : 1;

      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
      bool isMemCpyDst : 1;

      /// hasSubelementAccess - This is true if a subelement of the alloca is
      /// ever accessed, or false if the alloca is only accessed with mem
      /// intrinsics or load/store that only access the entire alloca at once.
      bool hasSubelementAccess : 1;

      /// hasALoadOrStore - This is true if there are any loads or stores to it.
      /// The alloca may just be accessed with memcpy, for example, which would
      /// not set this.
      bool hasALoadOrStore : 1;

      explicit AllocaInfo(AllocaInst *ai)
        : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
          hasSubelementAccess(false), hasALoadOrStore(false) {}
    };

    /// SRThreshold - The maximum alloca size to be considered for SROA.
    unsigned SRThreshold;

    /// StructMemberThreshold - The maximum number of members a struct can
    /// contain to be considered for SROA.
    unsigned StructMemberThreshold;

    /// ArrayElementThreshold - The maximum number of elements an array can
    /// have to be considered for SROA.
    unsigned ArrayElementThreshold;

    /// ScalarLoadThreshold - The maximum size in bits of scalars to load when
    /// converting to scalar.
    unsigned ScalarLoadThreshold;

    void MarkUnsafe(AllocaInfo &I, Instruction *User) {
      I.isUnsafe = true;
      DEBUG(dbgs() << "  Transformation preventing inst: " << *User << '\n');
    }

    bool isSafeAllocaToScalarRepl(AllocaInst *AI);

    void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
    void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
                                         AllocaInfo &Info);
    void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
    void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
                         Type *MemOpType, bool isStore, AllocaInfo &Info,
                         Instruction *TheAccess, bool AllowWholeAccess);
    bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
                          const DataLayout &DL);
    uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
                                  const DataLayout &DL);

    void DoScalarReplacement(AllocaInst *AI,
                             std::vector<AllocaInst*> &WorkList);
    void DeleteDeadInstructions();

    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                              SmallVectorImpl<AllocaInst *> &NewElts);
    void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
                        SmallVectorImpl<AllocaInst *> &NewElts);
    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                    SmallVectorImpl<AllocaInst *> &NewElts);
    void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
                                  uint64_t Offset,
                                  SmallVectorImpl<AllocaInst *> &NewElts);
    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
                                      AllocaInst *AI,
                                      SmallVectorImpl<AllocaInst *> &NewElts);
    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                       SmallVectorImpl<AllocaInst *> &NewElts);
    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                      SmallVectorImpl<AllocaInst *> &NewElts);
    bool ShouldAttemptScalarRepl(AllocaInst *AI);
  };

  // SROA_DT - SROA that uses DominatorTree.
  struct SROA_DT : public SROA {
    static char ID;
  public:
    SROA_DT(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
        SROA(T, true, ID, ST, AT, SLT) {
      initializeSROA_DTPass(*PassRegistry::getPassRegistry());
    }

    // getAnalysisUsage - This pass does not require any passes, but we know it
    // will not alter the CFG, so say so.
    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<AssumptionCacheTracker>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.setPreservesCFG();
    }
  };

  // SROA_SSAUp - SROA that uses SSAUpdater.
  struct SROA_SSAUp : public SROA {
    static char ID;
  public:
    SROA_SSAUp(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
        SROA(T, false, ID, ST, AT, SLT) {
      initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
    }

    // getAnalysisUsage - This pass does not require any passes, but we know it
    // will not alter the CFG, so say so.
    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<AssumptionCacheTracker>();
      AU.setPreservesCFG();
    }
  };

}

char SROA_DT::ID = 0;
char SROA_SSAUp::ID = 0;

INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
                      "Scalar Replacement of Aggregates (DT)", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
                    "Scalar Replacement of Aggregates (DT)", false, false)

INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
                      "Scalar Replacement of Aggregates (SSAUp)", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
                    "Scalar Replacement of Aggregates (SSAUp)", false, false)

// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
                                                   bool UseDomTree,
                                                   int StructMemberThreshold,
                                                   int ArrayElementThreshold,
                                                   int ScalarLoadThreshold) {
  if (UseDomTree)
    return new SROA_DT(Threshold, StructMemberThreshold, ArrayElementThreshold,
                       ScalarLoadThreshold);
  return new SROA_SSAUp(Threshold, StructMemberThreshold,
                        ArrayElementThreshold, ScalarLoadThreshold);
}


//===----------------------------------------------------------------------===//
// Convert To Scalar Optimization.
//===----------------------------------------------------------------------===//

namespace {
/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
/// optimization, which scans the uses of an alloca and determines if it can
/// rewrite it in terms of a single new alloca that can be mem2reg'd.
class ConvertToScalarInfo {
  /// AllocaSize - The size of the alloca being considered in bytes.
  unsigned AllocaSize;
  const DataLayout &DL;
  unsigned ScalarLoadThreshold;

  /// IsNotTrivial - This is set to true if there is some access to the object
  /// which means that mem2reg can't promote it.
  bool IsNotTrivial;

  /// ScalarKind - Tracks the kind of alloca being considered for promotion,
  /// computed based on the uses of the alloca rather than the LLVM type system.
  enum {
    Unknown,

    // Accesses via GEPs that are consistent with element access of a vector
    // type. This will not be converted into a vector unless there is a later
    // access using an actual vector type.
    ImplicitVector,

    // Accesses via vector operations and GEPs that are consistent with the
    // layout of a vector type.
    Vector,

    // An integer bag-of-bits with bitwise operations for insertion and
    // extraction. Any combination of types can be converted into this kind
    // of scalar.
    Integer
  } ScalarKind;

  /// VectorTy - This tracks the type that we should promote the vector to if
  /// it is possible to turn it into a vector.  This starts out null, and if it
  /// isn't possible to turn into a vector type, it gets set to VoidTy.
  VectorType *VectorTy;

  /// HadNonMemTransferAccess - True if there is at least one access to the
  /// alloca that is not a MemTransferInst.  We don't want to turn structs into
  /// large integers unless there is some potential for optimization.
  bool HadNonMemTransferAccess;

  /// HadDynamicAccess - True if some element of this alloca was dynamic.
  /// We don't yet have support for turning a dynamic access into a large
  /// integer.
  bool HadDynamicAccess;

public:
  explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL,
                               unsigned SLT)
    : AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false),
      ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false),
      HadDynamicAccess(false) { }

  AllocaInst *TryConvert(AllocaInst *AI);

private:
  bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx);
  void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
  bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
                           Value *NonConstantIdx);

  Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
                                    uint64_t Offset, Value* NonConstantIdx,
                                    IRBuilder<> &Builder);
  Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                   uint64_t Offset, Value* NonConstantIdx,
                                   IRBuilder<> &Builder);
};
} // end anonymous namespace.


/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able.  This returns the new
/// alloca if possible or null if not.
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
  // If we can't convert this scalar, or if mem2reg can trivially do it, bail
  // out.
  if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial)
    return nullptr;

  // If an alloca has only memset / memcpy uses, it may still have an Unknown
  // ScalarKind. Treat it as an Integer below.
  if (ScalarKind == Unknown)
    ScalarKind = Integer;

  if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
    ScalarKind = Integer;

  // If we were able to find a vector type that can handle this with
  // insert/extract elements, and if there was at least one use that had
  // a vector type, promote this to a vector.  We don't want to promote
  // random stuff that doesn't use vectors (e.g. <9 x double>) because then
  // we just get a lot of insert/extracts.  If at least one vector is
  // involved, then we probably really do have a union of vector/array.
  Type *NewTy;
  if (ScalarKind == Vector) {
    assert(VectorTy && "Missing type for vector scalar.");
    DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
                 << *VectorTy << '\n');
    NewTy = VectorTy;  // Use the vector type.
  } else {
    unsigned BitWidth = AllocaSize * 8;

    // Do not convert to scalar integer if the alloca size exceeds the
    // scalar load threshold.
    if (BitWidth > ScalarLoadThreshold)
      return nullptr;

    if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
        !HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth))
      return nullptr;
    // Dynamic accesses on integers aren't yet supported.  They need us to shift
    // by a dynamic amount which could be difficult to work out as we might not
    // know whether to use a left or right shift.
    if (ScalarKind == Integer && HadDynamicAccess)
      return nullptr;

    DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
    // Create and insert the integer alloca.
    NewTy = IntegerType::get(AI->getContext(), BitWidth);
  }
  AllocaInst *NewAI =
      new AllocaInst(NewTy, nullptr, "", &AI->getParent()->front());
  ConvertUsesToScalar(AI, NewAI, 0, nullptr);
  return NewAI;
}
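
// As a rough illustration only (an invented example; GEPs and bitcasts are
// elided), TryConvert takes an alloca whose pieces are accessed only as plain
// scalars, e.g.
//
//   %x = alloca { float, float }        ; 8 bytes
//   store float %a, ...                 ; offset 0
//   store float %b, ...                 ; offset 4
//   %v = load i64, ...                  ; whole-alloca load through a cast
//
// and replaces it with a single new `alloca i64` (or a vector alloca when a
// full-width vector access was seen).  ConvertUsesToScalar then rewrites the
// loads and stores into the bit-insert/extract form so mem2reg can finish.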

/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
/// (VectorTy) so far at the offset specified by Offset (which is specified in
/// bytes).
///
/// There are two cases we handle here:
///   1) A union of vector types of the same size and potentially its elements.
///      Here we turn element accesses into insert/extract element operations.
///      This promotes a <4 x float> with a store of float to the third element
///      into a <4 x float> that uses insert element.
///   2) A fully general blob of memory, which we turn into some (potentially
///      large) integer type with extract and insert operations where the loads
///      and stores would mutate the memory.  We mark this by setting VectorTy
///      to VoidTy.
void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
                                                    uint64_t Offset) {
  // If we already decided to turn this into a blob of integer memory, there is
  // nothing to be done.
  if (ScalarKind == Integer)
    return;

  // If this could be contributing to a vector, analyze it.

  // If the In type is a vector that is the same size as the alloca, see if it
  // matches the existing VecTy.
  if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
    if (MergeInVectorType(VInTy, Offset))
      return;
  } else if (In->isFloatTy() || In->isDoubleTy() ||
             (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
              isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
    // Full width accesses can be ignored, because they can always be turned
    // into bitcasts.
    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
    if (EltSize == AllocaSize)
      return;

    // If we're accessing something that could be an element of a vector, see
    // if the implied vector agrees with what we already have and if Offset is
    // compatible with it.
    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
        (!VectorTy || EltSize == VectorTy->getElementType()
                                         ->getPrimitiveSizeInBits()/8)) {
      if (!VectorTy) {
        ScalarKind = ImplicitVector;
        VectorTy = VectorType::get(In, AllocaSize/EltSize);
      }
      return;
    }
  }

  // Otherwise, we have a case that we can't handle with an optimized vector
  // form.  We can still turn this into a large integer.
  ScalarKind = Integer;
}

/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
/// returning true if the type was successfully merged and false otherwise.
bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
                                            uint64_t Offset) {
  if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
    // If we're storing/loading a vector of the right size, allow it as a
    // vector.  If this is the first vector we see, remember the type so that
    // we know the element size.  If this is a subsequent access, ignore it
    // even if it is a differing type but the same size.  Worst case we can
    // bitcast the resultant vectors.
    if (!VectorTy)
      VectorTy = VInTy;
    ScalarKind = Vector;
    return true;
  }

  return false;
}
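
// Illustrative sketch only (types and offsets invented for the example):
// given a 16-byte alloca accessed as
//
//   store float %f, ...          ; Offset == 4, EltSize == 4
//   store <4 x float> %v, ...    ; full-width vector access at offset 0
//
// the float store is compatible with an implied <4 x float> element access
// (ScalarKind becomes ImplicitVector), and the full-width vector access then
// upgrades ScalarKind to Vector, so TryConvert rewrites the alloca as a
// <4 x float> alloca with insert/extractelement.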

/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type.  If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
/// is the current offset from the base of the alloca being analyzed.
///
/// If we see at least one access to the value that is as a vector type, set the
/// SawVec flag.
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
                                             Value* NonConstantIdx) {
  for (User *U : V->users()) {
    Instruction *UI = cast<Instruction>(U);

    if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
      // Don't break volatile loads.
      if (!LI->isSimple())
        return false;
      // Don't touch MMX operations.
      if (LI->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(LI->getType(), Offset);
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V || !SI->isSimple()) return false;
      // Don't touch MMX operations.
      if (SI->getOperand(0)->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(UI)) {
      if (!onlyUsedByLifetimeMarkers(BCI))
        IsNotTrivial = true;  // Can't be mem2reg'd.
      if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
        return false;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UI)) {
      // If this is a GEP with variable indices, we can't handle it.
      PointerType* PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
      if (!PtrTy)
        return false;

      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      Value *GEPNonConstantIdx = nullptr;
      if (!GEP->hasAllConstantIndices()) {
        if (!isa<VectorType>(PtrTy->getElementType()))
          return false;
        if (NonConstantIdx)
          return false;
        GEPNonConstantIdx = Indices.pop_back_val();
        if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
          return false;
        HadDynamicAccess = true;
      } else
        GEPNonConstantIdx = NonConstantIdx;
      uint64_t GEPOffset = DL.getIndexedOffset(PtrTy,
                                               Indices);
      // See if all uses can be converted.
      if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
        return false;
      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // handle it.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(UI)) {
      // Store to dynamic index.
      if (NonConstantIdx)
        return false;
      // Store of constant value.
      if (!isa<ConstantInt>(MSI->getValue()))
        return false;

      // Store of constant size.
      ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
      if (!Len)
        return false;

      // If the size differs from the alloca, we can only convert the alloca to
      // an integer bag-of-bits.
      // FIXME: This should handle all of the cases that are currently accepted
      // as vector element insertions.
      if (Len->getZExtValue() != AllocaSize || Offset != 0)
        ScalarKind = Integer;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(UI)) {
      // Store to dynamic index.
      if (NonConstantIdx)
        return false;
      ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
      if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0)
        return false;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      continue;
    }

    // If this is a lifetime intrinsic, we can handle it.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        continue;
      }
    }

    // Otherwise, we cannot handle this!
    return false;
  }

  return true;
}

/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly.  This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.  By the end of this, there should be no uses of Ptr.
void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
                                              uint64_t Offset,
                                              Value* NonConstantIdx) {
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->user_back());

    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
      CI->eraseFromParent();
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      Value* GEPNonConstantIdx = nullptr;
      if (!GEP->hasAllConstantIndices()) {
        assert(!NonConstantIdx &&
               "Dynamic GEP reading from dynamic GEP unsupported");
        GEPNonConstantIdx = Indices.pop_back_val();
      } else
        GEPNonConstantIdx = NonConstantIdx;
      uint64_t GEPOffset = DL.getIndexedOffset(GEP->getPointerOperandType(),
                                               Indices);
      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx);
      GEP->eraseFromParent();
      continue;
    }

    IRBuilder<> Builder(User);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
      Value *LoadedVal = Builder.CreateLoad(NewAI);
      Value *NewLoadVal
        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset,
                                     NonConstantIdx, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             NonConstantIdx, Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();

      // If the load we just inserted is now dead, then the inserted store
      // overwrote the entire thing.
      if (Old->use_empty())
        Old->eraseFromParent();
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
      int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
      if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
        unsigned NumBytes = static_cast<unsigned>(SNumBytes);
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value replicated the right number of times.
        APInt APVal(NumBytes*8, Val);

        // Splat the value if non-zero.
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;

        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
        Value *New = ConvertScalar_InsertValue(
            ConstantInt::get(User->getContext(), APVal),
            Old, Offset, nullptr, Builder);
        Builder.CreateStore(New, NewAI);

        // If the load we just inserted is now dead, then the memset overwrote
        // the entire thing.
        if (Old->use_empty())
          Old->eraseFromParent();
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");
      assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");

      // If the source and destination are both to the same alloca, then this is
      // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
      // as appropriate.
      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, DL, 0));

      if (GetUnderlyingObject(MTI->getSource(), DL, 0) != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   SPTy->getAddressSpace());
        }
        SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   DPTy->getAddressSpace());
        }
        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);

        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
      } else {
        // Noop transfer. Src == Dst
      }

      MTI->eraseFromParent();
      continue;
    }

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        // There's no need to preserve these, as the resulting alloca will be
        // converted to a register anyways.
        II->eraseFromParent();
        continue;
      }
    }

    llvm_unreachable("Unsupported operation!");
  }
}

/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset.  This returns the value, which is of type ToType.
///
/// This happens when we are converting an "integer union" to a single
/// integer scalar, or when we are converting a "vector union" to a vector with
/// insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
                           uint64_t Offset, Value* NonConstantIdx,
                           IRBuilder<> &Builder) {
  // If the load is of the whole new alloca, no conversion is needed.
  Type *FromType = FromVal->getType();
  if (FromType == ToType && Offset == 0)
    return FromVal;

  // If the result alloca is a vector type, this is either an element
  // access or a bitcast to another vector type of the same size.
  if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
    unsigned FromTypeSize = DL.getTypeAllocSize(FromType);
    unsigned ToTypeSize = DL.getTypeAllocSize(ToType);
    if (FromTypeSize == ToTypeSize)
      return Builder.CreateBitCast(FromVal, ToType);

    // Otherwise it must be an element access.
    unsigned Elt = 0;
    if (Offset) {
      unsigned EltSize = DL.getTypeAllocSizeInBits(VTy->getElementType());
      Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
    }
    // Return the element extracted out of it.
    Value *Idx;
    if (NonConstantIdx) {
      if (Elt)
        Idx = Builder.CreateAdd(NonConstantIdx,
                                Builder.getInt32(Elt),
                                "dyn.offset");
      else
        Idx = NonConstantIdx;
    } else
      Idx = Builder.getInt32(Elt);
    Value *V = Builder.CreateExtractElement(FromVal, Idx);
    if (V->getType() != ToType)
      V = Builder.CreateBitCast(V, ToType);
    return V;
  }

  // If ToType is a first class aggregate, extract out each of the pieces and
  // use insertvalue's to form the FCA.
  if (StructType *ST = dyn_cast<StructType>(ToType)) {
    assert(!NonConstantIdx &&
           "Dynamic indexing into struct types not supported");
    const StructLayout &Layout = *DL.getStructLayout(ST);
    Value *Res = UndefValue::get(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                        Offset+Layout.getElementOffsetInBits(i),
                                              nullptr, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i);
    }
    return Res;
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
    assert(!NonConstantIdx &&
           "Dynamic indexing into array types not supported");
    uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
    Value *Res = UndefValue::get(AT);
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                              Offset+i*EltSize, nullptr,
                                              Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i);
    }
    return Res;
  }

  // Otherwise, this must be a union that was converted to an integer value.
  IntegerType *NTy = cast<IntegerType>(FromVal->getType());

  // If this is a big-endian system and the load is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (DL.isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = DL.getTypeStoreSizeInBits(NTy) -
            DL.getTypeStoreSizeInBits(ToType) - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative shift amounts here (by shifting left instead),
  // even though they are not defined.  We do this to support (f.e.) loads off
  // the end of a structure where only some bits are used.
  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateLShr(FromVal,
                                 ConstantInt::get(FromVal->getType(), ShAmt));
  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateShl(FromVal,
                                ConstantInt::get(FromVal->getType(), -ShAmt));

  // Finally, unconditionally truncate the integer to the right width.
  unsigned LIBitWidth = DL.getTypeSizeInBits(ToType);
  if (LIBitWidth < NTy->getBitWidth())
    FromVal =
        Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
                                                      LIBitWidth));
  else if (LIBitWidth > NTy->getBitWidth())
    FromVal =
        Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
                                                     LIBitWidth));

  // If the result is an integer, this is a trunc or bitcast.
  if (ToType->isIntegerTy()) {
    // Should be done.
  } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
    // Just do a bitcast, we know the sizes match up.
    FromVal = Builder.CreateBitCast(FromVal, ToType);
  } else {
    // Otherwise must be a pointer.
    FromVal = Builder.CreateIntToPtr(FromVal, ToType);
  }
  assert(FromVal->getType() == ToType && "Didn't convert right?");
  return FromVal;
}

/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset.
///
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
///
/// NonConstantIdx is an index value if there was a GEP with a non-constant
/// index value.  If this is 0 then all GEPs used to find this insert address
/// are constant.
Value *ConvertToScalarInfo::
ConvertScalar_InsertValue(Value *SV, Value *Old,
                          uint64_t Offset, Value* NonConstantIdx,
                          IRBuilder<> &Builder) {
  // Convert the stored type to the actual type, shift it left to insert
  // then 'or' into place.
  Type *AllocaType = Old->getType();
  LLVMContext &Context = Old->getContext();

  if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
    uint64_t VecSize = DL.getTypeAllocSizeInBits(VTy);
    uint64_t ValSize = DL.getTypeAllocSizeInBits(SV->getType());

    // Changing the whole vector with memset or with an access of a different
    // vector type?
    if (ValSize == VecSize)
      return Builder.CreateBitCast(SV, AllocaType);

    // Must be an element insertion.
    Type *EltTy = VTy->getElementType();
    if (SV->getType() != EltTy)
      SV = Builder.CreateBitCast(SV, EltTy);
    uint64_t EltSize = DL.getTypeAllocSizeInBits(EltTy);
    unsigned Elt = Offset/EltSize;
    Value *Idx;
    if (NonConstantIdx) {
      if (Elt)
        Idx = Builder.CreateAdd(NonConstantIdx,
                                Builder.getInt32(Elt),
                                "dyn.offset");
      else
        Idx = NonConstantIdx;
    } else
      Idx = Builder.getInt32(Elt);
    return Builder.CreateInsertElement(Old, SV, Idx);
  }

  // If SV is a first-class aggregate value, insert each value recursively.
  if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
    assert(!NonConstantIdx &&
           "Dynamic indexing into struct types not supported");
    const StructLayout &Layout = *DL.getStructLayout(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i);
      Old = ConvertScalar_InsertValue(Elt, Old,
                                      Offset+Layout.getElementOffsetInBits(i),
                                      nullptr, Builder);
    }
    return Old;
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
    assert(!NonConstantIdx &&
           "Dynamic indexing into array types not supported");
    uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i);
      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, nullptr,
                                      Builder);
    }
    return Old;
  }

  // If SV is a float, convert it to the appropriate integer type.
  // If it is a pointer, do the same.
  unsigned SrcWidth = DL.getTypeSizeInBits(SV->getType());
  unsigned DestWidth = DL.getTypeSizeInBits(AllocaType);
  unsigned SrcStoreWidth = DL.getTypeStoreSizeInBits(SV->getType());
  unsigned DestStoreWidth = DL.getTypeStoreSizeInBits(AllocaType);
  if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
    SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
  else if (SV->getType()->isPointerTy())
    SV = Builder.CreatePtrToInt(SV, DL.getIntPtrType(SV->getType()));

  // Zero extend or truncate the value if needed.
  if (SV->getType() != AllocaType) {
    if (SV->getType()->getPrimitiveSizeInBits() <
        AllocaType->getPrimitiveSizeInBits())
      SV = Builder.CreateZExt(SV, AllocaType);
    else {
      // Truncation may be needed if storing more than the alloca can hold
      // (undefined behavior).
      SV = Builder.CreateTrunc(SV, AllocaType);
      SrcWidth = DestWidth;
      SrcStoreWidth = DestStoreWidth;
    }
  }

  // If this is a big-endian system and the store is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (DL.isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative shift amounts here (by shifting right instead),
  // even though they are not defined.  We do this to support (f.e.) stores off
  // the end of a structure where only some bits in the structure are set.
  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
    Mask <<= ShAmt;
  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
    Mask = Mask.lshr(-ShAmt);
  }

  // Mask out the bits we are about to insert from the old value, and or
  // in the new bits.
  if (SrcWidth != DestWidth) {
    assert(DestWidth > SrcWidth);
    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
    SV = Builder.CreateOr(Old, SV, "ins");
  }
  return SV;
}
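
// For illustration only (offsets and widths invented for the example):
// storing an i8 %b at byte offset 2 of an alloca that has been converted to
// a single i32, on a little-endian target, roughly becomes
//
//   %old  = load i32, i32* %newai
//   %mask = and i32 %old, -16711681    ; clear bits 16..23 (~0x00FF0000)
//   %ext  = zext i8 %b to i32
//   %shl  = shl i32 %ext, 16
//   %ins  = or i32 %mask, %shl
//   store i32 %ins, i32* %newai
//
// i.e. the zext/shl/and/or sequence built by ConvertScalar_InsertValue above.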


//===----------------------------------------------------------------------===//
// SRoA Driver
//===----------------------------------------------------------------------===//


bool SROA::runOnFunction(Function &F) {
  if (skipOptnoneFunction(F))
    return false;

  bool Changed = performPromotion(F);

  while (1) {
    bool LocalChange = performScalarRepl(F);
    if (!LocalChange) break;   // No need to repromote if no scalarrepl
    Changed = true;
    LocalChange = performPromotion(F);
    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
  }

  return Changed;
}

namespace {
class AllocaPromoter : public LoadAndStorePromoter {
  AllocaInst *AI;
  DIBuilder *DIB;
  SmallVector<DbgDeclareInst *, 4> DDIs;
  SmallVector<DbgValueInst *, 4> DVIs;
public:
  AllocaPromoter(ArrayRef<Instruction*> Insts, SSAUpdater &S,
                 DIBuilder *DB)
    : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {}

  void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
    // Remember which alloca we're promoting (for isInstInList).
    this->AI = AI;
    if (auto *L = LocalAsMetadata::getIfExists(AI)) {
      if (auto *DINode = MetadataAsValue::getIfExists(AI->getContext(), L)) {
        for (User *U : DINode->users())
          if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
            DDIs.push_back(DDI);
          else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
            DVIs.push_back(DVI);
      }
    }

    LoadAndStorePromoter::run(Insts);
    AI->eraseFromParent();
    for (SmallVectorImpl<DbgDeclareInst *>::iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      DDI->eraseFromParent();
    }
    for (SmallVectorImpl<DbgValueInst *>::iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      DVI->eraseFromParent();
    }
  }

  bool isInstInList(Instruction *I,
                    const SmallVectorImpl<Instruction*> &Insts) const override {
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      return LI->getOperand(0) == AI;
    return cast<StoreInst>(I)->getPointerOperand() == AI;
  }

  void updateDebugInfo(Instruction *Inst) const override {
    for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
    }
    for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      Value *Arg = nullptr;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        // If an argument is zero extended then use the argument directly.  The
        // ZExt may be zapped by an optimization pass in the future.
        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
          Arg = dyn_cast<Argument>(ZExt->getOperand(0));
        if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
          Arg = dyn_cast<Argument>(SExt->getOperand(0));
        if (!Arg)
          Arg = SI->getOperand(0);
      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        Arg = LI->getOperand(0);
      } else {
        continue;
      }
      DIB->insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
                                   DVI->getExpression(), DVI->getDebugLoc(),
                                   Inst);
    }
  }
};
} // end anon namespace

/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
/// subsequently loaded can be rewritten to load both input pointers and then
/// select between the result, allowing the load of the alloca to be promoted.
/// From this:
///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other
///   %V = load i32* %P2
/// to:
///   %V1 = load i32* %Alloca      -> will be mem2reg'd
///   %V2 = load i32* %Other
///   %V = select i1 %cond, i32 %V1, i32 %V2
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
static bool isSafeSelectToSpeculate(SelectInst *SI) {
  const DataLayout &DL = SI->getModule()->getDataLayout();
  bool TDerefable = isDereferenceablePointer(SI->getTrueValue(), DL);
  bool FDerefable = isDereferenceablePointer(SI->getFalseValue(), DL);

  for (User *U : SI->users()) {
    LoadInst *LI = dyn_cast<LoadInst>(U);
    if (!LI || !LI->isSimple()) return false;

    // Both operands to the select need to be dereferencable, either absolutely
    // (e.g. allocas) or at this point because we can see other accesses to it.
    if (!TDerefable &&
        !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
                                     LI->getAlignment()))
      return false;
    if (!FDerefable &&
        !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
                                     LI->getAlignment()))
      return false;
  }

  return true;
}

/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
/// subsequently loaded can be rewritten to load both input pointers in the pred
/// blocks and then PHI the results, allowing the load of the alloca to be
/// promoted.
/// From this:
///   %P2 = phi [i32* %Alloca, i32* %Other]
///   %V = load i32* %P2
/// to:
///   %V1 = load i32* %Alloca      -> will be mem2reg'd
///   ...
///   %V2 = load i32* %Other
///   ...
///   %V = phi [i32 %V1, i32 %V2]
///
/// We can do this to a PHI if its only uses are loads and if each incoming
/// pointer can be loaded unconditionally.
static bool isSafePHIToSpeculate(PHINode *PN) {
  // For now, we can only do this promotion if the load is in the same block as
  // the PHI, and if there are no stores between the phi and load.
  // TODO: Allow recursive phi users.
  // TODO: Allow stores.
  BasicBlock *BB = PN->getParent();
  unsigned MaxAlign = 0;
  for (User *U : PN->users()) {
    LoadInst *LI = dyn_cast<LoadInst>(U);
    if (!LI || !LI->isSimple()) return false;

    // For now we only allow loads in the same block as the PHI.  This is a
    // common case that happens when instcombine merges two loads through a PHI.
    if (LI->getParent() != BB) return false;

    // Ensure that there are no instructions between the PHI and the load that
    // could store.
    for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
      if (BBI->mayWriteToMemory())
        return false;

    MaxAlign = std::max(MaxAlign, LI->getAlignment());
  }

  const DataLayout &DL = PN->getModule()->getDataLayout();

  // Okay, we know that we have one or more loads in the same block as the PHI.
  // We can transform this if it is safe to push the loads into the predecessor
  // blocks.  The only thing to watch out for is that we can't put a possibly
  // trapping load in the predecessor if it is a critical edge.
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = PN->getIncomingBlock(i);
    Value *InVal = PN->getIncomingValue(i);

    // If the terminator of the predecessor has side-effects (an invoke),
    // there is no safe place to put a load in the predecessor.
    if (Pred->getTerminator()->mayHaveSideEffects())
      return false;

    // If the value is produced by the terminator of the predecessor
    // (an invoke), there is no valid place to put a load in the predecessor.
    if (Pred->getTerminator() == InVal)
      return false;

    // If the predecessor has a single successor, then the edge isn't critical.
    if (Pred->getTerminator()->getNumSuccessors() == 1)
      continue;

    // If this pointer is always safe to load, or if we can prove that there is
    // already a load in the block, then we can move the load to the pred block.
    if (isDereferenceablePointer(InVal, DL) ||
        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign))
      continue;

    return false;
  }

  return true;
}


/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
/// direct (non-volatile) loads and stores to it.  If the alloca is close but
/// not quite there, this will transform the code to allow promotion.  As such,
/// it is a non-pure predicate.
static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout &DL) {
  SetVector<Instruction*, SmallVector<Instruction*, 4>,
            SmallPtrSet<Instruction*, 4> > InstsToRewrite;
  for (User *U : AI->users()) {
    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (!LI->isSimple())
        return false;
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == AI || !SI->isSimple())
        return false;   // Don't allow a store OF the AI, only INTO the AI.
      continue;
    }

    if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
      // If the condition being selected on is a constant, fold the select, yes
      // this does (rarely) happen early on.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
        Value *Result = SI->getOperand(1+CI->isZero());
        SI->replaceAllUsesWith(Result);
        SI->eraseFromParent();

        // This is very rare and we just scrambled the use list of AI, start
        // over completely.
        return tryToMakeAllocaBePromotable(AI, DL);
      }

      // If it is safe to turn "load (select c, AI, ptr)" into a select of two
      // loads, then we can transform this by rewriting the select.
      if (!isSafeSelectToSpeculate(SI))
        return false;

      InstsToRewrite.insert(SI);
      continue;
    }

    if (PHINode *PN = dyn_cast<PHINode>(U)) {
      if (PN->use_empty()) {  // Dead PHIs can be stripped.
        InstsToRewrite.insert(PN);
        continue;
      }

      // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
      // in the pred blocks, then we can transform this by rewriting the PHI.
      if (!isSafePHIToSpeculate(PN))
        return false;

      InstsToRewrite.insert(PN);
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      if (onlyUsedByLifetimeMarkers(BCI)) {
        InstsToRewrite.insert(BCI);
        continue;
      }
    }

    return false;
  }

  // If there are no instructions to rewrite, then all uses are load/stores and
  // we're done!
  if (InstsToRewrite.empty())
    return true;

  // If we have instructions that need to be rewritten for this to be promotable
  // take care of it now.
  for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
      // This could only be a bitcast used by nothing but lifetime intrinsics.
      for (BitCastInst::user_iterator I = BCI->user_begin(), E = BCI->user_end();
           I != E;)
        cast<Instruction>(*I++)->eraseFromParent();
      BCI->eraseFromParent();
      continue;
    }

    if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
      // Selects in InstsToRewrite only have load uses.  Rewrite each as two
      // loads with a new select.
      while (!SI->use_empty()) {
        LoadInst *LI = cast<LoadInst>(SI->user_back());

        IRBuilder<> Builder(LI);
        LoadInst *TrueLoad =
          Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
        LoadInst *FalseLoad =
          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");

        // Transfer alignment and AA info if present.
        TrueLoad->setAlignment(LI->getAlignment());
        FalseLoad->setAlignment(LI->getAlignment());

        AAMDNodes Tags;
        LI->getAAMetadata(Tags);
        if (Tags) {
          TrueLoad->setAAMetadata(Tags);
          FalseLoad->setAAMetadata(Tags);
        }

        Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
        V->takeName(LI);
        LI->replaceAllUsesWith(V);
        LI->eraseFromParent();
      }

      // Now that all the loads are gone, the select is gone too.
      SI->eraseFromParent();
      continue;
    }

    // Otherwise, we have a PHI node which allows us to push the loads into the
    // predecessors.
    PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
    if (PN->use_empty()) {
      PN->eraseFromParent();
      continue;
    }

    Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
    PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
                                     PN->getName()+".ld", PN);

    // Get the AA tags and alignment to use from one of the loads.  It doesn't
    // matter which one we get and if any differ, it doesn't matter.
    LoadInst *SomeLoad = cast<LoadInst>(PN->user_back());

    AAMDNodes AATags;
    SomeLoad->getAAMetadata(AATags);
    unsigned Align = SomeLoad->getAlignment();

    // Rewrite all loads of the PN to use the new PHI.
    while (!PN->use_empty()) {
      LoadInst *LI = cast<LoadInst>(PN->user_back());
      LI->replaceAllUsesWith(NewPN);
      LI->eraseFromParent();
    }

    // Inject loads into all of the pred blocks.  Keep track of which blocks we
    // insert them into in case we have multiple edges from the same block.
    DenseMap<BasicBlock*, LoadInst*> InsertedLoads;

    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      BasicBlock *Pred = PN->getIncomingBlock(i);
      LoadInst *&Load = InsertedLoads[Pred];
      if (!Load) {
        Load = new LoadInst(PN->getIncomingValue(i),
                            PN->getName() + "." + Pred->getName(),
                            Pred->getTerminator());
        Load->setAlignment(Align);
        if (AATags) Load->setAAMetadata(AATags);
      }

      NewPN->addIncoming(Load, Pred);
    }

    PN->eraseFromParent();
  }

  ++NumAdjusted;
  return true;
}

bool SROA::performPromotion(Function &F) {
  std::vector<AllocaInst*> Allocas;
  const DataLayout &DL = F.getParent()->getDataLayout();
  DominatorTree *DT = nullptr;
  if (HasDomTree)
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
  DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
  bool Changed = false;
  SmallVector<Instruction*, 64> Insts;
  while (1) {
    Allocas.clear();

    // Find allocas that are safe to promote, by looking at all instructions in
    // the entry node
    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))  // Is it an alloca?
        if (tryToMakeAllocaBePromotable(AI, DL))
          Allocas.push_back(AI);

    if (Allocas.empty()) break;

    if (HasDomTree)
      PromoteMemToReg(Allocas, *DT, nullptr, &AC);
    else {
      SSAUpdater SSA;
      for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
        AllocaInst *AI = Allocas[i];

        // Build list of instructions to promote.
        for (User *U : AI->users())
          Insts.push_back(cast<Instruction>(U));
        AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
        Insts.clear();
      }
    }
    NumPromoted += Allocas.size();
    Changed = true;
  }

  return Changed;
}


/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
/// SROA.  It must be a struct or array type with a small number of elements.
bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
  Type *T = AI->getAllocatedType();
  // Do not promote any struct that has too many members.
  if (StructType *ST = dyn_cast<StructType>(T))
    return ST->getNumElements() <= StructMemberThreshold;
  // Do not promote any array that has too many elements.
  if (ArrayType *AT = dyn_cast<ArrayType>(T))
    return AT->getNumElements() <= ArrayElementThreshold;
  return false;
}

// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the alloca instructions in the entry block, removing
// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
  std::vector<AllocaInst*> WorkList;
  const DataLayout &DL = F.getParent()->getDataLayout();

  // Scan the entry basic block, adding allocas to the worklist.
  BasicBlock &BB = F.getEntryBlock();
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
    if (AllocaInst *A = dyn_cast<AllocaInst>(I))
      WorkList.push_back(A);

  // Process the worklist
  bool Changed = false;
  while (!WorkList.empty()) {
    AllocaInst *AI = WorkList.back();
    WorkList.pop_back();

    // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
    // with unused elements.
    if (AI->use_empty()) {
      AI->eraseFromParent();
      Changed = true;
      continue;
    }

    // If this alloca is impossible for us to promote, reject it early.
1502 if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) 1503 continue; 1504 1505 // Check to see if we can perform the core SROA transformation. We cannot 1506 // transform the allocation instruction if it is an array allocation 1507 // (allocations OF arrays are ok though), and an allocation of a scalar 1508 // value cannot be decomposed at all. 1509 uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType()); 1510 1511 // Do not promote [0 x %struct]. 1512 if (AllocaSize == 0) continue; 1513 1514 // Do not promote any struct whose size is too big. 1515 if (AllocaSize > SRThreshold) continue; 1516 1517 // If the alloca looks like a good candidate for scalar replacement, and if 1518 // all its users can be transformed, then split up the aggregate into its 1519 // separate elements. 1520 if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) { 1521 DoScalarReplacement(AI, WorkList); 1522 Changed = true; 1523 continue; 1524 } 1525 1526 // If we can turn this aggregate value (potentially with casts) into a 1527 // simple scalar value that can be mem2reg'd into a register value. 1528 // IsNotTrivial tracks whether this is something that mem2reg could have 1529 // promoted itself. If so, we don't want to transform it needlessly. Note 1530 // that we can't just check based on the type: the alloca may be of an i32 1531 // but that has pointer arithmetic to set byte 3 of it or something. 1532 if (AllocaInst *NewAI = 1533 ConvertToScalarInfo((unsigned)AllocaSize, DL, ScalarLoadThreshold) 1534 .TryConvert(AI)) { 1535 NewAI->takeName(AI); 1536 AI->eraseFromParent(); 1537 ++NumConverted; 1538 Changed = true; 1539 continue; 1540 } 1541 1542 // Otherwise, couldn't process this alloca. 1543 } 1544 1545 return Changed; 1546 } 1547 1548 /// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl 1549 /// predicate, do SROA now. 1550 void SROA::DoScalarReplacement(AllocaInst *AI, 1551 std::vector<AllocaInst*> &WorkList) { 1552 DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); 1553 SmallVector<AllocaInst*, 32> ElementAllocas; 1554 if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { 1555 ElementAllocas.reserve(ST->getNumContainedTypes()); 1556 for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { 1557 AllocaInst *NA = new AllocaInst(ST->getContainedType(i), nullptr, 1558 AI->getAlignment(), 1559 AI->getName() + "." + Twine(i), AI); 1560 ElementAllocas.push_back(NA); 1561 WorkList.push_back(NA); // Add to worklist for recursive processing 1562 } 1563 } else { 1564 ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); 1565 ElementAllocas.reserve(AT->getNumElements()); 1566 Type *ElTy = AT->getElementType(); 1567 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { 1568 AllocaInst *NA = new AllocaInst(ElTy, nullptr, AI->getAlignment(), 1569 AI->getName() + "." + Twine(i), AI); 1570 ElementAllocas.push_back(NA); 1571 WorkList.push_back(NA); // Add to worklist for recursive processing 1572 } 1573 } 1574 1575 // Now that we have created the new alloca instructions, rewrite all the 1576 // uses of the old alloca. 1577 RewriteForScalarRepl(AI, AI, 0, ElementAllocas); 1578 1579 // Now erase any instructions that were made dead while rewriting the alloca. 1580 DeleteDeadInstructions(); 1581 AI->eraseFromParent(); 1582 1583 ++NumReplaced; 1584 } 1585 1586 /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, 1587 /// recursively including all their operands that become trivially dead. 
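///
/// For example, once a rewritten load has been erased, the bitcast or GEP that
/// computed its address may be left without users; such feeder instructions
/// are queued and erased here as well (allocas are skipped on purpose, they
/// are deleted separately by their owning worklists).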
1588 void SROA::DeleteDeadInstructions() { 1589 while (!DeadInsts.empty()) { 1590 Instruction *I = cast<Instruction>(DeadInsts.pop_back_val()); 1591 1592 for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) 1593 if (Instruction *U = dyn_cast<Instruction>(*OI)) { 1594 // Zero out the operand and see if it becomes trivially dead. 1595 // (But, don't add allocas to the dead instruction list -- they are 1596 // already on the worklist and will be deleted separately.) 1597 *OI = nullptr; 1598 if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U)) 1599 DeadInsts.push_back(U); 1600 } 1601 1602 I->eraseFromParent(); 1603 } 1604 } 1605 1606 /// isSafeForScalarRepl - Check if instruction I is a safe use with regard to 1607 /// performing scalar replacement of alloca AI. The results are flagged in 1608 /// the Info parameter. Offset indicates the position within AI that is 1609 /// referenced by this instruction. 1610 void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, 1611 AllocaInfo &Info) { 1612 const DataLayout &DL = I->getModule()->getDataLayout(); 1613 for (Use &U : I->uses()) { 1614 Instruction *User = cast<Instruction>(U.getUser()); 1615 1616 if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { 1617 isSafeForScalarRepl(BC, Offset, Info); 1618 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { 1619 uint64_t GEPOffset = Offset; 1620 isSafeGEP(GEPI, GEPOffset, Info); 1621 if (!Info.isUnsafe) 1622 isSafeForScalarRepl(GEPI, GEPOffset, Info); 1623 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { 1624 ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); 1625 if (!Length || Length->isNegative()) 1626 return MarkUnsafe(Info, User); 1627 1628 isSafeMemAccess(Offset, Length->getZExtValue(), nullptr, 1629 U.getOperandNo() == 0, Info, MI, 1630 true /*AllowWholeAccess*/); 1631 } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1632 if (!LI->isSimple()) 1633 return MarkUnsafe(Info, User); 1634 Type *LIType = LI->getType(); 1635 isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info, 1636 LI, true /*AllowWholeAccess*/); 1637 Info.hasALoadOrStore = true; 1638 1639 } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1640 // Store is ok if storing INTO the pointer, not storing the pointer 1641 if (!SI->isSimple() || SI->getOperand(0) == I) 1642 return MarkUnsafe(Info, User); 1643 1644 Type *SIType = SI->getOperand(0)->getType(); 1645 isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info, 1646 SI, true /*AllowWholeAccess*/); 1647 Info.hasALoadOrStore = true; 1648 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { 1649 if (II->getIntrinsicID() != Intrinsic::lifetime_start && 1650 II->getIntrinsicID() != Intrinsic::lifetime_end) 1651 return MarkUnsafe(Info, User); 1652 } else if (isa<PHINode>(User) || isa<SelectInst>(User)) { 1653 isSafePHISelectUseForScalarRepl(User, Offset, Info); 1654 } else { 1655 return MarkUnsafe(Info, User); 1656 } 1657 if (Info.isUnsafe) return; 1658 } 1659 } 1660 1661 1662 /// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer 1663 /// derived from the alloca, we can often still split the alloca into elements. 1664 /// This is useful if we have a large alloca where one element is phi'd 1665 /// together somewhere: we can SRoA and promote all the other elements even if 1666 /// we end up not being able to promote this one. 
///
/// All we require is that the uses of the PHI do not index into other parts of
/// the alloca. The most important use case for this is single loads and stores
/// that are PHI'd together, which can happen due to code sinking.
void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
                                           AllocaInfo &Info) {
  // If we've already checked this PHI, don't do it again.
  if (PHINode *PN = dyn_cast<PHINode>(I))
    if (!Info.CheckedPHIs.insert(PN).second)
      return;

  const DataLayout &DL = I->getModule()->getDataLayout();
  for (User *U : I->users()) {
    Instruction *UI = cast<Instruction>(U);

    if (BitCastInst *BC = dyn_cast<BitCastInst>(UI)) {
      isSafePHISelectUseForScalarRepl(BC, Offset, Info);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
      // Only allow "bitcast" GEPs for simplicity. We could generalize this,
      // but would have to prove that we're staying inside of an element being
      // promoted.
      if (!GEPI->hasAllZeroIndices())
        return MarkUnsafe(Info, UI);
      isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
      if (!LI->isSimple())
        return MarkUnsafe(Info, UI);
      Type *LIType = LI->getType();
      isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
                      LI, false /*AllowWholeAccess*/);
      Info.hasALoadOrStore = true;

    } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      // Store is ok if storing INTO the pointer, not storing the pointer
      if (!SI->isSimple() || SI->getOperand(0) == I)
        return MarkUnsafe(Info, UI);

      Type *SIType = SI->getOperand(0)->getType();
      isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
                      SI, false /*AllowWholeAccess*/);
      Info.hasALoadOrStore = true;
    } else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) {
      isSafePHISelectUseForScalarRepl(UI, Offset, Info);
    } else {
      return MarkUnsafe(Info, UI);
    }
    if (Info.isUnsafe) return;
  }
}

/// isSafeGEP - Check if a GEP instruction can be handled for scalar
/// replacement. It is safe when all the indices are constant, in-bounds
/// references (a non-constant index is tolerated only when it dynamically
/// indexes a vector), and when the resulting offset corresponds to an element
/// within the alloca type. The results are flagged in the Info parameter.
/// Upon return, Offset is adjusted as specified by the GEP indices.
void SROA::isSafeGEP(GetElementPtrInst *GEPI,
                     uint64_t &Offset, AllocaInfo &Info) {
  gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
  if (GEPIt == E)
    return;
  bool NonConstant = false;
  unsigned NonConstantIdxSize = 0;

  // Walk through the GEP type indices, checking the types that this indexes
  // into.
  for (; GEPIt != E; ++GEPIt) {
    // Ignore struct elements, no extra checking needed for these.
    if ((*GEPIt)->isStructTy())
      continue;

    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
    if (!IdxVal) {
      // A non-constant index is only acceptable when it dynamically indexes
      // into a vector; RewriteGEP knows how to preserve such an index. Any
      // other non-constant index makes the alloca unsafe to split.
      if (!(*GEPIt)->isVectorTy())
        return MarkUnsafe(Info, GEPI);
      NonConstant = true;
      NonConstantIdxSize =
          GEPI->getModule()->getDataLayout().getTypeAllocSize(*GEPIt);
    }
  }

  // Compute the offset due to this GEP and check if the alloca has a
  // component element at that offset.
  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
  // If this GEP is non-constant then the last operand must have been a
  // dynamic index into a vector. Pop this now as it has no impact on the
  // constant part of the offset.
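  // (For example, a GEP that selects element 1 of an alloca of type
  //  [4 x <4 x float>] and then a run-time lane %i within that vector: the
  //  trailing %i operand contributes nothing to the constant offset and is
  //  ignored here; RewriteGEP preserves it when the GEP is rebuilt.)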
1748 if (NonConstant) 1749 Indices.pop_back(); 1750 1751 const DataLayout &DL = GEPI->getModule()->getDataLayout(); 1752 Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices); 1753 if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, NonConstantIdxSize, 1754 DL)) 1755 MarkUnsafe(Info, GEPI); 1756 } 1757 1758 /// isHomogeneousAggregate - Check if type T is a struct or array containing 1759 /// elements of the same type (which is always true for arrays). If so, 1760 /// return true with NumElts and EltTy set to the number of elements and the 1761 /// element type, respectively. 1762 static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, 1763 Type *&EltTy) { 1764 if (ArrayType *AT = dyn_cast<ArrayType>(T)) { 1765 NumElts = AT->getNumElements(); 1766 EltTy = (NumElts == 0 ? nullptr : AT->getElementType()); 1767 return true; 1768 } 1769 if (StructType *ST = dyn_cast<StructType>(T)) { 1770 NumElts = ST->getNumContainedTypes(); 1771 EltTy = (NumElts == 0 ? nullptr : ST->getContainedType(0)); 1772 for (unsigned n = 1; n < NumElts; ++n) { 1773 if (ST->getContainedType(n) != EltTy) 1774 return false; 1775 } 1776 return true; 1777 } 1778 return false; 1779 } 1780 1781 /// isCompatibleAggregate - Check if T1 and T2 are either the same type or are 1782 /// "homogeneous" aggregates with the same element type and number of elements. 1783 static bool isCompatibleAggregate(Type *T1, Type *T2) { 1784 if (T1 == T2) 1785 return true; 1786 1787 unsigned NumElts1, NumElts2; 1788 Type *EltTy1, *EltTy2; 1789 if (isHomogeneousAggregate(T1, NumElts1, EltTy1) && 1790 isHomogeneousAggregate(T2, NumElts2, EltTy2) && 1791 NumElts1 == NumElts2 && 1792 EltTy1 == EltTy2) 1793 return true; 1794 1795 return false; 1796 } 1797 1798 /// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI 1799 /// alloca or has an offset and size that corresponds to a component element 1800 /// within it. The offset checked here may have been formed from a GEP with a 1801 /// pointer bitcasted to a different type. 1802 /// 1803 /// If AllowWholeAccess is true, then this allows uses of the entire alloca as a 1804 /// unit. If false, it only allows accesses known to be in a single element. 1805 void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, 1806 Type *MemOpType, bool isStore, 1807 AllocaInfo &Info, Instruction *TheAccess, 1808 bool AllowWholeAccess) { 1809 const DataLayout &DL = TheAccess->getModule()->getDataLayout(); 1810 // Check if this is a load/store of the entire alloca. 1811 if (Offset == 0 && AllowWholeAccess && 1812 MemSize == DL.getTypeAllocSize(Info.AI->getAllocatedType())) { 1813 // This can be safe for MemIntrinsics (where MemOpType is 0) and integer 1814 // loads/stores (which are essentially the same as the MemIntrinsics with 1815 // regard to copying padding between elements). But, if an alloca is 1816 // flagged as both a source and destination of such operations, we'll need 1817 // to check later for padding between elements. 1818 if (!MemOpType || MemOpType->isIntegerTy()) { 1819 if (isStore) 1820 Info.isMemCpyDst = true; 1821 else 1822 Info.isMemCpySrc = true; 1823 return; 1824 } 1825 // This is also safe for references using a type that is compatible with 1826 // the type of the alloca, so that loads/stores can be rewritten using 1827 // insertvalue/extractvalue. 
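    // (For instance, a load of { float, float } from an alloca of type
    //  [2 x float] is fine: both are homogeneous aggregates of two floats,
    //  so the access can later be rebuilt element by element.)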
1828 if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) { 1829 Info.hasSubelementAccess = true; 1830 return; 1831 } 1832 } 1833 // Check if the offset/size correspond to a component within the alloca type. 1834 Type *T = Info.AI->getAllocatedType(); 1835 if (TypeHasComponent(T, Offset, MemSize, DL)) { 1836 Info.hasSubelementAccess = true; 1837 return; 1838 } 1839 1840 return MarkUnsafe(Info, TheAccess); 1841 } 1842 1843 /// TypeHasComponent - Return true if T has a component type with the 1844 /// specified offset and size. If Size is zero, do not check the size. 1845 bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size, 1846 const DataLayout &DL) { 1847 Type *EltTy; 1848 uint64_t EltSize; 1849 if (StructType *ST = dyn_cast<StructType>(T)) { 1850 const StructLayout *Layout = DL.getStructLayout(ST); 1851 unsigned EltIdx = Layout->getElementContainingOffset(Offset); 1852 EltTy = ST->getContainedType(EltIdx); 1853 EltSize = DL.getTypeAllocSize(EltTy); 1854 Offset -= Layout->getElementOffset(EltIdx); 1855 } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) { 1856 EltTy = AT->getElementType(); 1857 EltSize = DL.getTypeAllocSize(EltTy); 1858 if (Offset >= AT->getNumElements() * EltSize) 1859 return false; 1860 Offset %= EltSize; 1861 } else if (VectorType *VT = dyn_cast<VectorType>(T)) { 1862 EltTy = VT->getElementType(); 1863 EltSize = DL.getTypeAllocSize(EltTy); 1864 if (Offset >= VT->getNumElements() * EltSize) 1865 return false; 1866 Offset %= EltSize; 1867 } else { 1868 return false; 1869 } 1870 if (Offset == 0 && (Size == 0 || EltSize == Size)) 1871 return true; 1872 // Check if the component spans multiple elements. 1873 if (Offset + Size > EltSize) 1874 return false; 1875 return TypeHasComponent(EltTy, Offset, Size, DL); 1876 } 1877 1878 /// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite 1879 /// the instruction I, which references it, to use the separate elements. 1880 /// Offset indicates the position within AI that is referenced by this 1881 /// instruction. 1882 void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, 1883 SmallVectorImpl<AllocaInst *> &NewElts) { 1884 const DataLayout &DL = I->getModule()->getDataLayout(); 1885 for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) { 1886 Use &TheUse = *UI++; 1887 Instruction *User = cast<Instruction>(TheUse.getUser()); 1888 1889 if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { 1890 RewriteBitCast(BC, AI, Offset, NewElts); 1891 continue; 1892 } 1893 1894 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { 1895 RewriteGEP(GEPI, AI, Offset, NewElts); 1896 continue; 1897 } 1898 1899 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { 1900 ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); 1901 uint64_t MemSize = Length->getZExtValue(); 1902 if (Offset == 0 && MemSize == DL.getTypeAllocSize(AI->getAllocatedType())) 1903 RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); 1904 // Otherwise the intrinsic can only touch a single element and the 1905 // address operand will be updated, so nothing else needs to be done. 
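      // (e.g. a memset that clears only one field is left in place; rewriting
      //  its GEP/bitcast address operand is enough to redirect it at the
      //  corresponding new element alloca.)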
1906 continue; 1907 } 1908 1909 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { 1910 if (II->getIntrinsicID() == Intrinsic::lifetime_start || 1911 II->getIntrinsicID() == Intrinsic::lifetime_end) { 1912 RewriteLifetimeIntrinsic(II, AI, Offset, NewElts); 1913 } 1914 continue; 1915 } 1916 1917 if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1918 Type *LIType = LI->getType(); 1919 1920 if (isCompatibleAggregate(LIType, AI->getAllocatedType())) { 1921 // Replace: 1922 // %res = load { i32, i32 }* %alloc 1923 // with: 1924 // %load.0 = load i32* %alloc.0 1925 // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 1926 // %load.1 = load i32* %alloc.1 1927 // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 1928 // (Also works for arrays instead of structs) 1929 Value *Insert = UndefValue::get(LIType); 1930 IRBuilder<> Builder(LI); 1931 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 1932 Value *Load = Builder.CreateLoad(NewElts[i], "load"); 1933 Insert = Builder.CreateInsertValue(Insert, Load, i, "insert"); 1934 } 1935 LI->replaceAllUsesWith(Insert); 1936 DeadInsts.push_back(LI); 1937 } else if (LIType->isIntegerTy() && 1938 DL.getTypeAllocSize(LIType) == 1939 DL.getTypeAllocSize(AI->getAllocatedType())) { 1940 // If this is a load of the entire alloca to an integer, rewrite it. 1941 RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); 1942 } 1943 continue; 1944 } 1945 1946 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1947 Value *Val = SI->getOperand(0); 1948 Type *SIType = Val->getType(); 1949 if (isCompatibleAggregate(SIType, AI->getAllocatedType())) { 1950 // Replace: 1951 // store { i32, i32 } %val, { i32, i32 }* %alloc 1952 // with: 1953 // %val.0 = extractvalue { i32, i32 } %val, 0 1954 // store i32 %val.0, i32* %alloc.0 1955 // %val.1 = extractvalue { i32, i32 } %val, 1 1956 // store i32 %val.1, i32* %alloc.1 1957 // (Also works for arrays instead of structs) 1958 IRBuilder<> Builder(SI); 1959 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 1960 Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName()); 1961 Builder.CreateStore(Extract, NewElts[i]); 1962 } 1963 DeadInsts.push_back(SI); 1964 } else if (SIType->isIntegerTy() && 1965 DL.getTypeAllocSize(SIType) == 1966 DL.getTypeAllocSize(AI->getAllocatedType())) { 1967 // If this is a store of the entire alloca from an integer, rewrite it. 1968 RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); 1969 } 1970 continue; 1971 } 1972 1973 if (isa<SelectInst>(User) || isa<PHINode>(User)) { 1974 // If we have a PHI user of the alloca itself (as opposed to a GEP or 1975 // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to 1976 // the new pointer. 1977 if (!isa<AllocaInst>(I)) continue; 1978 1979 assert(Offset == 0 && NewElts[0] && 1980 "Direct alloca use should have a zero offset"); 1981 1982 // If we have a use of the alloca, we know the derived uses will be 1983 // utilizing just the first element of the scalarized result. Insert a 1984 // bitcast of the first alloca before the user as required. 1985 AllocaInst *NewAI = NewElts[0]; 1986 BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI); 1987 NewAI->moveBefore(BCI); 1988 TheUse = BCI; 1989 continue; 1990 } 1991 } 1992 } 1993 1994 /// RewriteBitCast - Update a bitcast reference to the alloca being replaced 1995 /// and recursively continue updating all of its uses. 
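///
/// For example, a bitcast of the old alloca to i8* that feeds a memset: the
/// memset use is rewritten first, and the cast itself is then redirected to
/// the new element alloca that holds offset zero.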
1996 void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, 1997 SmallVectorImpl<AllocaInst *> &NewElts) { 1998 RewriteForScalarRepl(BC, AI, Offset, NewElts); 1999 if (BC->getOperand(0) != AI) 2000 return; 2001 2002 // The bitcast references the original alloca. Replace its uses with 2003 // references to the alloca containing offset zero (which is normally at 2004 // index zero, but might not be in cases involving structs with elements 2005 // of size zero). 2006 Type *T = AI->getAllocatedType(); 2007 uint64_t EltOffset = 0; 2008 Type *IdxTy; 2009 uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, 2010 BC->getModule()->getDataLayout()); 2011 Instruction *Val = NewElts[Idx]; 2012 if (Val->getType() != BC->getDestTy()) { 2013 Val = new BitCastInst(Val, BC->getDestTy(), "", BC); 2014 Val->takeName(BC); 2015 } 2016 BC->replaceAllUsesWith(Val); 2017 DeadInsts.push_back(BC); 2018 } 2019 2020 /// FindElementAndOffset - Return the index of the element containing Offset 2021 /// within the specified type, which must be either a struct or an array. 2022 /// Sets T to the type of the element and Offset to the offset within that 2023 /// element. IdxTy is set to the type of the index result to be used in a 2024 /// GEP instruction. 2025 uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy, 2026 const DataLayout &DL) { 2027 uint64_t Idx = 0; 2028 2029 if (StructType *ST = dyn_cast<StructType>(T)) { 2030 const StructLayout *Layout = DL.getStructLayout(ST); 2031 Idx = Layout->getElementContainingOffset(Offset); 2032 T = ST->getContainedType(Idx); 2033 Offset -= Layout->getElementOffset(Idx); 2034 IdxTy = Type::getInt32Ty(T->getContext()); 2035 return Idx; 2036 } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) { 2037 T = AT->getElementType(); 2038 uint64_t EltSize = DL.getTypeAllocSize(T); 2039 Idx = Offset / EltSize; 2040 Offset -= Idx * EltSize; 2041 IdxTy = Type::getInt64Ty(T->getContext()); 2042 return Idx; 2043 } 2044 VectorType *VT = cast<VectorType>(T); 2045 T = VT->getElementType(); 2046 uint64_t EltSize = DL.getTypeAllocSize(T); 2047 Idx = Offset / EltSize; 2048 Offset -= Idx * EltSize; 2049 IdxTy = Type::getInt64Ty(T->getContext()); 2050 return Idx; 2051 } 2052 2053 /// RewriteGEP - Check if this GEP instruction moves the pointer across 2054 /// elements of the alloca that are being split apart, and if so, rewrite 2055 /// the GEP to be relative to the new element. 2056 void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, 2057 SmallVectorImpl<AllocaInst *> &NewElts) { 2058 uint64_t OldOffset = Offset; 2059 const DataLayout &DL = GEPI->getModule()->getDataLayout(); 2060 SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); 2061 // If the GEP was dynamic then it must have been a dynamic vector lookup. 2062 // In this case, it must be the last GEP operand which is dynamic so keep that 2063 // aside until we've found the constant GEP offset then add it back in at the 2064 // end. 2065 Value* NonConstantIdx = nullptr; 2066 if (!GEPI->hasAllConstantIndices()) 2067 NonConstantIdx = Indices.pop_back_val(); 2068 Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices); 2069 2070 RewriteForScalarRepl(GEPI, AI, Offset, NewElts); 2071 2072 Type *T = AI->getAllocatedType(); 2073 Type *IdxTy; 2074 uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy, DL); 2075 if (GEPI->getOperand(0) == AI) 2076 OldIdx = ~0ULL; // Force the GEP to be rewritten. 
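  // (When the GEP is based directly on the old alloca, that pointer is about
  //  to disappear, so the GEP must be rebuilt on top of one of the new element
  //  allocas even if it stays within a single element.)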

  T = AI->getAllocatedType();
  uint64_t EltOffset = Offset;
  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, DL);

  // If this GEP does not move the pointer across elements of the alloca
  // being split, then it does not need to be rewritten.
  if (Idx == OldIdx)
    return;

  Type *i32Ty = Type::getInt32Ty(AI->getContext());
  SmallVector<Value*, 8> NewArgs;
  NewArgs.push_back(Constant::getNullValue(i32Ty));
  while (EltOffset != 0) {
    uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
    NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
  }
  if (NonConstantIdx) {
    Type* GepTy = T;
    // This GEP has a dynamic index. We need to add "i32 0" to index through
    // any structs or arrays in the original type until we get to the vector
    // to index.
    while (!isa<VectorType>(GepTy)) {
      NewArgs.push_back(Constant::getNullValue(i32Ty));
      GepTy = cast<CompositeType>(GepTy)->getTypeAtIndex(0U);
    }
    NewArgs.push_back(NonConstantIdx);
  }
  Instruction *Val = NewElts[Idx];
  if (NewArgs.size() > 1) {
    Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
    Val->takeName(GEPI);
  }
  if (Val->getType() != GEPI->getType())
    Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
  GEPI->replaceAllUsesWith(Val);
  DeadInsts.push_back(GEPI);
}

/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it
/// to mark the lifetime of the scalarized memory.
void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
                                    uint64_t Offset,
                                    SmallVectorImpl<AllocaInst *> &NewElts) {
  ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0));
  // Put matching lifetime markers on everything from Offset up to
  // Offset+OldSize.
  Type *AIType = AI->getAllocatedType();
  const DataLayout &DL = II->getModule()->getDataLayout();
  uint64_t NewOffset = Offset;
  Type *IdxTy;
  uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy, DL);

  IRBuilder<> Builder(II);
  uint64_t Size = OldSize->getLimitedValue();

  if (NewOffset) {
    // Splice the first element and index 'NewOffset' bytes in. SROA will
    // split the alloca again later.
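    // (e.g. with i32 elements and NewOffset == 2, the marker emitted here
    //  covers only the last two bytes of element Idx; the loop below then
    //  covers the remaining elements.)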
2136 unsigned AS = AI->getType()->getAddressSpace(); 2137 Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS)); 2138 V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset)); 2139 2140 IdxTy = NewElts[Idx]->getAllocatedType(); 2141 uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset; 2142 if (EltSize > Size) { 2143 EltSize = Size; 2144 Size = 0; 2145 } else { 2146 Size -= EltSize; 2147 } 2148 if (II->getIntrinsicID() == Intrinsic::lifetime_start) 2149 Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize)); 2150 else 2151 Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize)); 2152 ++Idx; 2153 } 2154 2155 for (; Idx != NewElts.size() && Size; ++Idx) { 2156 IdxTy = NewElts[Idx]->getAllocatedType(); 2157 uint64_t EltSize = DL.getTypeAllocSize(IdxTy); 2158 if (EltSize > Size) { 2159 EltSize = Size; 2160 Size = 0; 2161 } else { 2162 Size -= EltSize; 2163 } 2164 if (II->getIntrinsicID() == Intrinsic::lifetime_start) 2165 Builder.CreateLifetimeStart(NewElts[Idx], 2166 Builder.getInt64(EltSize)); 2167 else 2168 Builder.CreateLifetimeEnd(NewElts[Idx], 2169 Builder.getInt64(EltSize)); 2170 } 2171 DeadInsts.push_back(II); 2172 } 2173 2174 /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. 2175 /// Rewrite it to copy or set the elements of the scalarized memory. 2176 void 2177 SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, 2178 AllocaInst *AI, 2179 SmallVectorImpl<AllocaInst *> &NewElts) { 2180 // If this is a memcpy/memmove, construct the other pointer as the 2181 // appropriate type. The "Other" pointer is the pointer that goes to memory 2182 // that doesn't have anything to do with the alloca that we are promoting. For 2183 // memset, this Value* stays null. 2184 Value *OtherPtr = nullptr; 2185 unsigned MemAlignment = MI->getAlignment(); 2186 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy 2187 if (Inst == MTI->getRawDest()) 2188 OtherPtr = MTI->getRawSource(); 2189 else { 2190 assert(Inst == MTI->getRawSource()); 2191 OtherPtr = MTI->getRawDest(); 2192 } 2193 } 2194 2195 // If there is an other pointer, we want to convert it to the same pointer 2196 // type as AI has, so we can GEP through it safely. 2197 if (OtherPtr) { 2198 unsigned AddrSpace = 2199 cast<PointerType>(OtherPtr->getType())->getAddressSpace(); 2200 2201 // Remove bitcasts and all-zero GEPs from OtherPtr. This is an 2202 // optimization, but it's also required to detect the corner case where 2203 // both pointer operands are referencing the same memory, and where 2204 // OtherPtr may be a bitcast or GEP that currently being rewritten. (This 2205 // function is only called for mem intrinsics that access the whole 2206 // aggregate, so non-zero GEPs are not an issue here.) 2207 OtherPtr = OtherPtr->stripPointerCasts(); 2208 2209 // Copying the alloca to itself is a no-op: just delete it. 2210 if (OtherPtr == AI || OtherPtr == NewElts[0]) { 2211 // This code will run twice for a no-op memcpy -- once for each operand. 2212 // Put only one reference to MI on the DeadInsts list. 2213 for (SmallVectorImpl<Value *>::const_iterator I = DeadInsts.begin(), 2214 E = DeadInsts.end(); I != E; ++I) 2215 if (*I == MI) return; 2216 DeadInsts.push_back(MI); 2217 return; 2218 } 2219 2220 // If the pointer is not the right type, insert a bitcast to the right 2221 // type. 
2222 Type *NewTy = 2223 PointerType::get(AI->getType()->getElementType(), AddrSpace); 2224 2225 if (OtherPtr->getType() != NewTy) 2226 OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI); 2227 } 2228 2229 // Process each element of the aggregate. 2230 bool SROADest = MI->getRawDest() == Inst; 2231 2232 Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); 2233 const DataLayout &DL = MI->getModule()->getDataLayout(); 2234 2235 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 2236 // If this is a memcpy/memmove, emit a GEP of the other element address. 2237 Value *OtherElt = nullptr; 2238 unsigned OtherEltAlign = MemAlignment; 2239 2240 if (OtherPtr) { 2241 Value *Idx[2] = { Zero, 2242 ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; 2243 OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, 2244 OtherPtr->getName()+"."+Twine(i), 2245 MI); 2246 uint64_t EltOffset; 2247 PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); 2248 Type *OtherTy = OtherPtrTy->getElementType(); 2249 if (StructType *ST = dyn_cast<StructType>(OtherTy)) { 2250 EltOffset = DL.getStructLayout(ST)->getElementOffset(i); 2251 } else { 2252 Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); 2253 EltOffset = DL.getTypeAllocSize(EltTy) * i; 2254 } 2255 2256 // The alignment of the other pointer is the guaranteed alignment of the 2257 // element, which is affected by both the known alignment of the whole 2258 // mem intrinsic and the alignment of the element. If the alignment of 2259 // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the 2260 // known alignment is just 4 bytes. 2261 OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset); 2262 } 2263 2264 Value *EltPtr = NewElts[i]; 2265 Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); 2266 2267 // If we got down to a scalar, insert a load or store as appropriate. 2268 if (EltTy->isSingleValueType()) { 2269 if (isa<MemTransferInst>(MI)) { 2270 if (SROADest) { 2271 // From Other to Alloca. 2272 Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI); 2273 new StoreInst(Elt, EltPtr, MI); 2274 } else { 2275 // From Alloca to Other. 2276 Value *Elt = new LoadInst(EltPtr, "tmp", MI); 2277 new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI); 2278 } 2279 continue; 2280 } 2281 assert(isa<MemSetInst>(MI)); 2282 2283 // If the stored element is zero (common case), just store a null 2284 // constant. 2285 Constant *StoreVal; 2286 if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) { 2287 if (CI->isZero()) { 2288 StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> 2289 } else { 2290 // If EltTy is a vector type, get the element type. 2291 Type *ValTy = EltTy->getScalarType(); 2292 2293 // Construct an integer with the right value. 2294 unsigned EltSize = DL.getTypeSizeInBits(ValTy); 2295 APInt OneVal(EltSize, CI->getZExtValue()); 2296 APInt TotalVal(OneVal); 2297 // Set each byte. 2298 for (unsigned i = 0; 8*i < EltSize; ++i) { 2299 TotalVal = TotalVal.shl(8); 2300 TotalVal |= OneVal; 2301 } 2302 2303 // Convert the integer value to the appropriate type. 
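          // (For example, a memset with value 0xAB over a float element
          //  becomes a store of the i32 0xABABABAB bitcast to float.)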
2304 StoreVal = ConstantInt::get(CI->getContext(), TotalVal); 2305 if (ValTy->isPointerTy()) 2306 StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); 2307 else if (ValTy->isFloatingPointTy()) 2308 StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); 2309 assert(StoreVal->getType() == ValTy && "Type mismatch!"); 2310 2311 // If the requested value was a vector constant, create it. 2312 if (EltTy->isVectorTy()) { 2313 unsigned NumElts = cast<VectorType>(EltTy)->getNumElements(); 2314 StoreVal = ConstantVector::getSplat(NumElts, StoreVal); 2315 } 2316 } 2317 new StoreInst(StoreVal, EltPtr, MI); 2318 continue; 2319 } 2320 // Otherwise, if we're storing a byte variable, use a memset call for 2321 // this element. 2322 } 2323 2324 unsigned EltSize = DL.getTypeAllocSize(EltTy); 2325 if (!EltSize) 2326 continue; 2327 2328 IRBuilder<> Builder(MI); 2329 2330 // Finally, insert the meminst for this element. 2331 if (isa<MemSetInst>(MI)) { 2332 Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize, 2333 MI->isVolatile()); 2334 } else { 2335 assert(isa<MemTransferInst>(MI)); 2336 Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr 2337 Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr 2338 2339 if (isa<MemCpyInst>(MI)) 2340 Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile()); 2341 else 2342 Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile()); 2343 } 2344 } 2345 DeadInsts.push_back(MI); 2346 } 2347 2348 /// RewriteStoreUserOfWholeAlloca - We found a store of an integer that 2349 /// overwrites the entire allocation. Extract out the pieces of the stored 2350 /// integer and store them individually. 2351 void 2352 SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, 2353 SmallVectorImpl<AllocaInst *> &NewElts) { 2354 // Extract each element out of the integer according to its structure offset 2355 // and store the element value to the individual alloca. 2356 Value *SrcVal = SI->getOperand(0); 2357 Type *AllocaEltTy = AI->getAllocatedType(); 2358 const DataLayout &DL = SI->getModule()->getDataLayout(); 2359 uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy); 2360 2361 IRBuilder<> Builder(SI); 2362 2363 // Handle tail padding by extending the operand 2364 if (DL.getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) 2365 SrcVal = Builder.CreateZExt(SrcVal, 2366 IntegerType::get(SI->getContext(), AllocaSizeBits)); 2367 2368 DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI 2369 << '\n'); 2370 2371 // There are two forms here: AI could be an array or struct. Both cases 2372 // have different ways to compute the element offset. 2373 if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { 2374 const StructLayout *Layout = DL.getStructLayout(EltSTy); 2375 2376 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 2377 // Get the number of bits to shift SrcVal to get the value. 2378 Type *FieldTy = EltSTy->getElementType(i); 2379 uint64_t Shift = Layout->getElementOffsetInBits(i); 2380 2381 if (DL.isBigEndian()) 2382 Shift = AllocaSizeBits - Shift - DL.getTypeAllocSizeInBits(FieldTy); 2383 2384 Value *EltVal = SrcVal; 2385 if (Shift) { 2386 Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 2387 EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); 2388 } 2389 2390 // Truncate down to an integer of the right size. 2391 uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy); 2392 2393 // Ignore zero sized fields like {}, they obviously contain no data. 
2394 if (FieldSizeBits == 0) continue; 2395 2396 if (FieldSizeBits != AllocaSizeBits) 2397 EltVal = Builder.CreateTrunc(EltVal, 2398 IntegerType::get(SI->getContext(), FieldSizeBits)); 2399 Value *DestField = NewElts[i]; 2400 if (EltVal->getType() == FieldTy) { 2401 // Storing to an integer field of this size, just do it. 2402 } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) { 2403 // Bitcast to the right element type (for fp/vector values). 2404 EltVal = Builder.CreateBitCast(EltVal, FieldTy); 2405 } else { 2406 // Otherwise, bitcast the dest pointer (for aggregates). 2407 DestField = Builder.CreateBitCast(DestField, 2408 PointerType::getUnqual(EltVal->getType())); 2409 } 2410 new StoreInst(EltVal, DestField, SI); 2411 } 2412 2413 } else { 2414 ArrayType *ATy = cast<ArrayType>(AllocaEltTy); 2415 Type *ArrayEltTy = ATy->getElementType(); 2416 uint64_t ElementOffset = DL.getTypeAllocSizeInBits(ArrayEltTy); 2417 uint64_t ElementSizeBits = DL.getTypeSizeInBits(ArrayEltTy); 2418 2419 uint64_t Shift; 2420 2421 if (DL.isBigEndian()) 2422 Shift = AllocaSizeBits-ElementOffset; 2423 else 2424 Shift = 0; 2425 2426 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 2427 // Ignore zero sized fields like {}, they obviously contain no data. 2428 if (ElementSizeBits == 0) continue; 2429 2430 Value *EltVal = SrcVal; 2431 if (Shift) { 2432 Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 2433 EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); 2434 } 2435 2436 // Truncate down to an integer of the right size. 2437 if (ElementSizeBits != AllocaSizeBits) 2438 EltVal = Builder.CreateTrunc(EltVal, 2439 IntegerType::get(SI->getContext(), 2440 ElementSizeBits)); 2441 Value *DestField = NewElts[i]; 2442 if (EltVal->getType() == ArrayEltTy) { 2443 // Storing to an integer field of this size, just do it. 2444 } else if (ArrayEltTy->isFloatingPointTy() || 2445 ArrayEltTy->isVectorTy()) { 2446 // Bitcast to the right element type (for fp/vector values). 2447 EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy); 2448 } else { 2449 // Otherwise, bitcast the dest pointer (for aggregates). 2450 DestField = Builder.CreateBitCast(DestField, 2451 PointerType::getUnqual(EltVal->getType())); 2452 } 2453 new StoreInst(EltVal, DestField, SI); 2454 2455 if (DL.isBigEndian()) 2456 Shift -= ElementOffset; 2457 else 2458 Shift += ElementOffset; 2459 } 2460 } 2461 2462 DeadInsts.push_back(SI); 2463 } 2464 2465 /// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to 2466 /// an integer. Load the individual pieces to form the aggregate value. 2467 void 2468 SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, 2469 SmallVectorImpl<AllocaInst *> &NewElts) { 2470 // Extract each element out of the NewElts according to its structure offset 2471 // and form the result value. 2472 Type *AllocaEltTy = AI->getAllocatedType(); 2473 const DataLayout &DL = LI->getModule()->getDataLayout(); 2474 uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy); 2475 2476 DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI 2477 << '\n'); 2478 2479 // There are two forms here: AI could be an array or struct. Both cases 2480 // have different ways to compute the element offset. 
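  // (For example, "%v = load i64* %alloc" over a { i32, i32 } alloca becomes
  //  two i32 element loads that are zero-extended, shifted into place and
  //  or'd together to rebuild the i64.)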
2481 const StructLayout *Layout = nullptr; 2482 uint64_t ArrayEltBitOffset = 0; 2483 if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { 2484 Layout = DL.getStructLayout(EltSTy); 2485 } else { 2486 Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); 2487 ArrayEltBitOffset = DL.getTypeAllocSizeInBits(ArrayEltTy); 2488 } 2489 2490 Value *ResultVal = 2491 Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits)); 2492 2493 for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 2494 // Load the value from the alloca. If the NewElt is an aggregate, cast 2495 // the pointer to an integer of the same size before doing the load. 2496 Value *SrcField = NewElts[i]; 2497 Type *FieldTy = 2498 cast<PointerType>(SrcField->getType())->getElementType(); 2499 uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy); 2500 2501 // Ignore zero sized fields like {}, they obviously contain no data. 2502 if (FieldSizeBits == 0) continue; 2503 2504 IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), 2505 FieldSizeBits); 2506 if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && 2507 !FieldTy->isVectorTy()) 2508 SrcField = new BitCastInst(SrcField, 2509 PointerType::getUnqual(FieldIntTy), 2510 "", LI); 2511 SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); 2512 2513 // If SrcField is a fp or vector of the right size but that isn't an 2514 // integer type, bitcast to an integer so we can shift it. 2515 if (SrcField->getType() != FieldIntTy) 2516 SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI); 2517 2518 // Zero extend the field to be the same size as the final alloca so that 2519 // we can shift and insert it. 2520 if (SrcField->getType() != ResultVal->getType()) 2521 SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI); 2522 2523 // Determine the number of bits to shift SrcField. 2524 uint64_t Shift; 2525 if (Layout) // Struct case. 2526 Shift = Layout->getElementOffsetInBits(i); 2527 else // Array case. 2528 Shift = i*ArrayEltBitOffset; 2529 2530 if (DL.isBigEndian()) 2531 Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); 2532 2533 if (Shift) { 2534 Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); 2535 SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); 2536 } 2537 2538 // Don't create an 'or x, 0' on the first iteration. 2539 if (!isa<Constant>(ResultVal) || 2540 !cast<Constant>(ResultVal)->isNullValue()) 2541 ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); 2542 else 2543 ResultVal = SrcField; 2544 } 2545 2546 // Handle tail padding by truncating the result 2547 if (DL.getTypeSizeInBits(LI->getType()) != AllocaSizeBits) 2548 ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); 2549 2550 LI->replaceAllUsesWith(ResultVal); 2551 DeadInsts.push_back(LI); 2552 } 2553 2554 /// HasPadding - Return true if the specified type has any structure or 2555 /// alignment padding in between the elements that would be split apart 2556 /// by SROA; return false otherwise. 2557 static bool HasPadding(Type *Ty, const DataLayout &DL) { 2558 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 2559 Ty = ATy->getElementType(); 2560 return DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty); 2561 } 2562 2563 // SROA currently handles only Arrays and Structs. 
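  // (With a typical data layout, { i8, i32 } has three bytes of inter-element
  //  padding and { i32, i8 } has tail padding; either one makes HasPadding
  //  return true, which blocks splitting an alloca that is both a memcpy
  //  source and destination, see isSafeAllocaToScalarRepl below.)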
  StructType *STy = cast<StructType>(Ty);
  const StructLayout *SL = DL.getStructLayout(STy);
  unsigned PrevFieldBitOffset = 0;
  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
    unsigned FieldBitOffset = SL->getElementOffsetInBits(i);

    // Check to see if there is any padding between this element and the
    // previous one.
    if (i) {
      unsigned PrevFieldEnd =
        PrevFieldBitOffset+DL.getTypeSizeInBits(STy->getElementType(i-1));
      if (PrevFieldEnd < FieldBitOffset)
        return true;
    }
    PrevFieldBitOffset = FieldBitOffset;
  }
  // Check for tail padding.
  if (unsigned EltCount = STy->getNumElements()) {
    unsigned PrevFieldEnd = PrevFieldBitOffset +
      DL.getTypeSizeInBits(STy->getElementType(EltCount-1));
    if (PrevFieldEnd < SL->getSizeInBits())
      return true;
  }
  return false;
}

/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
/// aggregate can be broken down into elements. Return true if it is safe to
/// do so, false otherwise.
bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
  // Loop over the use list of the alloca. We can only transform it if all of
  // the users are safe to transform.
  AllocaInfo Info(AI);

  isSafeForScalarRepl(AI, 0, Info);
  if (Info.isUnsafe) {
    DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
    return false;
  }

  const DataLayout &DL = AI->getModule()->getDataLayout();

  // Okay, we know all the users are promotable. If the aggregate is a memcpy
  // source and destination, we have to be careful. In particular, the memcpy
  // could be moving around elements that live in structure padding of the LLVM
  // types, but may actually be used. In these cases, we refuse to promote the
  // struct.
  if (Info.isMemCpySrc && Info.isMemCpyDst &&
      HasPadding(AI->getAllocatedType(), DL))
    return false;

  // If the alloca never has an access to just *part* of it, but is accessed
  // via loads and stores, then we should use ConvertToScalarInfo to promote
  // the alloca instead of promoting each piece at a time and inserting fission
  // and fusion code.
  if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
    // If the struct/array just has one element, use basic SRoA.
    if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
      if (ST->getNumElements() > 1) return false;
    } else {
      if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
        return false;
    }
  }

  return true;
}
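
// A minimal end-to-end illustration of what this pass does (hypothetical IR,
// abbreviated in the same style as the comments above):
//
//   %p  = alloca { i32, float }
//   %f0 = getelementptr inbounds { i32, float }* %p, i32 0, i32 0
//   store i32 1, i32* %f0
//   %f1 = getelementptr inbounds { i32, float }* %p, i32 0, i32 1
//   store float 2.0, float* %f1
//
// performScalarRepl() splits %p into "%p.0 = alloca i32" and
// "%p.1 = alloca float" and rewrites the GEPs to address them directly;
// performPromotion() then runs mem2reg (or the SSAUpdater-based promoter) on
// the element allocas, leaving plain SSA values and no memory traffic.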