//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable. This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::AddrSpaceCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  }
  return false;
}
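
// For illustration (hypothetical IR): given a constant global
//   @g = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
// pointsToConstantGlobal returns true for @g itself and for constant
// expressions derived from it, such as
//   bitcast ([4 x i32]* @g to i8*)
//   getelementptr inbounds ([4 x i32]* @g, i64 0, i64 2)
// but false when the same address is computed by instructions, which we
// can't rewrite.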

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca. Ignore any reads of the pointer, and return false if
/// we see any stores or other unknown uses. If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise
/// traverse the uses. If we see a memcpy/memmove that targets an unoffset
/// pointer to the alloca, and if the source pointer is a pointer to a constant
/// global, we can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  // We track lifetime intrinsics as we encounter them. If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
  ValuesToInspect.push_back(std::make_pair(V, false));
  while (!ValuesToInspect.empty()) {
    auto ValuePair = ValuesToInspect.pop_back_val();
    const bool IsOffset = ValuePair.second;
    for (auto &U : ValuePair.first->uses()) {
      Instruction *I = cast<Instruction>(U.getUser());

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        // Ignore non-volatile loads; they are always ok.
        if (!LI->isSimple()) return false;
        continue;
      }

      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
        // If uses of the bitcast are ok, we are ok.
        ValuesToInspect.push_back(std::make_pair(I, IsOffset));
        continue;
      }
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // If the GEP has all zero indices, it doesn't offset the pointer. If it
        // doesn't, it does.
        ValuesToInspect.push_back(
            std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
        continue;
      }

      if (CallSite CS = I) {
        // If this is the function being called then we treat it like a load and
        // ignore it.
        if (CS.isCallee(&U))
          continue;

        // Inalloca arguments are clobbered by the call.
        unsigned ArgNo = CS.getArgumentNo(&U);
        if (CS.isInAllocaArgument(ArgNo))
          return false;

        // If this is a readonly/readnone call site, then we know it is just a
        // load (but one that potentially returns the value itself), so we can
        // ignore it if we know that the value isn't captured.
        if (CS.onlyReadsMemory() &&
            (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
          continue;

        // If this is being passed as a byval argument, the caller is making a
        // copy, so it is only a read of the alloca.
        if (CS.isByValArgument(ArgNo))
          continue;
      }

      // Lifetime intrinsics can be handled by the caller.
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
            II->getIntrinsicID() == Intrinsic::lifetime_end) {
          assert(II->use_empty() && "Lifetime markers have no result to use!");
          ToDelete.push_back(II);
          continue;
        }
      }

      // If this isn't our memcpy/memmove, reject it as something we can't
      // handle.
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
      if (!MI)
        return false;

      // If the transfer is using the alloca as the source of the transfer,
      // then ignore it since it is a load (unless the transfer is volatile).
      if (U.getOperandNo() == 1) {
        if (MI->isVolatile()) return false;
        continue;
      }

      // If we already have seen a copy, reject the second one.
      if (TheCopy) return false;

      // If the pointer has been offset from the start of the alloca, we can't
      // safely handle this.
      if (IsOffset) return false;

      // If the memintrinsic isn't using the alloca as the dest, reject it.
      if (U.getOperandNo() != 0) return false;

      // If the source of the memcpy/move is not a constant global, reject it.
      if (!pointsToConstantGlobal(MI->getSource()))
        return false;

      // Otherwise, the transform is safe. Remember the copy instruction.
      TheCopy = MI;
    }
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return the copy (a memcpy/memmove from a
/// constant global) if the specified alloca is only modified by that copy,
/// and null otherwise. If we can prove this, we can replace any uses of the
/// alloca with uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = nullptr;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return nullptr;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
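  //
  // E.g. (illustrative, assuming 64-bit pointers):
  //   %a = alloca i32, i16 %n
  // becomes
  //   %n.ext = zext i16 %n to i64
  //   %a = alloca i32, i64 %n.ext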
  if (DL) {
    Type *IntPtrTy = DL->getIntPtrType(AI.getType());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible...also skip interleaved debug info
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that It is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      //
      Type *IdxTy = DL
                  ? DL->getIntPtrType(AI.getType())
                  : Type::getInt64Ty(AI.getContext());
      Value *NullIdx = Constant::getNullValue(IdxTy);
      Value *Idx[2] = { NullIdx, NullIdx };
      Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }
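
  // Illustrative IR for the array-allocation rewrite above (exact value
  // names vary):
  //   %a = alloca i32, i32 4
  // becomes
  //   %a1 = alloca [4 x i32]
  //   %a1.sub = getelementptr inbounds [4 x i32]* %a1, i64 0, i64 0
  // and all uses of %a are replaced by %a1.sub.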

  if (DL && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all allocas of zero-byte objects to the entry block and merge them
    // together. Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
    if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block. There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
            DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation. If this is the case, we can change all users to use the
    // constant global instead. This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
                                                        AI.getAlignment(), DL);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast =
          ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}

/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
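/// For example (illustrative IR):
///   %v = load i8** bitcast (i32** @p to i8**)
/// becomes
///   %v1 = load i32** @p
///   %v = bitcast i32* %v1 to i8*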
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const DataLayout *DL) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return nullptr;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
        DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed. Check to
      // see if a trivial 'gep P, 0, 0' will help matters. Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Type *IdxTy = DL
                        ? DL->getIntPtrType(SrcTy)
                        : Type::getInt64Ty(SrcTy->getContext());
            Value *Idx = Constant::getNullValue(IdxTy);
            Value *Idxs[2] = { Idx, Idx };
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getDataLayout() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
           SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // cast to a pointer; this pessimizes pointer analysis a lot.
          (SrcPTy->isPtrOrPtrVectorTy() ==
           LI.getType()->isPtrOrPtrVectorTy()) &&
          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
          IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size. Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType());
        PointerType *NewTy = dyn_cast<PointerType>(LI.getType());
        if (OldTy && NewTy &&
            OldTy->getAddressSpace() != NewTy->getAddressSpace()) {
          return new AddrSpaceCastInst(NewLoad, LI.getType());
        }

        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return nullptr;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),
                                 DL);
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      DL->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
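  // E.g. (illustrative): given
  //   store i32 %x, i32* %p
  //   %y = add i32 %a, %b
  //   %v = load i32* %p
  // the load is replaced with %x directly.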
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load (gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable. We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable. We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap! Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return nullptr;
}

/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible. This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
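/// For example (illustrative IR, assuming 32-bit pointers so the sizes
/// match):
///   store i32 %x, i32* bitcast (i8** @p to i32*)
/// becomes
///   %x.c = inttoptr i32 %x to i8*
///   store i8* %x.c, i8** @p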
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (!SrcTy) return nullptr;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return nullptr;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element. This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an aggregate (array or struct), the code below will not
  // succeed. Check to see if a trivial 'gep P, 0, 0' will help matters.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return nullptr;

  // If the pointers point into different address spaces don't do the
  // transformation.
  if (SrcTy->getAddressSpace() !=
      cast<PointerType>(CI->getType())->getAddressSpace())
    return nullptr;

  // If the pointers point to values of different sizes don't do the
  // transformation.
  if (!IC.getDataLayout() ||
      IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
      IC.getDataLayout()->getTypeSizeInBits(DestPTy))
    return nullptr;

  // If the pointers point to pointers to different address spaces don't do the
  // transformation. It is not safe to introduce an addrspacecast instruction
  // in this case since, depending on the target, addrspacecast may not be a
  // no-op cast.
  if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
      SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
    return nullptr;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size. Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Instruction::CastOps opcode = Instruction::BitCast;
  Type* CastSrcTy = DestPTy;
  Type* CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (CastSrcTy->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // If the original pointer was to an aggregate and this is a store to its
  // first scalar field, emit a GEP to index down to that field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  Value *SIOp0 = SI.getOperand(0);
  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()),
                                 DL);
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      DL->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return nullptr;

  // If the pointer is an alloca whose single use is this store, zap the
  // store, making the alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations.
  // This situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen, and skip
    // pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop. However, if the loaded value is
    // loaded from the pointer we're storing to and is the value being stored,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location. Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }
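
  // E.g. (illustrative): in
  //   store i32 1, i32* %p
  //   store i32 2, i32* %p
  // the scan above deletes the first store as dead.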

  // store X, null -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return nullptr;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;

  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return nullptr;  // xform done!

  return nullptr;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
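/// For example (illustrative IR), the two stores in:
///   then:
///     store i32 1, i32* %p
///     br label %join
///   else:
///     store i32 2, i32* %p
///     br label %join
/// are sunk into %join as:
///   %storemerge = phi i32 [ 1, %then ], [ 2, %else ]
///   store i32 %storemerge, i32* %p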
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges. If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if the relevant blocks aren't all distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case. We verified above that there is an instruction before the
  // branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle. See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had the same TBAA tag, preserve it.
  if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
    if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
                     OtherStore->getMetadata(LLVMContext::MD_tbaa))))
      NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}