//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumDeadStore, "Number of dead stores eliminated");

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
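  //
  // A minimal sketch of this canonicalization (illustrative IR, assuming
  // 64-bit pointers; the names are hypothetical):
  //   %a = alloca i32, i8 %n
  // becomes:
  //   %n.ext = zext i8 %n to i64
  //   %a = alloca i32, i64 %n.ext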
  if (TD) {
    Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(),
                                        IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
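  //
  // A minimal sketch of the rewrite (illustrative IR; names are hypothetical):
  //   %a = alloca i32, i32 4
  // becomes:
  //   %a1 = alloca [4 x i32]
  //   %a1.sub = getelementptr inbounds [4 x i32]* %a1, i32 0, i32 0
  // and every use of %a is rewritten to use %a1.sub.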
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block
      // of allocas if possible... also skip interleaved debug info.
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that It points to the first non-allocation instruction in the
      // block, insert our getelementptr instruction...
      //
      Value *NullIdx =
        Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
      Value *Idx[2];
      Idx[0] = NullIdx;
      Idx[1] = NullIdx;
      Instruction *GEP =
           GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
    // If alloca'ing a zero byte object, replace the alloca with a null pointer.
    // Note that we only do this for alloca's, because malloc should allocate
    // and return a unique pointer, even for a zero byte allocation.
    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));

    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
  }

  return 0;
}


/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
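///
/// A minimal sketch of the fold (illustrative IR for same-sized, non-pointer
/// types; names are hypothetical):
///   %c = bitcast <2 x i16>* %p to i32*
///   %v = load i32* %c
/// becomes:
///   %v.w = load <2 x i16>* %p
///   %v = bitcast <2 x i16> %v.w to i32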
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const TargetData *TD) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return 0;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
         DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Value *Idxs[2];
            Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
            Idxs[1] = Idxs[0];
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getTargetData() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
            SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // cast to a pointer; this pessimizes pointer analysis a lot.
          (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
          IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
               IC.getTargetData()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return 0;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),
                                 TD);
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      TD->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return 0;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
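  //
  // A minimal sketch of the pattern this catches (illustrative IR; names are
  // hypothetical):
  //   store i32 %x, i32* %p
  //   %t = add i32 %a, %b
  //   %v = load i32* %p      ; forwarded: %v is replaced by %x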
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(),
                                                     BBI, 6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
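  //
  // A minimal sketch (illustrative IR, address space 0):
  //   %p = getelementptr i32* null, i64 1
  //   %v = load i32* %p
  // The load is replaced by undef, and a store to null is inserted so that
  // SimplifyCFG can later turn this block into 'unreachable'.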
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) &&
        GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false:
    //   load (select i1 %C, i32* null, i32* %G)
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return 0;
}

/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
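///
/// A minimal sketch of the fold (illustrative IR; assumes 32-bit pointers so
/// that i32 and i8* have the same size; names are hypothetical):
///   %c = bitcast i8** %p to i32*
///   store i32 %v, i32* %c
/// becomes:
///   %v.c = inttoptr i32 %v to i8*
///   store i8* %v.c, i8** %p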
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (SrcTy == 0) return 0;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return 0;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an array, the code below will not succeed.  Check to
  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
  // constants.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return 0;

  // If the pointers point into different address spaces or if they point to
  // values with different sizes, we can't do the transformation.
  if (!IC.getTargetData() ||
      SrcTy->getAddressSpace() !=
        cast<PointerType>(CI->getType())->getAddressSpace() ||
      IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
      IC.getTargetData()->getTypeSizeInBits(DestPTy))
    return 0;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Value *SIOp0 = SI.getOperand(0);
  Instruction::CastOps opcode = Instruction::BitCast;
  Type* CastSrcTy = SIOp0->getType();
  Type* CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (SIOp0->getType()->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // If the original pointer is to an aggregate and this is a store to its
  // first field, emit a GEP to index down to that field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
                                 TD);
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      TD->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return 0;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
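  //
  // A minimal sketch of the pattern (illustrative IR; names are hypothetical):
  //   store i32 %old, i32* %p   ; dead, removed below
  //   %t = or i32 %x, 1
  //   store i32 %t, i32* %p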
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and skip over pointer-to-pointer bitcasts, which are no-ops.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Is the previous store simple and to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the loaded value is the
    // value we are storing and it was loaded from the same pointer, then *this*
    // store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;


  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!

  return 0;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2; }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = 0;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  We know there is an instruction before the branch.
  StoreInst *OtherStore = 0;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}
    617