//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumDeadStore, "Number of dead stores eliminated");

// Try to kill a dead alloca by walking through its uses until we see some use
// that could escape. This is a conservative analysis which tries to handle
// GEPs, bitcasts, stores, and no-op intrinsics. These tend to be the things
// left after inlining and SROA finish chewing on an alloca.
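//
// For illustration only (hypothetical IR, not taken from a test case), the
// walk below would treat an alloca like this as dead and delete its stores:
//
//   %buf = alloca [16 x i8]
//   %p   = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
//   call void @llvm.lifetime.start(i64 16, i8* %p)
//   store i8 0, i8* %p
//   call void @llvm.lifetime.end(i64 16, i8* %p)
//
// Any use that might let the pointer escape (a call, a load, storing the
// pointer itself) makes the analysis give up and return null.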
static Instruction *removeDeadAlloca(InstCombiner &IC, AllocaInst &AI) {
  SmallVector<Instruction *, 4> Worklist, DeadStores;
  Worklist.push_back(&AI);
  do {
    Instruction *PI = Worklist.pop_back_val();
    for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end();
         UI != UE; ++UI) {
      Instruction *I = cast<Instruction>(*UI);
      switch (I->getOpcode()) {
      default:
        // Give up the moment we see something we can't handle.
        return 0;

      case Instruction::GetElementPtr:
      case Instruction::BitCast:
        Worklist.push_back(I);
        continue;

      case Instruction::Call:
        // We can handle a limited subset of calls to no-op intrinsics.
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
          switch (II->getIntrinsicID()) {
          case Intrinsic::dbg_declare:
          case Intrinsic::dbg_value:
          case Intrinsic::invariant_start:
          case Intrinsic::invariant_end:
          case Intrinsic::lifetime_start:
          case Intrinsic::lifetime_end:
            continue;
          default:
            return 0;
          }
        }
        // Reject everything else.
        return 0;

      case Instruction::Store: {
        // Stores into the alloca are only live if the alloca is live.
        StoreInst *SI = cast<StoreInst>(I);
        // We can eliminate atomic stores, but not volatile.
        if (SI->isVolatile())
          return 0;
        // The store is only trivially safe if the pointer is the destination
        // as opposed to the value. We're conservative here and don't check for
        // the case where we store the address of a dead alloca into a dead
        // alloca.
        if (SI->getPointerOperand() != PI)
          return 0;
        DeadStores.push_back(I);
        continue;
      }
      }
    }
  } while (!Worklist.empty());

  // The alloca is dead. Kill off all the stores to it, and then replace it
  // with undef.
  while (!DeadStores.empty())
    IC.EraseInstFromFunction(*DeadStores.pop_back_val());
  return IC.ReplaceInstUsesWith(AI, UndefValue::get(AI.getType()));
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  if (TD) {
    Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(),
                                        IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
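  //
  // Illustrative only (hypothetical IR): the rewrite turns
  //   %a = alloca i32, i32 4
  // into
  //   %a1 = alloca [4 x i32]
  //   %a1.sub = getelementptr inbounds [4 x i32]* %a1, i32 0, i32 0
  // and every user of %a is pointed at %a1.sub instead.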
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible... also skip interleaved debug info.
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that It is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      //
      Value *NullIdx =
        Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
      Value *Idx[2];
      Idx[0] = NullIdx;
      Idx[1] = NullIdx;
      Instruction *GEP =
           GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
    // If alloca'ing a zero byte object, replace the alloca with a null pointer.
    // Note that we only do this for alloca's, because malloc should allocate
    // and return a unique pointer, even for a zero byte allocation.
    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));

    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
  }

  // Try to aggressively remove allocas which are only used for GEPs, lifetime
  // markers, and stores. This happens when SROA iteratively promotes stores
  // out of the alloca, and we need to clean up after it.
  return removeDeadAlloca(*this, AI);
}


/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
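///
/// For illustration only (hypothetical IR): given a size-preserving cast such
/// as
///   %p = bitcast <4 x i8>* %q to i32*
///   %v = load i32* %p
/// this produces
///   %v1 = load <4 x i8>* %q
///   %v  = bitcast <4 x i8> %v1 to i32
/// so the cast is applied to the loaded value rather than to the pointer.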
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const TargetData *TD) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return 0;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
         DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Value *Idxs[2];
            Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
            Idxs[1] = Idxs[0];
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getTargetData() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
            SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // cast to a pointer; this pessimizes pointer analysis a lot.
          (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
          IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
               IC.getTargetData()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return 0;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()), TD);
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      TD->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return 0;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
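  //
  // Illustrative only (hypothetical IR): in a sequence like
  //   store i32 %x, i32* %p
  //   %a = add i32 %y, 1
  //   %b = load i32* %p
  // the load %b is replaced with %x directly.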
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(),
                                                     BBI, 6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store-to-null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store-to-null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return 0;
}

/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
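///
/// For illustration only (hypothetical IR, assuming 64-bit pointers):
///   %p = bitcast i8** %q to i64*
///   store i64 %v, i64* %p
/// becomes
///   %v.c = inttoptr i64 %v to i8*
///   store i8* %v.c, i8** %q
/// so the cast is applied to the stored value rather than to the pointer.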
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (SrcTy == 0) return 0;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return 0;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source points to an aggregate, the code below will not succeed
  // directly.  See if a trivial 'gep P, 0, 0' down to the first scalar element
  // will help matters.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return 0;

  // If the pointers point into different address spaces or if they point to
  // values with different sizes, we can't do the transformation.
  if (!IC.getTargetData() ||
      SrcTy->getAddressSpace() !=
        cast<PointerType>(CI->getType())->getAddressSpace() ||
      IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
      IC.getTargetData()->getTypeSizeInBits(DestPTy))
    return 0;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Value *SIOp0 = SI.getOperand(0);
  Instruction::CastOps opcode = Instruction::BitCast;
  Type* CastSrcTy = SIOp0->getType();
  Type* CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (SIOp0->getType()->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // If the original pointer points to an aggregate and this is a store to its
  // first field, emit a GEP to index down to that field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
                                 TD);
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      TD->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return 0;

  // If the pointer operand is an alloca with a single use, zap the store,
  // making the alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations.
  // This situation often occurs with bitfield accesses.
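  //
  // Illustrative only (hypothetical IR): in
  //   store i32 %a, i32* %p
  //   %t = add i32 %x, 1
  //   store i32 %b, i32* %p
  // the first store is dead and is deleted by the scan below.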
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen; also skip
    // pointer-to-pointer bitcasts, which are no-ops.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Is the previous store simple and to the same location?  If so, it is
      // dead.
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the stored value is the
    // result of a load from the same pointer, then *this* store is dead
    // (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;


  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!

  return 0;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
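/// For illustration only (hypothetical IR, block names invented): the first
/// pattern above ends up with the successor looking roughly like
///   Dest:
///     %storemerge = phi i32 [ %v1, %Then ], [ %v2, %Else ]
///     store i32 %storemerge, i32* %P
/// and the two original stores are removed.
///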
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = 0;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  There is an instruction before the branch.
  StoreInst *OtherStore = 0;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to make sure nothing
    // reads or overwrites the stored value in StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}