Home | History | Annotate | Download | only in IPO
      1 //===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass transforms simple global variables that never have their address
     11 // taken.  If obviously true, it marks read/write globals as constant, deletes
     12 // variables only stored to, etc.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #define DEBUG_TYPE "globalopt"
     17 #include "llvm/Transforms/IPO.h"
     18 #include "llvm/ADT/DenseMap.h"
     19 #include "llvm/ADT/STLExtras.h"
     20 #include "llvm/ADT/SmallPtrSet.h"
     21 #include "llvm/ADT/SmallVector.h"
     22 #include "llvm/ADT/Statistic.h"
     23 #include "llvm/Analysis/ConstantFolding.h"
     24 #include "llvm/Analysis/MemoryBuiltins.h"
     25 #include "llvm/IR/CallingConv.h"
     26 #include "llvm/IR/Constants.h"
     27 #include "llvm/IR/DataLayout.h"
     28 #include "llvm/IR/DerivedTypes.h"
     29 #include "llvm/IR/Instructions.h"
     30 #include "llvm/IR/IntrinsicInst.h"
     31 #include "llvm/IR/Module.h"
     32 #include "llvm/IR/Operator.h"
     33 #include "llvm/Pass.h"
     34 #include "llvm/Support/CallSite.h"
     35 #include "llvm/Support/Debug.h"
     36 #include "llvm/Support/ErrorHandling.h"
     37 #include "llvm/Support/GetElementPtrTypeIterator.h"
     38 #include "llvm/Support/MathExtras.h"
     39 #include "llvm/Support/raw_ostream.h"
     40 #include "llvm/Target/TargetLibraryInfo.h"
     41 #include <algorithm>
     42 using namespace llvm;
     43 
     44 STATISTIC(NumMarked    , "Number of globals marked constant");
     45 STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
     46 STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
     47 STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
     48 STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
     49 STATISTIC(NumDeleted   , "Number of globals deleted");
     50 STATISTIC(NumFnDeleted , "Number of functions deleted");
     51 STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
     52 STATISTIC(NumLocalized , "Number of globals localized");
     53 STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
     54 STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
     55 STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
     56 STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
     57 STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
     58 STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
     59 STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
     60 
     61 namespace {
     62   struct GlobalStatus;
     63   struct GlobalOpt : public ModulePass {
     64     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     65       AU.addRequired<TargetLibraryInfo>();
     66     }
     67     static char ID; // Pass identification, replacement for typeid
     68     GlobalOpt() : ModulePass(ID) {
     69       initializeGlobalOptPass(*PassRegistry::getPassRegistry());
     70     }
     71 
     72     bool runOnModule(Module &M);
     73 
     74   private:
     75     GlobalVariable *FindGlobalCtors(Module &M);
     76     bool OptimizeFunctions(Module &M);
     77     bool OptimizeGlobalVars(Module &M);
     78     bool OptimizeGlobalAliases(Module &M);
     79     bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
     80     bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
     81     bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
     82                                const SmallPtrSet<const PHINode*, 16> &PHIUsers,
     83                                const GlobalStatus &GS);
     84     bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
     85 
     86     DataLayout *TD;
     87     TargetLibraryInfo *TLI;
     88   };
     89 }
     90 
     91 char GlobalOpt::ID = 0;
     92 INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
     93                 "Global Variable Optimizer", false, false)
     94 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
     95 INITIALIZE_PASS_END(GlobalOpt, "globalopt",
     96                 "Global Variable Optimizer", false, false)
     97 
     98 ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
     99 
    100 namespace {
    101 
    102 /// GlobalStatus - As we analyze each global, keep track of some information
    103 /// about it.  If we find out that the address of the global is taken, none of
    104 /// this info will be accurate.
    105 struct GlobalStatus {
    106   /// isCompared - True if the global's address is used in a comparison.
    107   bool isCompared;
    108 
    109   /// isLoaded - True if the global is ever loaded.  If the global isn't ever
    110   /// loaded it can be deleted.
    111   bool isLoaded;
    112 
    113   /// StoredType - Keep track of what stores to the global look like.
    114   ///
    115   enum StoredType {
    116     /// NotStored - There is no store to this global.  It can thus be marked
    117     /// constant.
    118     NotStored,
    119 
    120     /// isInitializerStored - This global is stored to, but the only thing
    121     /// stored is the constant it was initialized with.  This is only tracked
    122     /// for scalar globals.
    123     isInitializerStored,
    124 
    125     /// isStoredOnce - This global is stored to, but only its initializer and
    126     /// one other value is ever stored to it.  If this global isStoredOnce, we
    127     /// track the value stored to it in StoredOnceValue below.  This is only
    128     /// tracked for scalar globals.
    129     isStoredOnce,
    130 
    131     /// isStored - This global is stored to by multiple values or something else
    132     /// that we cannot track.
    133     isStored
    134   } StoredType;
    135 
    136   /// StoredOnceValue - If only one value (besides the initializer constant) is
    137   /// ever stored to this global, keep track of what value it is.
    138   Value *StoredOnceValue;
    139 
    140   /// AccessingFunction/HasMultipleAccessingFunctions - These start out
    141   /// null/false.  When the first accessing function is noticed, it is recorded.
    142   /// When a second different accessing function is noticed,
    143   /// HasMultipleAccessingFunctions is set to true.
    144   const Function *AccessingFunction;
    145   bool HasMultipleAccessingFunctions;
    146 
    147   /// HasNonInstructionUser - Set to true if this global has a user that is not
    148   /// an instruction (e.g. a constant expr or GV initializer).
    149   bool HasNonInstructionUser;
    150 
    151   /// AtomicOrdering - Set to the strongest atomic ordering requirement.
    152   AtomicOrdering Ordering;
    153 
    154   GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
    155                    StoredOnceValue(0), AccessingFunction(0),
    156                    HasMultipleAccessingFunctions(false),
    157                    HasNonInstructionUser(false), Ordering(NotAtomic) {}
    158 };
    159 
    160 }
    161 
    162 /// StrongerOrdering - Return the stronger of the two ordering. If the two
    163 /// orderings are acquire and release, then return AcquireRelease.
    164 ///
    165 static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
    166   if (X == Acquire && Y == Release) return AcquireRelease;
    167   if (Y == Acquire && X == Release) return AcquireRelease;
    168   return (AtomicOrdering)std::max(X, Y);
    169 }
    170 
    171 /// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
    172 /// by constants itself.  Note that constants cannot be cyclic, so this test is
    173 /// pretty easy to implement recursively.
    174 ///
    175 static bool SafeToDestroyConstant(const Constant *C) {
    176   if (isa<GlobalValue>(C)) return false;
    177 
    178   for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
    179        ++UI)
    180     if (const Constant *CU = dyn_cast<Constant>(*UI)) {
    181       if (!SafeToDestroyConstant(CU)) return false;
    182     } else
    183       return false;
    184   return true;
    185 }
    186 
    187 
    188 /// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
    189 /// structure.  If the global has its address taken, return true to indicate we
    190 /// can't do anything with it.
    191 ///
    192 static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
    193                           SmallPtrSet<const PHINode*, 16> &PHIUsers) {
    194   for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
    195        ++UI) {
    196     const User *U = *UI;
    197     if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
    198       GS.HasNonInstructionUser = true;
    199 
    200       // If the result of the constantexpr isn't pointer type, then we won't
    201       // know to expect it in various places.  Just reject early.
    202       if (!isa<PointerType>(CE->getType())) return true;
    203 
    204       if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
    205     } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
    206       if (!GS.HasMultipleAccessingFunctions) {
    207         const Function *F = I->getParent()->getParent();
    208         if (GS.AccessingFunction == 0)
    209           GS.AccessingFunction = F;
    210         else if (GS.AccessingFunction != F)
    211           GS.HasMultipleAccessingFunctions = true;
    212       }
    213       if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
    214         GS.isLoaded = true;
    215         // Don't hack on volatile loads.
    216         if (LI->isVolatile()) return true;
    217         GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering());
    218       } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
    219         // Don't allow a store OF the address, only stores TO the address.
    220         if (SI->getOperand(0) == V) return true;
    221 
    222         // Don't hack on volatile stores.
    223         if (SI->isVolatile()) return true;
    224 
    225         GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());
    226 
    227         // If this is a direct store to the global (i.e., the global is a scalar
    228         // value, not an aggregate), keep more specific information about
    229         // stores.
    230         if (GS.StoredType != GlobalStatus::isStored) {
    231           if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
    232                                                            SI->getOperand(1))) {
    233             Value *StoredVal = SI->getOperand(0);
    234 
    235             if (Constant *C = dyn_cast<Constant>(StoredVal)) {
    236               if (C->isThreadDependent()) {
    237                 // The stored value changes between threads; don't track it.
    238                 return true;
    239               }
    240             }
    241 
    242             if (StoredVal == GV->getInitializer()) {
    243               if (GS.StoredType < GlobalStatus::isInitializerStored)
    244                 GS.StoredType = GlobalStatus::isInitializerStored;
    245             } else if (isa<LoadInst>(StoredVal) &&
    246                        cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
    247               if (GS.StoredType < GlobalStatus::isInitializerStored)
    248                 GS.StoredType = GlobalStatus::isInitializerStored;
    249             } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
    250               GS.StoredType = GlobalStatus::isStoredOnce;
    251               GS.StoredOnceValue = StoredVal;
    252             } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
    253                        GS.StoredOnceValue == StoredVal) {
    254               // noop.
    255             } else {
    256               GS.StoredType = GlobalStatus::isStored;
    257             }
    258           } else {
    259             GS.StoredType = GlobalStatus::isStored;
    260           }
    261         }
    262       } else if (isa<BitCastInst>(I)) {
    263         if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
    264       } else if (isa<GetElementPtrInst>(I)) {
    265         if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
    266       } else if (isa<SelectInst>(I)) {
    267         if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
    268       } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
    269         // PHI nodes we can check just like select or GEP instructions, but we
    270         // have to be careful about infinite recursion.
    271         if (PHIUsers.insert(PN))  // Not already visited.
    272           if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
    273       } else if (isa<CmpInst>(I)) {
    274         GS.isCompared = true;
    275       } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
    276         if (MTI->isVolatile()) return true;
    277         if (MTI->getArgOperand(0) == V)
    278           GS.StoredType = GlobalStatus::isStored;
    279         if (MTI->getArgOperand(1) == V)
    280           GS.isLoaded = true;
    281       } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
    282         assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
    283         if (MSI->isVolatile()) return true;
    284         GS.StoredType = GlobalStatus::isStored;
    285       } else {
    286         return true;  // Any other non-load instruction might take address!
    287       }
    288     } else if (const Constant *C = dyn_cast<Constant>(U)) {
    289       GS.HasNonInstructionUser = true;
    290       // We might have a dead and dangling constant hanging off of here.
    291       if (!SafeToDestroyConstant(C))
    292         return true;
    293     } else {
    294       GS.HasNonInstructionUser = true;
    295       // Otherwise must be some other user.
    296       return true;
    297     }
    298   }
    299 
    300   return false;
    301 }
    302 
    303 /// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
    304 /// as a root?  If so, we might not really want to eliminate the stores to it.
    305 static bool isLeakCheckerRoot(GlobalVariable *GV) {
    306   // A global variable is a root if it is a pointer, or could plausibly contain
    307   // a pointer.  There are two challenges; one is that we could have a struct
    308   // the has an inner member which is a pointer.  We recurse through the type to
    309   // detect these (up to a point).  The other is that we may actually be a union
    310   // of a pointer and another type, and so our LLVM type is an integer which
    311   // gets converted into a pointer, or our type is an [i8 x #] with a pointer
    312   // potentially contained here.
    313 
    314   if (GV->hasPrivateLinkage())
    315     return false;
    316 
    317   SmallVector<Type *, 4> Types;
    318   Types.push_back(cast<PointerType>(GV->getType())->getElementType());
    319 
    320   unsigned Limit = 20;
    321   do {
    322     Type *Ty = Types.pop_back_val();
    323     switch (Ty->getTypeID()) {
    324       default: break;
    325       case Type::PointerTyID: return true;
    326       case Type::ArrayTyID:
    327       case Type::VectorTyID: {
    328         SequentialType *STy = cast<SequentialType>(Ty);
    329         Types.push_back(STy->getElementType());
    330         break;
    331       }
    332       case Type::StructTyID: {
    333         StructType *STy = cast<StructType>(Ty);
    334         if (STy->isOpaque()) return true;
    335         for (StructType::element_iterator I = STy->element_begin(),
    336                  E = STy->element_end(); I != E; ++I) {
    337           Type *InnerTy = *I;
    338           if (isa<PointerType>(InnerTy)) return true;
    339           if (isa<CompositeType>(InnerTy))
    340             Types.push_back(InnerTy);
    341         }
    342         break;
    343       }
    344     }
    345     if (--Limit == 0) return true;
    346   } while (!Types.empty());
    347   return false;
    348 }
    349 
    350 /// Given a value that is stored to a global but never read, determine whether
    351 /// it's safe to remove the store and the chain of computation that feeds the
    352 /// store.
    353 static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
    354   do {
    355     if (isa<Constant>(V))
    356       return true;
    357     if (!V->hasOneUse())
    358       return false;
    359     if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
    360         isa<GlobalValue>(V))
    361       return false;
    362     if (isAllocationFn(V, TLI))
    363       return true;
    364 
    365     Instruction *I = cast<Instruction>(V);
    366     if (I->mayHaveSideEffects())
    367       return false;
    368     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
    369       if (!GEP->hasAllConstantIndices())
    370         return false;
    371     } else if (I->getNumOperands() != 1) {
    372       return false;
    373     }
    374 
    375     V = I->getOperand(0);
    376   } while (1);
    377 }
    378 
    379 /// CleanupPointerRootUsers - This GV is a pointer root.  Loop over all users
    380 /// of the global and clean up any that obviously don't assign the global a
    381 /// value that isn't dynamically allocated.
    382 ///
    383 static bool CleanupPointerRootUsers(GlobalVariable *GV,
    384                                     const TargetLibraryInfo *TLI) {
    385   // A brief explanation of leak checkers.  The goal is to find bugs where
    386   // pointers are forgotten, causing an accumulating growth in memory
    387   // usage over time.  The common strategy for leak checkers is to whitelist the
    388   // memory pointed to by globals at exit.  This is popular because it also
    389   // solves another problem where the main thread of a C++ program may shut down
    390   // before other threads that are still expecting to use those globals.  To
    391   // handle that case, we expect the program may create a singleton and never
    392   // destroy it.
    393 
    394   bool Changed = false;
    395 
    396   // If Dead[n].first is the only use of a malloc result, we can delete its
    397   // chain of computation and the store to the global in Dead[n].second.
    398   SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
    399 
    400   // Constants can't be pointers to dynamically allocated memory.
    401   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
    402        UI != E;) {
    403     User *U = *UI++;
    404     if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
    405       Value *V = SI->getValueOperand();
    406       if (isa<Constant>(V)) {
    407         Changed = true;
    408         SI->eraseFromParent();
    409       } else if (Instruction *I = dyn_cast<Instruction>(V)) {
    410         if (I->hasOneUse())
    411           Dead.push_back(std::make_pair(I, SI));
    412       }
    413     } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
    414       if (isa<Constant>(MSI->getValue())) {
    415         Changed = true;
    416         MSI->eraseFromParent();
    417       } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
    418         if (I->hasOneUse())
    419           Dead.push_back(std::make_pair(I, MSI));
    420       }
    421     } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
    422       GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
    423       if (MemSrc && MemSrc->isConstant()) {
    424         Changed = true;
    425         MTI->eraseFromParent();
    426       } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
    427         if (I->hasOneUse())
    428           Dead.push_back(std::make_pair(I, MTI));
    429       }
    430     } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
    431       if (CE->use_empty()) {
    432         CE->destroyConstant();
    433         Changed = true;
    434       }
    435     } else if (Constant *C = dyn_cast<Constant>(U)) {
    436       if (SafeToDestroyConstant(C)) {
    437         C->destroyConstant();
    438         // This could have invalidated UI, start over from scratch.
    439         Dead.clear();
    440         CleanupPointerRootUsers(GV, TLI);
    441         return true;
    442       }
    443     }
    444   }
    445 
    446   for (int i = 0, e = Dead.size(); i != e; ++i) {
    447     if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
    448       Dead[i].second->eraseFromParent();
    449       Instruction *I = Dead[i].first;
    450       do {
    451         if (isAllocationFn(I, TLI))
    452           break;
    453         Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
    454         if (!J)
    455           break;
    456         I->eraseFromParent();
    457         I = J;
    458       } while (1);
    459       I->eraseFromParent();
    460     }
    461   }
    462 
    463   return Changed;
    464 }
    465 
    466 /// CleanupConstantGlobalUsers - We just marked GV constant.  Loop over all
    467 /// users of the global, cleaning up the obvious ones.  This is largely just a
    468 /// quick scan over the use list to clean up the easy and obvious cruft.  This
    469 /// returns true if it made a change.
    470 static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
    471                                        DataLayout *TD, TargetLibraryInfo *TLI) {
    472   bool Changed = false;
    473   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
    474     User *U = *UI++;
    475 
    476     if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
    477       if (Init) {
    478         // Replace the load with the initializer.
    479         LI->replaceAllUsesWith(Init);
    480         LI->eraseFromParent();
    481         Changed = true;
    482       }
    483     } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
    484       // Store must be unreachable or storing Init into the global.
    485       SI->eraseFromParent();
    486       Changed = true;
    487     } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
    488       if (CE->getOpcode() == Instruction::GetElementPtr) {
    489         Constant *SubInit = 0;
    490         if (Init)
    491           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
    492         Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI);
    493       } else if (CE->getOpcode() == Instruction::BitCast &&
    494                  CE->getType()->isPointerTy()) {
    495         // Pointer cast, delete any stores and memsets to the global.
    496         Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI);
    497       }
    498 
    499       if (CE->use_empty()) {
    500         CE->destroyConstant();
    501         Changed = true;
    502       }
    503     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
    504       // Do not transform "gepinst (gep constexpr (GV))" here, because forming
    505       // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
    506       // and will invalidate our notion of what Init is.
    507       Constant *SubInit = 0;
    508       if (!isa<ConstantExpr>(GEP->getOperand(0))) {
    509         ConstantExpr *CE =
    510           dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI));
    511         if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
    512           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
    513 
    514         // If the initializer is an all-null value and we have an inbounds GEP,
    515         // we already know what the result of any load from that GEP is.
    516         // TODO: Handle splats.
    517         if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
    518           SubInit = Constant::getNullValue(GEP->getType()->getElementType());
    519       }
    520       Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI);
    521 
    522       if (GEP->use_empty()) {
    523         GEP->eraseFromParent();
    524         Changed = true;
    525       }
    526     } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
    527       if (MI->getRawDest() == V) {
    528         MI->eraseFromParent();
    529         Changed = true;
    530       }
    531 
    532     } else if (Constant *C = dyn_cast<Constant>(U)) {
    533       // If we have a chain of dead constantexprs or other things dangling from
    534       // us, and if they are all dead, nuke them without remorse.
    535       if (SafeToDestroyConstant(C)) {
    536         C->destroyConstant();
    537         // This could have invalidated UI, start over from scratch.
    538         CleanupConstantGlobalUsers(V, Init, TD, TLI);
    539         return true;
    540       }
    541     }
    542   }
    543   return Changed;
    544 }
    545 
    546 /// isSafeSROAElementUse - Return true if the specified instruction is a safe
    547 /// user of a derived expression from a global that we want to SROA.
    548 static bool isSafeSROAElementUse(Value *V) {
    549   // We might have a dead and dangling constant hanging off of here.
    550   if (Constant *C = dyn_cast<Constant>(V))
    551     return SafeToDestroyConstant(C);
    552 
    553   Instruction *I = dyn_cast<Instruction>(V);
    554   if (!I) return false;
    555 
    556   // Loads are ok.
    557   if (isa<LoadInst>(I)) return true;
    558 
    559   // Stores *to* the pointer are ok.
    560   if (StoreInst *SI = dyn_cast<StoreInst>(I))
    561     return SI->getOperand(0) != V;
    562 
    563   // Otherwise, it must be a GEP.
    564   GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
    565   if (GEPI == 0) return false;
    566 
    567   if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
    568       !cast<Constant>(GEPI->getOperand(1))->isNullValue())
    569     return false;
    570 
    571   for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
    572        I != E; ++I)
    573     if (!isSafeSROAElementUse(*I))
    574       return false;
    575   return true;
    576 }
    577 
    578 
    579 /// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
    580 /// Look at it and its uses and decide whether it is safe to SROA this global.
    581 ///
    582 static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
    583   // The user of the global must be a GEP Inst or a ConstantExpr GEP.
    584   if (!isa<GetElementPtrInst>(U) &&
    585       (!isa<ConstantExpr>(U) ||
    586        cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
    587     return false;
    588 
    589   // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
    590   // don't like < 3 operand CE's, and we don't like non-constant integer
    591   // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
    592   // value of C.
    593   if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
    594       !cast<Constant>(U->getOperand(1))->isNullValue() ||
    595       !isa<ConstantInt>(U->getOperand(2)))
    596     return false;
    597 
    598   gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
    599   ++GEPI;  // Skip over the pointer index.
    600 
    601   // If this is a use of an array allocation, do a bit more checking for sanity.
    602   if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
    603     uint64_t NumElements = AT->getNumElements();
    604     ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
    605 
    606     // Check to make sure that index falls within the array.  If not,
    607     // something funny is going on, so we won't do the optimization.
    608     //
    609     if (Idx->getZExtValue() >= NumElements)
    610       return false;
    611 
    612     // We cannot scalar repl this level of the array unless any array
    613     // sub-indices are in-range constants.  In particular, consider:
    614     // A[0][i].  We cannot know that the user isn't doing invalid things like
    615     // allowing i to index an out-of-range subscript that accesses A[1].
    616     //
    617     // Scalar replacing *just* the outer index of the array is probably not
    618     // going to be a win anyway, so just give up.
    619     for (++GEPI; // Skip array index.
    620          GEPI != E;
    621          ++GEPI) {
    622       uint64_t NumElements;
    623       if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
    624         NumElements = SubArrayTy->getNumElements();
    625       else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
    626         NumElements = SubVectorTy->getNumElements();
    627       else {
    628         assert((*GEPI)->isStructTy() &&
    629                "Indexed GEP type is not array, vector, or struct!");
    630         continue;
    631       }
    632 
    633       ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
    634       if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
    635         return false;
    636     }
    637   }
    638 
    639   for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
    640     if (!isSafeSROAElementUse(*I))
    641       return false;
    642   return true;
    643 }
    644 
    645 /// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
    646 /// is safe for us to perform this transformation.
    647 ///
    648 static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
    649   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
    650        UI != E; ++UI) {
    651     if (!IsUserOfGlobalSafeForSRA(*UI, GV))
    652       return false;
    653   }
    654   return true;
    655 }
    656 
    657 
    658 /// SRAGlobal - Perform scalar replacement of aggregates on the specified global
    659 /// variable.  This opens the door for other optimizations by exposing the
    660 /// behavior of the program in a more fine-grained way.  We have determined that
    661 /// this transformation is safe already.  We return the first global variable we
    662 /// insert so that the caller can reprocess it.
    663 static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) {
    664   // Make sure this global only has simple uses that we can SRA.
    665   if (!GlobalUsersSafeToSRA(GV))
    666     return 0;
    667 
    668   assert(GV->hasLocalLinkage() && !GV->isConstant());
    669   Constant *Init = GV->getInitializer();
    670   Type *Ty = Init->getType();
    671 
    672   std::vector<GlobalVariable*> NewGlobals;
    673   Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
    674 
    675   // Get the alignment of the global, either explicit or target-specific.
    676   unsigned StartAlignment = GV->getAlignment();
    677   if (StartAlignment == 0)
    678     StartAlignment = TD.getABITypeAlignment(GV->getType());
    679 
    680   if (StructType *STy = dyn_cast<StructType>(Ty)) {
    681     NewGlobals.reserve(STy->getNumElements());
    682     const StructLayout &Layout = *TD.getStructLayout(STy);
    683     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
    684       Constant *In = Init->getAggregateElement(i);
    685       assert(In && "Couldn't get element of initializer?");
    686       GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
    687                                                GlobalVariable::InternalLinkage,
    688                                                In, GV->getName()+"."+Twine(i),
    689                                                GV->getThreadLocalMode(),
    690                                               GV->getType()->getAddressSpace());
    691       Globals.insert(GV, NGV);
    692       NewGlobals.push_back(NGV);
    693 
    694       // Calculate the known alignment of the field.  If the original aggregate
    695       // had 256 byte alignment for example, something might depend on that:
    696       // propagate info to each field.
    697       uint64_t FieldOffset = Layout.getElementOffset(i);
    698       unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
    699       if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
    700         NGV->setAlignment(NewAlign);
    701     }
    702   } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    703     unsigned NumElements = 0;
    704     if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
    705       NumElements = ATy->getNumElements();
    706     else
    707       NumElements = cast<VectorType>(STy)->getNumElements();
    708 
    709     if (NumElements > 16 && GV->hasNUsesOrMore(16))
    710       return 0; // It's not worth it.
    711     NewGlobals.reserve(NumElements);
    712 
    713     uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
    714     unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
    715     for (unsigned i = 0, e = NumElements; i != e; ++i) {
    716       Constant *In = Init->getAggregateElement(i);
    717       assert(In && "Couldn't get element of initializer?");
    718 
    719       GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
    720                                                GlobalVariable::InternalLinkage,
    721                                                In, GV->getName()+"."+Twine(i),
    722                                                GV->getThreadLocalMode(),
    723                                               GV->getType()->getAddressSpace());
    724       Globals.insert(GV, NGV);
    725       NewGlobals.push_back(NGV);
    726 
    727       // Calculate the known alignment of the field.  If the original aggregate
    728       // had 256 byte alignment for example, something might depend on that:
    729       // propagate info to each field.
    730       unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
    731       if (NewAlign > EltAlign)
    732         NGV->setAlignment(NewAlign);
    733     }
    734   }
    735 
    736   if (NewGlobals.empty())
    737     return 0;
    738 
    739   DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
    740 
    741   Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
    742 
    743   // Loop over all of the uses of the global, replacing the constantexpr geps,
    744   // with smaller constantexpr geps or direct references.
    745   while (!GV->use_empty()) {
    746     User *GEP = GV->use_back();
    747     assert(((isa<ConstantExpr>(GEP) &&
    748              cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
    749             isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
    750 
    751     // Ignore the 1th operand, which has to be zero or else the program is quite
    752     // broken (undefined).  Get the 2nd operand, which is the structure or array
    753     // index.
    754     unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
    755     if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
    756 
    757     Value *NewPtr = NewGlobals[Val];
    758 
    759     // Form a shorter GEP if needed.
    760     if (GEP->getNumOperands() > 3) {
    761       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
    762         SmallVector<Constant*, 8> Idxs;
    763         Idxs.push_back(NullInt);
    764         for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
    765           Idxs.push_back(CE->getOperand(i));
    766         NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
    767       } else {
    768         GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
    769         SmallVector<Value*, 8> Idxs;
    770         Idxs.push_back(NullInt);
    771         for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
    772           Idxs.push_back(GEPI->getOperand(i));
    773         NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
    774                                            GEPI->getName()+"."+Twine(Val),GEPI);
    775       }
    776     }
    777     GEP->replaceAllUsesWith(NewPtr);
    778 
    779     if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
    780       GEPI->eraseFromParent();
    781     else
    782       cast<ConstantExpr>(GEP)->destroyConstant();
    783   }
    784 
    785   // Delete the old global, now that it is dead.
    786   Globals.erase(GV);
    787   ++NumSRA;
    788 
    789   // Loop over the new globals array deleting any globals that are obviously
    790   // dead.  This can arise due to scalarization of a structure or an array that
    791   // has elements that are dead.
    792   unsigned FirstGlobal = 0;
    793   for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
    794     if (NewGlobals[i]->use_empty()) {
    795       Globals.erase(NewGlobals[i]);
    796       if (FirstGlobal == i) ++FirstGlobal;
    797     }
    798 
    799   return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
    800 }
    801 
    802 /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
    803 /// value will trap if the value is dynamically null.  PHIs keeps track of any
    804 /// phi nodes we've seen to avoid reprocessing them.
    805 static bool AllUsesOfValueWillTrapIfNull(const Value *V,
    806                                          SmallPtrSet<const PHINode*, 8> &PHIs) {
    807   for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
    808        ++UI) {
    809     const User *U = *UI;
    810 
    811     if (isa<LoadInst>(U)) {
    812       // Will trap.
    813     } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
    814       if (SI->getOperand(0) == V) {
    815         //cerr << "NONTRAPPING USE: " << *U;
    816         return false;  // Storing the value.
    817       }
    818     } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
    819       if (CI->getCalledValue() != V) {
    820         //cerr << "NONTRAPPING USE: " << *U;
    821         return false;  // Not calling the ptr
    822       }
    823     } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
    824       if (II->getCalledValue() != V) {
    825         //cerr << "NONTRAPPING USE: " << *U;
    826         return false;  // Not calling the ptr
    827       }
    828     } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
    829       if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
    830     } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
    831       if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    832     } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
    833       // If we've already seen this phi node, ignore it, it has already been
    834       // checked.
    835       if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
    836         return false;
    837     } else if (isa<ICmpInst>(U) &&
    838                isa<ConstantPointerNull>(UI->getOperand(1))) {
    839       // Ignore icmp X, null
    840     } else {
    841       //cerr << "NONTRAPPING USE: " << *U;
    842       return false;
    843     }
    844   }
    845   return true;
    846 }
    847 
    848 /// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
    849 /// from GV will trap if the loaded value is null.  Note that this also permits
    850 /// comparisons of the loaded value against null, as a special case.
    851 static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
    852   for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
    853        UI != E; ++UI) {
    854     const User *U = *UI;
    855 
    856     if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
    857       SmallPtrSet<const PHINode*, 8> PHIs;
    858       if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
    859         return false;
    860     } else if (isa<StoreInst>(U)) {
    861       // Ignore stores to the global.
    862     } else {
    863       // We don't know or understand this user, bail out.
    864       //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
    865       return false;
    866     }
    867   }
    868   return true;
    869 }
    870 
    871 static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
    872   bool Changed = false;
    873   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
    874     Instruction *I = cast<Instruction>(*UI++);
    875     if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    876       LI->setOperand(0, NewV);
    877       Changed = true;
    878     } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    879       if (SI->getOperand(1) == V) {
    880         SI->setOperand(1, NewV);
    881         Changed = true;
    882       }
    883     } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
    884       CallSite CS(I);
    885       if (CS.getCalledValue() == V) {
    886         // Calling through the pointer!  Turn into a direct call, but be careful
    887         // that the pointer is not also being passed as an argument.
    888         CS.setCalledFunction(NewV);
    889         Changed = true;
    890         bool PassedAsArg = false;
    891         for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
    892           if (CS.getArgument(i) == V) {
    893             PassedAsArg = true;
    894             CS.setArgument(i, NewV);
    895           }
    896 
    897         if (PassedAsArg) {
    898           // Being passed as an argument also.  Be careful to not invalidate UI!
    899           UI = V->use_begin();
    900         }
    901       }
    902     } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
    903       Changed |= OptimizeAwayTrappingUsesOfValue(CI,
    904                                 ConstantExpr::getCast(CI->getOpcode(),
    905                                                       NewV, CI->getType()));
    906       if (CI->use_empty()) {
    907         Changed = true;
    908         CI->eraseFromParent();
    909       }
    910     } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
    911       // Should handle GEP here.
    912       SmallVector<Constant*, 8> Idxs;
    913       Idxs.reserve(GEPI->getNumOperands()-1);
    914       for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
    915            i != e; ++i)
    916         if (Constant *C = dyn_cast<Constant>(*i))
    917           Idxs.push_back(C);
    918         else
    919           break;
    920       if (Idxs.size() == GEPI->getNumOperands()-1)
    921         Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
    922                           ConstantExpr::getGetElementPtr(NewV, Idxs));
    923       if (GEPI->use_empty()) {
    924         Changed = true;
    925         GEPI->eraseFromParent();
    926       }
    927     }
    928   }
    929 
    930   return Changed;
    931 }
    932 
    933 
    934 /// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
    935 /// value stored into it.  If there are uses of the loaded value that would trap
    936 /// if the loaded value is dynamically null, then we know that they cannot be
    937 /// reachable with a null optimize away the load.
    938 static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
    939                                             DataLayout *TD,
    940                                             TargetLibraryInfo *TLI) {
    941   bool Changed = false;
    942 
    943   // Keep track of whether we are able to remove all the uses of the global
    944   // other than the store that defines it.
    945   bool AllNonStoreUsesGone = true;
    946 
    947   // Replace all uses of loads with uses of uses of the stored value.
    948   for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
    949     User *GlobalUser = *GUI++;
    950     if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
    951       Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
    952       // If we were able to delete all uses of the loads
    953       if (LI->use_empty()) {
    954         LI->eraseFromParent();
    955         Changed = true;
    956       } else {
    957         AllNonStoreUsesGone = false;
    958       }
    959     } else if (isa<StoreInst>(GlobalUser)) {
    960       // Ignore the store that stores "LV" to the global.
    961       assert(GlobalUser->getOperand(1) == GV &&
    962              "Must be storing *to* the global");
    963     } else {
    964       AllNonStoreUsesGone = false;
    965 
    966       // If we get here we could have other crazy uses that are transitively
    967       // loaded.
    968       assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
    969               isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
    970               isa<BitCastInst>(GlobalUser) ||
    971               isa<GetElementPtrInst>(GlobalUser)) &&
    972              "Only expect load and stores!");
    973     }
    974   }
    975 
    976   if (Changed) {
    977     DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
    978     ++NumGlobUses;
    979   }
    980 
    981   // If we nuked all of the loads, then none of the stores are needed either,
    982   // nor is the global.
    983   if (AllNonStoreUsesGone) {
    984     if (isLeakCheckerRoot(GV)) {
    985       Changed |= CleanupPointerRootUsers(GV, TLI);
    986     } else {
    987       Changed = true;
    988       CleanupConstantGlobalUsers(GV, 0, TD, TLI);
    989     }
    990     if (GV->use_empty()) {
    991       DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
    992       Changed = true;
    993       GV->eraseFromParent();
    994       ++NumDeleted;
    995     }
    996   }
    997   return Changed;
    998 }
    999 
   1000 /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
   1001 /// instructions that are foldable.
   1002 static void ConstantPropUsersOf(Value *V,
   1003                                 DataLayout *TD, TargetLibraryInfo *TLI) {
   1004   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
   1005     if (Instruction *I = dyn_cast<Instruction>(*UI++))
   1006       if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) {
   1007         I->replaceAllUsesWith(NewC);
   1008 
   1009         // Advance UI to the next non-I use to avoid invalidating it!
   1010         // Instructions could multiply use V.
   1011         while (UI != E && *UI == I)
   1012           ++UI;
   1013         I->eraseFromParent();
   1014       }
   1015 }
   1016 
   1017 /// OptimizeGlobalAddressOfMalloc - This function takes the specified global
   1018 /// variable, and transforms the program as if it always contained the result of
   1019 /// the specified malloc.  Because it is always the result of the specified
   1020 /// malloc, there is no reason to actually DO the malloc.  Instead, turn the
   1021 /// malloc into a global, and any loads of GV as uses of the new global.
   1022 static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
   1023                                                      CallInst *CI,
   1024                                                      Type *AllocTy,
   1025                                                      ConstantInt *NElements,
   1026                                                      DataLayout *TD,
   1027                                                      TargetLibraryInfo *TLI) {
   1028   DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');
   1029 
   1030   Type *GlobalType;
   1031   if (NElements->getZExtValue() == 1)
   1032     GlobalType = AllocTy;
   1033   else
   1034     // If we have an array allocation, the global variable is of an array.
   1035     GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
   1036 
   1037   // Create the new global variable.  The contents of the malloc'd memory is
   1038   // undefined, so initialize with an undef value.
   1039   GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
   1040                                              GlobalType, false,
   1041                                              GlobalValue::InternalLinkage,
   1042                                              UndefValue::get(GlobalType),
   1043                                              GV->getName()+".body",
   1044                                              GV,
   1045                                              GV->getThreadLocalMode());
   1046 
   1047   // If there are bitcast users of the malloc (which is typical, usually we have
   1048   // a malloc + bitcast) then replace them with uses of the new global.  Update
   1049   // other users to use the global as well.
   1050   BitCastInst *TheBC = 0;
   1051   while (!CI->use_empty()) {
   1052     Instruction *User = cast<Instruction>(CI->use_back());
   1053     if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
   1054       if (BCI->getType() == NewGV->getType()) {
   1055         BCI->replaceAllUsesWith(NewGV);
   1056         BCI->eraseFromParent();
   1057       } else {
   1058         BCI->setOperand(0, NewGV);
   1059       }
   1060     } else {
   1061       if (TheBC == 0)
   1062         TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
   1063       User->replaceUsesOfWith(CI, TheBC);
   1064     }
   1065   }
   1066 
   1067   Constant *RepValue = NewGV;
   1068   if (NewGV->getType() != GV->getType()->getElementType())
   1069     RepValue = ConstantExpr::getBitCast(RepValue,
   1070                                         GV->getType()->getElementType());
   1071 
   1072   // If there is a comparison against null, we will insert a global bool to
   1073   // keep track of whether the global was initialized yet or not.
   1074   GlobalVariable *InitBool =
   1075     new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
   1076                        GlobalValue::InternalLinkage,
   1077                        ConstantInt::getFalse(GV->getContext()),
   1078                        GV->getName()+".init", GV->getThreadLocalMode());
   1079   bool InitBoolUsed = false;
   1080 
   1081   // Loop over all uses of GV, processing them in turn.
   1082   while (!GV->use_empty()) {
   1083     if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
   1084       // The global is initialized when the store to it occurs.
   1085       new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
   1086                     SI->getOrdering(), SI->getSynchScope(), SI);
   1087       SI->eraseFromParent();
   1088       continue;
   1089     }
   1090 
   1091     LoadInst *LI = cast<LoadInst>(GV->use_back());
   1092     while (!LI->use_empty()) {
   1093       Use &LoadUse = LI->use_begin().getUse();
   1094       if (!isa<ICmpInst>(LoadUse.getUser())) {
   1095         LoadUse = RepValue;
   1096         continue;
   1097       }
   1098 
   1099       ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
   1100       // Replace the cmp X, 0 with a use of the bool value.
   1101       // Sink the load to where the compare was, if atomic rules allow us to.
   1102       Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
   1103                                LI->getOrdering(), LI->getSynchScope(),
   1104                                LI->isUnordered() ? (Instruction*)ICI : LI);
   1105       InitBoolUsed = true;
   1106       switch (ICI->getPredicate()) {
   1107       default: llvm_unreachable("Unknown ICmp Predicate!");
   1108       case ICmpInst::ICMP_ULT:
   1109       case ICmpInst::ICMP_SLT:   // X < null -> always false
   1110         LV = ConstantInt::getFalse(GV->getContext());
   1111         break;
   1112       case ICmpInst::ICMP_ULE:
   1113       case ICmpInst::ICMP_SLE:
   1114       case ICmpInst::ICMP_EQ:
   1115         LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
   1116         break;
   1117       case ICmpInst::ICMP_NE:
   1118       case ICmpInst::ICMP_UGE:
   1119       case ICmpInst::ICMP_SGE:
   1120       case ICmpInst::ICMP_UGT:
   1121       case ICmpInst::ICMP_SGT:
   1122         break;  // no change.
   1123       }
   1124       ICI->replaceAllUsesWith(LV);
   1125       ICI->eraseFromParent();
   1126     }
   1127     LI->eraseFromParent();
   1128   }
   1129 
   1130   // If the initialization boolean was used, insert it, otherwise delete it.
   1131   if (!InitBoolUsed) {
   1132     while (!InitBool->use_empty())  // Delete initializations
   1133       cast<StoreInst>(InitBool->use_back())->eraseFromParent();
   1134     delete InitBool;
   1135   } else
   1136     GV->getParent()->getGlobalList().insert(GV, InitBool);
   1137 
   1138   // Now the GV is dead, nuke it and the malloc..
   1139   GV->eraseFromParent();
   1140   CI->eraseFromParent();
   1141 
   1142   // To further other optimizations, loop over all users of NewGV and try to
   1143   // constant prop them.  This will promote GEP instructions with constant
   1144   // indices into GEP constant-exprs, which will allow global-opt to hack on it.
   1145   ConstantPropUsersOf(NewGV, TD, TLI);
   1146   if (RepValue != NewGV)
   1147     ConstantPropUsersOf(RepValue, TD, TLI);
   1148 
   1149   return NewGV;
   1150 }
   1151 
   1152 /// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
   1153 /// to make sure that there are no complex uses of V.  We permit simple things
   1154 /// like dereferencing the pointer, but not storing through the address, unless
   1155 /// it is to the specified global.
   1156 static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
   1157                                                       const GlobalVariable *GV,
   1158                                          SmallPtrSet<const PHINode*, 8> &PHIs) {
   1159   for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
   1160        UI != E; ++UI) {
   1161     const Instruction *Inst = cast<Instruction>(*UI);
   1162 
   1163     if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
   1164       continue; // Fine, ignore.
   1165     }
   1166 
   1167     if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
   1168       if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
   1169         return false;  // Storing the pointer itself... bad.
   1170       continue; // Otherwise, storing through it, or storing into GV... fine.
   1171     }
   1172 
   1173     // Must index into the array and into the struct.
   1174     if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
   1175       if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
   1176         return false;
   1177       continue;
   1178     }
   1179 
   1180     if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
   1181       // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
   1182       // cycles.
   1183       if (PHIs.insert(PN))
   1184         if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
   1185           return false;
   1186       continue;
   1187     }
   1188 
   1189     if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
   1190       if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
   1191         return false;
   1192       continue;
   1193     }
   1194 
   1195     return false;
   1196   }
   1197   return true;
   1198 }
   1199 
   1200 /// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
   1201 /// somewhere.  Transform all uses of the allocation into loads from the
   1202 /// global and uses of the resultant pointer.  Further, delete the store into
   1203 /// GV.  This assumes that these value pass the
   1204 /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
   1205 static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
   1206                                           GlobalVariable *GV) {
   1207   while (!Alloc->use_empty()) {
   1208     Instruction *U = cast<Instruction>(*Alloc->use_begin());
   1209     Instruction *InsertPt = U;
   1210     if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
   1211       // If this is the store of the allocation into the global, remove it.
   1212       if (SI->getOperand(1) == GV) {
   1213         SI->eraseFromParent();
   1214         continue;
   1215       }
   1216     } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
   1217       // Insert the load in the corresponding predecessor, not right before the
   1218       // PHI.
   1219       InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
   1220     } else if (isa<BitCastInst>(U)) {
   1221       // Must be bitcast between the malloc and store to initialize the global.
   1222       ReplaceUsesOfMallocWithGlobal(U, GV);
   1223       U->eraseFromParent();
   1224       continue;
   1225     } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
   1226       // If this is a "GEP bitcast" and the user is a store to the global, then
   1227       // just process it as a bitcast.
   1228       if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
   1229         if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
   1230           if (SI->getOperand(1) == GV) {
   1231             // Must be bitcast GEP between the malloc and store to initialize
   1232             // the global.
   1233             ReplaceUsesOfMallocWithGlobal(GEPI, GV);
   1234             GEPI->eraseFromParent();
   1235             continue;
   1236           }
   1237     }
   1238 
   1239     // Insert a load from the global, and use it instead of the malloc.
   1240     Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
   1241     U->replaceUsesOfWith(Alloc, NL);
   1242   }
   1243 }
   1244 
   1245 /// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
   1246 /// of a load) are simple enough to perform heap SRA on.  This permits GEP's
   1247 /// that index through the array and struct field, icmps of null, and PHIs.
   1248 static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
   1249                         SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
   1250                         SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
   1251   // We permit two users of the load: setcc comparing against the null
   1252   // pointer, and a getelementptr of a specific form.
   1253   for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
   1254        ++UI) {
   1255     const Instruction *User = cast<Instruction>(*UI);
   1256 
   1257     // Comparison against null is ok.
   1258     if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
   1259       if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
   1260         return false;
   1261       continue;
   1262     }
   1263 
   1264     // getelementptr is also ok, but only a simple form.
   1265     if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
   1266       // Must index into the array and into the struct.
   1267       if (GEPI->getNumOperands() < 3)
   1268         return false;
   1269 
   1270       // Otherwise the GEP is ok.
   1271       continue;
   1272     }
   1273 
   1274     if (const PHINode *PN = dyn_cast<PHINode>(User)) {
   1275       if (!LoadUsingPHIsPerLoad.insert(PN))
   1276         // This means some phi nodes are dependent on each other.
   1277         // Avoid infinite looping!
   1278         return false;
   1279       if (!LoadUsingPHIs.insert(PN))
   1280         // If we have already analyzed this PHI, then it is safe.
   1281         continue;
   1282 
   1283       // Make sure all uses of the PHI are simple enough to transform.
   1284       if (!LoadUsesSimpleEnoughForHeapSRA(PN,
   1285                                           LoadUsingPHIs, LoadUsingPHIsPerLoad))
   1286         return false;
   1287 
   1288       continue;
   1289     }
   1290 
   1291     // Otherwise we don't know what this is, not ok.
   1292     return false;
   1293   }
   1294 
   1295   return true;
   1296 }
   1297 
   1298 
   1299 /// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
   1300 /// GV are simple enough to perform HeapSRA, return true.
   1301 static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
   1302                                                     Instruction *StoredVal) {
   1303   SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
   1304   SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
   1305   for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
   1306        UI != E; ++UI)
   1307     if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
   1308       if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
   1309                                           LoadUsingPHIsPerLoad))
   1310         return false;
   1311       LoadUsingPHIsPerLoad.clear();
   1312     }
   1313 
   1314   // If we reach here, we know that all uses of the loads and transitive uses
   1315   // (through PHI nodes) are simple enough to transform.  However, we don't know
   1316   // that all inputs the to the PHI nodes are in the same equivalence sets.
   1317   // Check to verify that all operands of the PHIs are either PHIS that can be
   1318   // transformed, loads from GV, or MI itself.
   1319   for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
   1320        , E = LoadUsingPHIs.end(); I != E; ++I) {
   1321     const PHINode *PN = *I;
   1322     for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
   1323       Value *InVal = PN->getIncomingValue(op);
   1324 
   1325       // PHI of the stored value itself is ok.
   1326       if (InVal == StoredVal) continue;
   1327 
   1328       if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
   1329         // One of the PHIs in our set is (optimistically) ok.
   1330         if (LoadUsingPHIs.count(InPN))
   1331           continue;
   1332         return false;
   1333       }
   1334 
   1335       // Load from GV is ok.
   1336       if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
   1337         if (LI->getOperand(0) == GV)
   1338           continue;
   1339 
   1340       // UNDEF? NULL?
   1341 
   1342       // Anything else is rejected.
   1343       return false;
   1344     }
   1345   }
   1346 
   1347   return true;
   1348 }
   1349 
   1350 static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
   1351                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
   1352                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   1353   std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
   1354 
   1355   if (FieldNo >= FieldVals.size())
   1356     FieldVals.resize(FieldNo+1);
   1357 
   1358   // If we already have this value, just reuse the previously scalarized
   1359   // version.
   1360   if (Value *FieldVal = FieldVals[FieldNo])
   1361     return FieldVal;
   1362 
   1363   // Depending on what instruction this is, we have several cases.
   1364   Value *Result;
   1365   if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
   1366     // This is a scalarized version of the load from the global.  Just create
   1367     // a new Load of the scalarized global.
   1368     Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
   1369                                            InsertedScalarizedValues,
   1370                                            PHIsToRewrite),
   1371                           LI->getName()+".f"+Twine(FieldNo), LI);
   1372   } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
   1373     // PN's type is pointer to struct.  Make a new PHI of pointer to struct
   1374     // field.
   1375     StructType *ST =
   1376       cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
   1377 
   1378     PHINode *NewPN =
   1379      PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
   1380                      PN->getNumIncomingValues(),
   1381                      PN->getName()+".f"+Twine(FieldNo), PN);
   1382     Result = NewPN;
   1383     PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
   1384   } else {
   1385     llvm_unreachable("Unknown usable value");
   1386   }
   1387 
   1388   return FieldVals[FieldNo] = Result;
   1389 }
   1390 
   1391 /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
   1392 /// the load, rewrite the derived value to use the HeapSRoA'd load.
   1393 static void RewriteHeapSROALoadUser(Instruction *LoadUser,
   1394              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
   1395                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   1396   // If this is a comparison against null, handle it.
   1397   if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
   1398     assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
   1399     // If we have a setcc of the loaded pointer, we can use a setcc of any
   1400     // field.
   1401     Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
   1402                                    InsertedScalarizedValues, PHIsToRewrite);
   1403 
   1404     Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
   1405                               Constant::getNullValue(NPtr->getType()),
   1406                               SCI->getName());
   1407     SCI->replaceAllUsesWith(New);
   1408     SCI->eraseFromParent();
   1409     return;
   1410   }
   1411 
   1412   // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
   1413   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
   1414     assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
   1415            && "Unexpected GEPI!");
   1416 
   1417     // Load the pointer for this field.
   1418     unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
   1419     Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
   1420                                      InsertedScalarizedValues, PHIsToRewrite);
   1421 
   1422     // Create the new GEP idx vector.
   1423     SmallVector<Value*, 8> GEPIdx;
   1424     GEPIdx.push_back(GEPI->getOperand(1));
   1425     GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
   1426 
   1427     Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
   1428                                              GEPI->getName(), GEPI);
   1429     GEPI->replaceAllUsesWith(NGEPI);
   1430     GEPI->eraseFromParent();
   1431     return;
   1432   }
   1433 
   1434   // Recursively transform the users of PHI nodes.  This will lazily create the
   1435   // PHIs that are needed for individual elements.  Keep track of what PHIs we
   1436   // see in InsertedScalarizedValues so that we don't get infinite loops (very
   1437   // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
   1438   // already been seen first by another load, so its uses have already been
   1439   // processed.
   1440   PHINode *PN = cast<PHINode>(LoadUser);
   1441   if (!InsertedScalarizedValues.insert(std::make_pair(PN,
   1442                                               std::vector<Value*>())).second)
   1443     return;
   1444 
   1445   // If this is the first time we've seen this PHI, recursively process all
   1446   // users.
   1447   for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
   1448     Instruction *User = cast<Instruction>(*UI++);
   1449     RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
   1450   }
   1451 }
   1452 
   1453 /// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
   1454 /// is a value loaded from the global.  Eliminate all uses of Ptr, making them
   1455 /// use FieldGlobals instead.  All uses of loaded values satisfy
   1456 /// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
   1457 static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
   1458                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
   1459                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   1460   for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
   1461        UI != E; ) {
   1462     Instruction *User = cast<Instruction>(*UI++);
   1463     RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
   1464   }
   1465 
   1466   if (Load->use_empty()) {
   1467     Load->eraseFromParent();
   1468     InsertedScalarizedValues.erase(Load);
   1469   }
   1470 }
   1471 
   1472 /// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
   1473 /// it up into multiple allocations of arrays of the fields.
   1474 static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
   1475                                             Value *NElems, DataLayout *TD,
   1476                                             const TargetLibraryInfo *TLI) {
   1477   DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
   1478   Type *MAT = getMallocAllocatedType(CI, TLI);
   1479   StructType *STy = cast<StructType>(MAT);
   1480 
   1481   // There is guaranteed to be at least one use of the malloc (storing
   1482   // it into GV).  If there are other uses, change them to be uses of
   1483   // the global to simplify later code.  This also deletes the store
   1484   // into GV.
   1485   ReplaceUsesOfMallocWithGlobal(CI, GV);
   1486 
   1487   // Okay, at this point, there are no users of the malloc.  Insert N
   1488   // new mallocs at the same place as CI, and N globals.
   1489   std::vector<Value*> FieldGlobals;
   1490   std::vector<Value*> FieldMallocs;
   1491 
   1492   for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
   1493     Type *FieldTy = STy->getElementType(FieldNo);
   1494     PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
   1495 
   1496     GlobalVariable *NGV =
   1497       new GlobalVariable(*GV->getParent(),
   1498                          PFieldTy, false, GlobalValue::InternalLinkage,
   1499                          Constant::getNullValue(PFieldTy),
   1500                          GV->getName() + ".f" + Twine(FieldNo), GV,
   1501                          GV->getThreadLocalMode());
   1502     FieldGlobals.push_back(NGV);
   1503 
   1504     unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
   1505     if (StructType *ST = dyn_cast<StructType>(FieldTy))
   1506       TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
   1507     Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
   1508     Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
   1509                                         ConstantInt::get(IntPtrTy, TypeSize),
   1510                                         NElems, 0,
   1511                                         CI->getName() + ".f" + Twine(FieldNo));
   1512     FieldMallocs.push_back(NMI);
   1513     new StoreInst(NMI, NGV, CI);
   1514   }
   1515 
   1516   // The tricky aspect of this transformation is handling the case when malloc
   1517   // fails.  In the original code, malloc failing would set the result pointer
   1518   // of malloc to null.  In this case, some mallocs could succeed and others
   1519   // could fail.  As such, we emit code that looks like this:
   1520   //    F0 = malloc(field0)
   1521   //    F1 = malloc(field1)
   1522   //    F2 = malloc(field2)
   1523   //    if (F0 == 0 || F1 == 0 || F2 == 0) {
   1524   //      if (F0) { free(F0); F0 = 0; }
   1525   //      if (F1) { free(F1); F1 = 0; }
   1526   //      if (F2) { free(F2); F2 = 0; }
   1527   //    }
   1528   // The malloc can also fail if its argument is too large.
   1529   Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
   1530   Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
   1531                                   ConstantZero, "isneg");
   1532   for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
   1533     Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
   1534                              Constant::getNullValue(FieldMallocs[i]->getType()),
   1535                                "isnull");
   1536     RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
   1537   }
   1538 
   1539   // Split the basic block at the old malloc.
   1540   BasicBlock *OrigBB = CI->getParent();
   1541   BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
   1542 
   1543   // Create the block to check the first condition.  Put all these blocks at the
   1544   // end of the function as they are unlikely to be executed.
   1545   BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
   1546                                                 "malloc_ret_null",
   1547                                                 OrigBB->getParent());
   1548 
   1549   // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
   1550   // branch on RunningOr.
   1551   OrigBB->getTerminator()->eraseFromParent();
   1552   BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
   1553 
   1554   // Within the NullPtrBlock, we need to emit a comparison and branch for each
   1555   // pointer, because some may be null while others are not.
   1556   for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
   1557     Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
   1558     Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
   1559                               Constant::getNullValue(GVVal->getType()));
   1560     BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
   1561                                                OrigBB->getParent());
   1562     BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
   1563                                                OrigBB->getParent());
   1564     Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
   1565                                          Cmp, NullPtrBlock);
   1566 
   1567     // Fill in FreeBlock.
   1568     CallInst::CreateFree(GVVal, BI);
   1569     new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
   1570                   FreeBlock);
   1571     BranchInst::Create(NextBlock, FreeBlock);
   1572 
   1573     NullPtrBlock = NextBlock;
   1574   }
   1575 
   1576   BranchInst::Create(ContBB, NullPtrBlock);
   1577 
   1578   // CI is no longer needed, remove it.
   1579   CI->eraseFromParent();
   1580 
   1581   /// InsertedScalarizedLoads - As we process loads, if we can't immediately
   1582   /// update all uses of the load, keep track of what scalarized loads are
   1583   /// inserted for a given load.
   1584   DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
   1585   InsertedScalarizedValues[GV] = FieldGlobals;
   1586 
   1587   std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
   1588 
   1589   // Okay, the malloc site is completely handled.  All of the uses of GV are now
   1590   // loads, and all uses of those loads are simple.  Rewrite them to use loads
   1591   // of the per-field globals instead.
   1592   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
   1593     Instruction *User = cast<Instruction>(*UI++);
   1594 
   1595     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
   1596       RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
   1597       continue;
   1598     }
   1599 
   1600     // Must be a store of null.
   1601     StoreInst *SI = cast<StoreInst>(User);
   1602     assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
   1603            "Unexpected heap-sra user!");
   1604 
   1605     // Insert a store of null into each global.
   1606     for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
   1607       PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
   1608       Constant *Null = Constant::getNullValue(PT->getElementType());
   1609       new StoreInst(Null, FieldGlobals[i], SI);
   1610     }
   1611     // Erase the original store.
   1612     SI->eraseFromParent();
   1613   }
   1614 
   1615   // While we have PHIs that are interesting to rewrite, do it.
   1616   while (!PHIsToRewrite.empty()) {
   1617     PHINode *PN = PHIsToRewrite.back().first;
   1618     unsigned FieldNo = PHIsToRewrite.back().second;
   1619     PHIsToRewrite.pop_back();
   1620     PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
   1621     assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
   1622 
   1623     // Add all the incoming values.  This can materialize more phis.
   1624     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
   1625       Value *InVal = PN->getIncomingValue(i);
   1626       InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
   1627                                PHIsToRewrite);
   1628       FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
   1629     }
   1630   }
   1631 
   1632   // Drop all inter-phi links and any loads that made it this far.
   1633   for (DenseMap<Value*, std::vector<Value*> >::iterator
   1634        I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
   1635        I != E; ++I) {
   1636     if (PHINode *PN = dyn_cast<PHINode>(I->first))
   1637       PN->dropAllReferences();
   1638     else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
   1639       LI->dropAllReferences();
   1640   }
   1641 
   1642   // Delete all the phis and loads now that inter-references are dead.
   1643   for (DenseMap<Value*, std::vector<Value*> >::iterator
   1644        I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
   1645        I != E; ++I) {
   1646     if (PHINode *PN = dyn_cast<PHINode>(I->first))
   1647       PN->eraseFromParent();
   1648     else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
   1649       LI->eraseFromParent();
   1650   }
   1651 
   1652   // The old global is now dead, remove it.
   1653   GV->eraseFromParent();
   1654 
   1655   ++NumHeapSRA;
   1656   return cast<GlobalVariable>(FieldGlobals[0]);
   1657 }
   1658 
   1659 /// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
   1660 /// pointer global variable with a single value stored it that is a malloc or
   1661 /// cast of malloc.
   1662 static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
   1663                                                CallInst *CI,
   1664                                                Type *AllocTy,
   1665                                                AtomicOrdering Ordering,
   1666                                                Module::global_iterator &GVI,
   1667                                                DataLayout *TD,
   1668                                                TargetLibraryInfo *TLI) {
   1669   if (!TD)
   1670     return false;
   1671 
   1672   // If this is a malloc of an abstract type, don't touch it.
   1673   if (!AllocTy->isSized())
   1674     return false;
   1675 
   1676   // We can't optimize this global unless all uses of it are *known* to be
   1677   // of the malloc value, not of the null initializer value (consider a use
   1678   // that compares the global's value against zero to see if the malloc has
   1679   // been reached).  To do this, we check to see if all uses of the global
   1680   // would trap if the global were null: this proves that they must all
   1681   // happen after the malloc.
   1682   if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
   1683     return false;
   1684 
   1685   // We can't optimize this if the malloc itself is used in a complex way,
   1686   // for example, being stored into multiple globals.  This allows the
   1687   // malloc to be stored into the specified global, loaded icmp'd, and
   1688   // GEP'd.  These are all things we could transform to using the global
   1689   // for.
   1690   SmallPtrSet<const PHINode*, 8> PHIs;
   1691   if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
   1692     return false;
   1693 
   1694   // If we have a global that is only initialized with a fixed size malloc,
   1695   // transform the program to use global memory instead of malloc'd memory.
   1696   // This eliminates dynamic allocation, avoids an indirection accessing the
   1697   // data, and exposes the resultant global to further GlobalOpt.
   1698   // We cannot optimize the malloc if we cannot determine malloc array size.
   1699   Value *NElems = getMallocArraySize(CI, TD, TLI, true);
   1700   if (!NElems)
   1701     return false;
   1702 
   1703   if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
   1704     // Restrict this transformation to only working on small allocations
   1705     // (2048 bytes currently), as we don't want to introduce a 16M global or
   1706     // something.
   1707     if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
   1708       GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD, TLI);
   1709       return true;
   1710     }
   1711 
   1712   // If the allocation is an array of structures, consider transforming this
   1713   // into multiple malloc'd arrays, one for each field.  This is basically
   1714   // SRoA for malloc'd memory.
   1715 
   1716   if (Ordering != NotAtomic)
   1717     return false;
   1718 
   1719   // If this is an allocation of a fixed size array of structs, analyze as a
   1720   // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
   1721   if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
   1722     if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
   1723       AllocTy = AT->getElementType();
   1724 
   1725   StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
   1726   if (!AllocSTy)
   1727     return false;
   1728 
   1729   // This the structure has an unreasonable number of fields, leave it
   1730   // alone.
   1731   if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
   1732       AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
   1733 
   1734     // If this is a fixed size array, transform the Malloc to be an alloc of
   1735     // structs.  malloc [100 x struct],1 -> malloc struct, 100
   1736     if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
   1737       Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
   1738       unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
   1739       Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
   1740       Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
   1741       Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
   1742                                                    AllocSize, NumElements,
   1743                                                    0, CI->getName());
   1744       Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
   1745       CI->replaceAllUsesWith(Cast);
   1746       CI->eraseFromParent();
   1747       if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
   1748         CI = cast<CallInst>(BCI->getOperand(0));
   1749       else
   1750         CI = cast<CallInst>(Malloc);
   1751     }
   1752 
   1753     GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true),
   1754                                TD, TLI);
   1755     return true;
   1756   }
   1757 
   1758   return false;
   1759 }
   1760 
   1761 // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
   1762 // that only one value (besides its initializer) is ever stored to the global.
   1763 static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
   1764                                      AtomicOrdering Ordering,
   1765                                      Module::global_iterator &GVI,
   1766                                      DataLayout *TD, TargetLibraryInfo *TLI) {
   1767   // Ignore no-op GEPs and bitcasts.
   1768   StoredOnceVal = StoredOnceVal->stripPointerCasts();
   1769 
   1770   // If we are dealing with a pointer global that is initialized to null and
   1771   // only has one (non-null) value stored into it, then we can optimize any
   1772   // users of the loaded value (often calls and loads) that would trap if the
   1773   // value was null.
   1774   if (GV->getInitializer()->getType()->isPointerTy() &&
   1775       GV->getInitializer()->isNullValue()) {
   1776     if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
   1777       if (GV->getInitializer()->getType() != SOVC->getType())
   1778         SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
   1779 
   1780       // Optimize away any trapping uses of the loaded value.
   1781       if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
   1782         return true;
   1783     } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
   1784       Type *MallocType = getMallocAllocatedType(CI, TLI);
   1785       if (MallocType &&
   1786           TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
   1787                                              TD, TLI))
   1788         return true;
   1789     }
   1790   }
   1791 
   1792   return false;
   1793 }
   1794 
   1795 /// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
   1796 /// two values ever stored into GV are its initializer and OtherVal.  See if we
   1797 /// can shrink the global into a boolean and select between the two values
   1798 /// whenever it is used.  This exposes the values to other scalar optimizations.
   1799 static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
   1800   Type *GVElType = GV->getType()->getElementType();
   1801 
   1802   // If GVElType is already i1, it is already shrunk.  If the type of the GV is
   1803   // an FP value, pointer or vector, don't do this optimization because a select
   1804   // between them is very expensive and unlikely to lead to later
   1805   // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
   1806   // where v1 and v2 both require constant pool loads, a big loss.
   1807   if (GVElType == Type::getInt1Ty(GV->getContext()) ||
   1808       GVElType->isFloatingPointTy() ||
   1809       GVElType->isPointerTy() || GVElType->isVectorTy())
   1810     return false;
   1811 
   1812   // Walk the use list of the global seeing if all the uses are load or store.
   1813   // If there is anything else, bail out.
   1814   for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
   1815     User *U = *I;
   1816     if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
   1817       return false;
   1818   }
   1819 
   1820   DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV);
   1821 
   1822   // Create the new global, initializing it to false.
   1823   GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
   1824                                              false,
   1825                                              GlobalValue::InternalLinkage,
   1826                                         ConstantInt::getFalse(GV->getContext()),
   1827                                              GV->getName()+".b",
   1828                                              GV->getThreadLocalMode(),
   1829                                              GV->getType()->getAddressSpace());
   1830   GV->getParent()->getGlobalList().insert(GV, NewGV);
   1831 
   1832   Constant *InitVal = GV->getInitializer();
   1833   assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
   1834          "No reason to shrink to bool!");
   1835 
   1836   // If initialized to zero and storing one into the global, we can use a cast
   1837   // instead of a select to synthesize the desired value.
   1838   bool IsOneZero = false;
   1839   if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
   1840     IsOneZero = InitVal->isNullValue() && CI->isOne();
   1841 
   1842   while (!GV->use_empty()) {
   1843     Instruction *UI = cast<Instruction>(GV->use_back());
   1844     if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
   1845       // Change the store into a boolean store.
   1846       bool StoringOther = SI->getOperand(0) == OtherVal;
   1847       // Only do this if we weren't storing a loaded value.
   1848       Value *StoreVal;
   1849       if (StoringOther || SI->getOperand(0) == InitVal) {
   1850         StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
   1851                                     StoringOther);
   1852       } else {
   1853         // Otherwise, we are storing a previously loaded copy.  To do this,
   1854         // change the copy from copying the original value to just copying the
   1855         // bool.
   1856         Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
   1857 
   1858         // If we've already replaced the input, StoredVal will be a cast or
   1859         // select instruction.  If not, it will be a load of the original
   1860         // global.
   1861         if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
   1862           assert(LI->getOperand(0) == GV && "Not a copy!");
   1863           // Insert a new load, to preserve the saved value.
   1864           StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
   1865                                   LI->getOrdering(), LI->getSynchScope(), LI);
   1866         } else {
   1867           assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
   1868                  "This is not a form that we understand!");
   1869           StoreVal = StoredVal->getOperand(0);
   1870           assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
   1871         }
   1872       }
   1873       new StoreInst(StoreVal, NewGV, false, 0,
   1874                     SI->getOrdering(), SI->getSynchScope(), SI);
   1875     } else {
   1876       // Change the load into a load of bool then a select.
   1877       LoadInst *LI = cast<LoadInst>(UI);
   1878       LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
   1879                                    LI->getOrdering(), LI->getSynchScope(), LI);
   1880       Value *NSI;
   1881       if (IsOneZero)
   1882         NSI = new ZExtInst(NLI, LI->getType(), "", LI);
   1883       else
   1884         NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
   1885       NSI->takeName(LI);
   1886       LI->replaceAllUsesWith(NSI);
   1887     }
   1888     UI->eraseFromParent();
   1889   }
   1890 
   1891   // Retain the name of the old global variable. People who are debugging their
   1892   // programs may expect these variables to be named the same.
   1893   NewGV->takeName(GV);
   1894   GV->eraseFromParent();
   1895   return true;
   1896 }
   1897 
   1898 
   1899 /// ProcessGlobal - Analyze the specified global variable and optimize it if
   1900 /// possible.  If we make a change, return true.
   1901 bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
   1902                               Module::global_iterator &GVI) {
   1903   if (!GV->isDiscardableIfUnused())
   1904     return false;
   1905 
   1906   // Do more involved optimizations if the global is internal.
   1907   GV->removeDeadConstantUsers();
   1908 
   1909   if (GV->use_empty()) {
   1910     DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
   1911     GV->eraseFromParent();
   1912     ++NumDeleted;
   1913     return true;
   1914   }
   1915 
   1916   if (!GV->hasLocalLinkage())
   1917     return false;
   1918 
   1919   SmallPtrSet<const PHINode*, 16> PHIUsers;
   1920   GlobalStatus GS;
   1921 
   1922   if (AnalyzeGlobal(GV, GS, PHIUsers))
   1923     return false;
   1924 
   1925   if (!GS.isCompared && !GV->hasUnnamedAddr()) {
   1926     GV->setUnnamedAddr(true);
   1927     NumUnnamed++;
   1928   }
   1929 
   1930   if (GV->isConstant() || !GV->hasInitializer())
   1931     return false;
   1932 
   1933   return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
   1934 }
   1935 
   1936 /// ProcessInternalGlobal - Analyze the specified global variable and optimize
   1937 /// it if possible.  If we make a change, return true.
   1938 bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
   1939                                       Module::global_iterator &GVI,
   1940                                 const SmallPtrSet<const PHINode*, 16> &PHIUsers,
   1941                                       const GlobalStatus &GS) {
   1942   // If this is a first class global and has only one accessing function
   1943   // and this function is main (which we know is not recursive we can make
   1944   // this global a local variable) we replace the global with a local alloca
   1945   // in this function.
   1946   //
   1947   // NOTE: It doesn't make sense to promote non single-value types since we
   1948   // are just replacing static memory to stack memory.
   1949   //
   1950   // If the global is in different address space, don't bring it to stack.
   1951   if (!GS.HasMultipleAccessingFunctions &&
   1952       GS.AccessingFunction && !GS.HasNonInstructionUser &&
   1953       GV->getType()->getElementType()->isSingleValueType() &&
   1954       GS.AccessingFunction->getName() == "main" &&
   1955       GS.AccessingFunction->hasExternalLinkage() &&
   1956       GV->getType()->getAddressSpace() == 0) {
   1957     DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
   1958     Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
   1959                                                    ->getEntryBlock().begin());
   1960     Type *ElemTy = GV->getType()->getElementType();
   1961     // FIXME: Pass Global's alignment when globals have alignment
   1962     AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
   1963     if (!isa<UndefValue>(GV->getInitializer()))
   1964       new StoreInst(GV->getInitializer(), Alloca, &FirstI);
   1965 
   1966     GV->replaceAllUsesWith(Alloca);
   1967     GV->eraseFromParent();
   1968     ++NumLocalized;
   1969     return true;
   1970   }
   1971 
   1972   // If the global is never loaded (but may be stored to), it is dead.
   1973   // Delete it now.
   1974   if (!GS.isLoaded) {
   1975     DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
   1976 
   1977     bool Changed;
   1978     if (isLeakCheckerRoot(GV)) {
   1979       // Delete any constant stores to the global.
   1980       Changed = CleanupPointerRootUsers(GV, TLI);
   1981     } else {
   1982       // Delete any stores we can find to the global.  We may not be able to
   1983       // make it completely dead though.
   1984       Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
   1985     }
   1986 
   1987     // If the global is dead now, delete it.
   1988     if (GV->use_empty()) {
   1989       GV->eraseFromParent();
   1990       ++NumDeleted;
   1991       Changed = true;
   1992     }
   1993     return Changed;
   1994 
   1995   } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
   1996     DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
   1997     GV->setConstant(true);
   1998 
   1999     // Clean up any obviously simplifiable users now.
   2000     CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
   2001 
   2002     // If the global is dead now, just nuke it.
   2003     if (GV->use_empty()) {
   2004       DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
   2005             << "all users and delete global!\n");
   2006       GV->eraseFromParent();
   2007       ++NumDeleted;
   2008     }
   2009 
   2010     ++NumMarked;
   2011     return true;
   2012   } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
   2013     if (DataLayout *TD = getAnalysisIfAvailable<DataLayout>())
   2014       if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
   2015         GVI = FirstNewGV;  // Don't skip the newly produced globals!
   2016         return true;
   2017       }
   2018   } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
   2019     // If the initial value for the global was an undef value, and if only
   2020     // one other value was stored into it, we can just change the
   2021     // initializer to be the stored value, then delete all stores to the
   2022     // global.  This allows us to mark it constant.
   2023     if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
   2024       if (isa<UndefValue>(GV->getInitializer())) {
   2025         // Change the initial value here.
   2026         GV->setInitializer(SOVConstant);
   2027 
   2028         // Clean up any obviously simplifiable users now.
   2029         CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
   2030 
   2031         if (GV->use_empty()) {
   2032           DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
   2033                        << "simplify all users and delete global!\n");
   2034           GV->eraseFromParent();
   2035           ++NumDeleted;
   2036         } else {
   2037           GVI = GV;
   2038         }
   2039         ++NumSubstitute;
   2040         return true;
   2041       }
   2042 
   2043     // Try to optimize globals based on the knowledge that only one value
   2044     // (besides its initializer) is ever stored to the global.
   2045     if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
   2046                                  TD, TLI))
   2047       return true;
   2048 
   2049     // Otherwise, if the global was not a boolean, we can shrink it to be a
   2050     // boolean.
   2051     if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
   2052       if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
   2053         ++NumShrunkToBool;
   2054         return true;
   2055       }
   2056   }
   2057 
   2058   return false;
   2059 }
   2060 
   2061 /// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
   2062 /// function, changing them to FastCC.
   2063 static void ChangeCalleesToFastCall(Function *F) {
   2064   for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
   2065     if (isa<BlockAddress>(*UI))
   2066       continue;
   2067     CallSite User(cast<Instruction>(*UI));
   2068     User.setCallingConv(CallingConv::Fast);
   2069   }
   2070 }
   2071 
   2072 static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
   2073   for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
   2074     unsigned Index = Attrs.getSlotIndex(i);
   2075     if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
   2076       continue;
   2077 
   2078     // There can be only one.
   2079     return Attrs.removeAttribute(C, Index, Attribute::Nest);
   2080   }
   2081 
   2082   return Attrs;
   2083 }
   2084 
   2085 static void RemoveNestAttribute(Function *F) {
   2086   F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
   2087   for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
   2088     if (isa<BlockAddress>(*UI))
   2089       continue;
   2090     CallSite User(cast<Instruction>(*UI));
   2091     User.setAttributes(StripNest(F->getContext(), User.getAttributes()));
   2092   }
   2093 }
   2094 
   2095 bool GlobalOpt::OptimizeFunctions(Module &M) {
   2096   bool Changed = false;
   2097   // Optimize functions.
   2098   for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
   2099     Function *F = FI++;
   2100     // Functions without names cannot be referenced outside this module.
   2101     if (!F->hasName() && !F->isDeclaration())
   2102       F->setLinkage(GlobalValue::InternalLinkage);
   2103     F->removeDeadConstantUsers();
   2104     if (F->isDefTriviallyDead()) {
   2105       F->eraseFromParent();
   2106       Changed = true;
   2107       ++NumFnDeleted;
   2108     } else if (F->hasLocalLinkage()) {
   2109       if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
   2110           !F->hasAddressTaken()) {
   2111         // If this function has C calling conventions, is not a varargs
   2112         // function, and is only called directly, promote it to use the Fast
   2113         // calling convention.
   2114         F->setCallingConv(CallingConv::Fast);
   2115         ChangeCalleesToFastCall(F);
   2116         ++NumFastCallFns;
   2117         Changed = true;
   2118       }
   2119 
   2120       if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
   2121           !F->hasAddressTaken()) {
   2122         // The function is not used by a trampoline intrinsic, so it is safe
   2123         // to remove the 'nest' attribute.
   2124         RemoveNestAttribute(F);
   2125         ++NumNestRemoved;
   2126         Changed = true;
   2127       }
   2128     }
   2129   }
   2130   return Changed;
   2131 }
   2132 
   2133 bool GlobalOpt::OptimizeGlobalVars(Module &M) {
   2134   bool Changed = false;
   2135   for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
   2136        GVI != E; ) {
   2137     GlobalVariable *GV = GVI++;
   2138     // Global variables without names cannot be referenced outside this module.
   2139     if (!GV->hasName() && !GV->isDeclaration())
   2140       GV->setLinkage(GlobalValue::InternalLinkage);
   2141     // Simplify the initializer.
   2142     if (GV->hasInitializer())
   2143       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
   2144         Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
   2145         if (New && New != CE)
   2146           GV->setInitializer(New);
   2147       }
   2148 
   2149     Changed |= ProcessGlobal(GV, GVI);
   2150   }
   2151   return Changed;
   2152 }
   2153 
   2154 /// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
   2155 /// initializers have an init priority of 65535.
   2156 GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
   2157   GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
   2158   if (GV == 0) return 0;
   2159 
   2160   // Verify that the initializer is simple enough for us to handle. We are
   2161   // only allowed to optimize the initializer if it is unique.
   2162   if (!GV->hasUniqueInitializer()) return 0;
   2163 
   2164   if (isa<ConstantAggregateZero>(GV->getInitializer()))
   2165     return GV;
   2166   ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
   2167 
   2168   for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
   2169     if (isa<ConstantAggregateZero>(*i))
   2170       continue;
   2171     ConstantStruct *CS = cast<ConstantStruct>(*i);
   2172     if (isa<ConstantPointerNull>(CS->getOperand(1)))
   2173       continue;
   2174 
   2175     // Must have a function or null ptr.
   2176     if (!isa<Function>(CS->getOperand(1)))
   2177       return 0;
   2178 
   2179     // Init priority must be standard.
   2180     ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
   2181     if (CI->getZExtValue() != 65535)
   2182       return 0;
   2183   }
   2184 
   2185   return GV;
   2186 }
   2187 
   2188 /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
   2189 /// return a list of the functions and null terminator as a vector.
   2190 static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
   2191   if (GV->getInitializer()->isNullValue())
   2192     return std::vector<Function*>();
   2193   ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
   2194   std::vector<Function*> Result;
   2195   Result.reserve(CA->getNumOperands());
   2196   for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
   2197     ConstantStruct *CS = cast<ConstantStruct>(*i);
   2198     Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
   2199   }
   2200   return Result;
   2201 }
   2202 
   2203 /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
   2204 /// specified array, returning the new global to use.
   2205 static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
   2206                                           const std::vector<Function*> &Ctors) {
   2207   // If we made a change, reassemble the initializer list.
   2208   Constant *CSVals[2];
   2209   CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
   2210   CSVals[1] = 0;
   2211 
   2212   StructType *StructTy =
   2213     cast <StructType>(
   2214     cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
   2215 
   2216   // Create the new init list.
   2217   std::vector<Constant*> CAList;
   2218   for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
   2219     if (Ctors[i]) {
   2220       CSVals[1] = Ctors[i];
   2221     } else {
   2222       Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
   2223                                           false);
   2224       PointerType *PFTy = PointerType::getUnqual(FTy);
   2225       CSVals[1] = Constant::getNullValue(PFTy);
   2226       CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
   2227                                    0x7fffffff);
   2228     }
   2229     CAList.push_back(ConstantStruct::get(StructTy, CSVals));
   2230   }
   2231 
   2232   // Create the array initializer.
   2233   Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
   2234                                                    CAList.size()), CAList);
   2235 
   2236   // If we didn't change the number of elements, don't create a new GV.
   2237   if (CA->getType() == GCL->getInitializer()->getType()) {
   2238     GCL->setInitializer(CA);
   2239     return GCL;
   2240   }
   2241 
   2242   // Create the new global and insert it next to the existing list.
   2243   GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
   2244                                            GCL->getLinkage(), CA, "",
   2245                                            GCL->getThreadLocalMode());
   2246   GCL->getParent()->getGlobalList().insert(GCL, NGV);
   2247   NGV->takeName(GCL);
   2248 
   2249   // Nuke the old list, replacing any uses with the new one.
   2250   if (!GCL->use_empty()) {
   2251     Constant *V = NGV;
   2252     if (V->getType() != GCL->getType())
   2253       V = ConstantExpr::getBitCast(V, GCL->getType());
   2254     GCL->replaceAllUsesWith(V);
   2255   }
   2256   GCL->eraseFromParent();
   2257 
   2258   if (Ctors.size())
   2259     return NGV;
   2260   else
   2261     return 0;
   2262 }
   2263 
   2264 
   2265 static inline bool
   2266 isSimpleEnoughValueToCommit(Constant *C,
   2267                             SmallPtrSet<Constant*, 8> &SimpleConstants,
   2268                             const DataLayout *TD);
   2269 
   2270 
   2271 /// isSimpleEnoughValueToCommit - Return true if the specified constant can be
   2272 /// handled by the code generator.  We don't want to generate something like:
   2273 ///   void *X = &X/42;
   2274 /// because the code generator doesn't have a relocation that can handle that.
   2275 ///
   2276 /// This function should be called if C was not found (but just got inserted)
   2277 /// in SimpleConstants to avoid having to rescan the same constants all the
   2278 /// time.
   2279 static bool isSimpleEnoughValueToCommitHelper(Constant *C,
   2280                                    SmallPtrSet<Constant*, 8> &SimpleConstants,
   2281                                    const DataLayout *TD) {
   2282   // Simple integer, undef, constant aggregate zero, global addresses, etc are
   2283   // all supported.
   2284   if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
   2285       isa<GlobalValue>(C))
   2286     return true;
   2287 
   2288   // Aggregate values are safe if all their elements are.
   2289   if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
   2290       isa<ConstantVector>(C)) {
   2291     for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
   2292       Constant *Op = cast<Constant>(C->getOperand(i));
   2293       if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD))
   2294         return false;
   2295     }
   2296     return true;
   2297   }
   2298 
   2299   // We don't know exactly what relocations are allowed in constant expressions,
   2300   // so we allow &global+constantoffset, which is safe and uniformly supported
   2301   // across targets.
   2302   ConstantExpr *CE = cast<ConstantExpr>(C);
   2303   switch (CE->getOpcode()) {
   2304   case Instruction::BitCast:
   2305     // Bitcast is fine if the casted value is fine.
   2306     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2307 
   2308   case Instruction::IntToPtr:
   2309   case Instruction::PtrToInt:
   2310     // int <=> ptr is fine if the int type is the same size as the
   2311     // pointer type.
   2312     if (!TD || TD->getTypeSizeInBits(CE->getType()) !=
   2313                TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
   2314       return false;
   2315     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2316 
   2317   // GEP is fine if it is simple + constant offset.
   2318   case Instruction::GetElementPtr:
   2319     for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
   2320       if (!isa<ConstantInt>(CE->getOperand(i)))
   2321         return false;
   2322     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2323 
   2324   case Instruction::Add:
   2325     // We allow simple+cst.
   2326     if (!isa<ConstantInt>(CE->getOperand(1)))
   2327       return false;
   2328     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2329   }
   2330   return false;
   2331 }
   2332 
   2333 static inline bool
   2334 isSimpleEnoughValueToCommit(Constant *C,
   2335                             SmallPtrSet<Constant*, 8> &SimpleConstants,
   2336                             const DataLayout *TD) {
   2337   // If we already checked this constant, we win.
   2338   if (!SimpleConstants.insert(C)) return true;
   2339   // Check the constant.
   2340   return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD);
   2341 }
   2342 
   2343 
   2344 /// isSimpleEnoughPointerToCommit - Return true if this constant is simple
   2345 /// enough for us to understand.  In particular, if it is a cast to anything
   2346 /// other than from one pointer type to another pointer type, we punt.
   2347 /// We basically just support direct accesses to globals and GEP's of
   2348 /// globals.  This should be kept up to date with CommitValueTo.
   2349 static bool isSimpleEnoughPointerToCommit(Constant *C) {
   2350   // Conservatively, avoid aggregate types. This is because we don't
   2351   // want to worry about them partially overlapping other stores.
   2352   if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
   2353     return false;
   2354 
   2355   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
   2356     // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
   2357     // external globals.
   2358     return GV->hasUniqueInitializer();
   2359 
   2360   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
   2361     // Handle a constantexpr gep.
   2362     if (CE->getOpcode() == Instruction::GetElementPtr &&
   2363         isa<GlobalVariable>(CE->getOperand(0)) &&
   2364         cast<GEPOperator>(CE)->isInBounds()) {
   2365       GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
   2366       // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
   2367       // external globals.
   2368       if (!GV->hasUniqueInitializer())
   2369         return false;
   2370 
   2371       // The first index must be zero.
   2372       ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
   2373       if (!CI || !CI->isZero()) return false;
   2374 
   2375       // The remaining indices must be compile-time known integers within the
   2376       // notional bounds of the corresponding static array types.
   2377       if (!CE->isGEPWithNoNotionalOverIndexing())
   2378         return false;
   2379 
   2380       return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
   2381 
   2382     // A constantexpr bitcast from a pointer to another pointer is a no-op,
   2383     // and we know how to evaluate it by moving the bitcast from the pointer
   2384     // operand to the value operand.
   2385     } else if (CE->getOpcode() == Instruction::BitCast &&
   2386                isa<GlobalVariable>(CE->getOperand(0))) {
   2387       // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
   2388       // external globals.
   2389       return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
   2390     }
   2391   }
   2392 
   2393   return false;
   2394 }
   2395 
   2396 /// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
   2397 /// initializer.  This returns 'Init' modified to reflect 'Val' stored into it.
   2398 /// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
   2399 static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
   2400                                    ConstantExpr *Addr, unsigned OpNo) {
   2401   // Base case of the recursion.
   2402   if (OpNo == Addr->getNumOperands()) {
   2403     assert(Val->getType() == Init->getType() && "Type mismatch!");
   2404     return Val;
   2405   }
   2406 
   2407   SmallVector<Constant*, 32> Elts;
   2408   if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
   2409     // Break up the constant into its elements.
   2410     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
   2411       Elts.push_back(Init->getAggregateElement(i));
   2412 
   2413     // Replace the element that we are supposed to.
   2414     ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
   2415     unsigned Idx = CU->getZExtValue();
   2416     assert(Idx < STy->getNumElements() && "Struct index out of range!");
   2417     Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
   2418 
   2419     // Return the modified struct.
   2420     return ConstantStruct::get(STy, Elts);
   2421   }
   2422 
   2423   ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
   2424   SequentialType *InitTy = cast<SequentialType>(Init->getType());
   2425 
   2426   uint64_t NumElts;
   2427   if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
   2428     NumElts = ATy->getNumElements();
   2429   else
   2430     NumElts = InitTy->getVectorNumElements();
   2431 
   2432   // Break up the array into elements.
   2433   for (uint64_t i = 0, e = NumElts; i != e; ++i)
   2434     Elts.push_back(Init->getAggregateElement(i));
   2435 
   2436   assert(CI->getZExtValue() < NumElts);
   2437   Elts[CI->getZExtValue()] =
   2438     EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
   2439 
   2440   if (Init->getType()->isArrayTy())
   2441     return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
   2442   return ConstantVector::get(Elts);
   2443 }
   2444 
   2445 /// CommitValueTo - We have decided that Addr (which satisfies the predicate
   2446 /// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
   2447 static void CommitValueTo(Constant *Val, Constant *Addr) {
   2448   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
   2449     assert(GV->hasInitializer());
   2450     GV->setInitializer(Val);
   2451     return;
   2452   }
   2453 
   2454   ConstantExpr *CE = cast<ConstantExpr>(Addr);
   2455   GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
   2456   GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
   2457 }
   2458 
   2459 namespace {
   2460 
   2461 /// Evaluator - This class evaluates LLVM IR, producing the Constant
   2462 /// representing each SSA instruction.  Changes to global variables are stored
   2463 /// in a mapping that can be iterated over after the evaluation is complete.
   2464 /// Once an evaluation call fails, the evaluation object should not be reused.
   2465 class Evaluator {
   2466 public:
   2467   Evaluator(const DataLayout *TD, const TargetLibraryInfo *TLI)
   2468     : TD(TD), TLI(TLI) {
   2469     ValueStack.push_back(new DenseMap<Value*, Constant*>);
   2470   }
   2471 
   2472   ~Evaluator() {
   2473     DeleteContainerPointers(ValueStack);
   2474     while (!AllocaTmps.empty()) {
   2475       GlobalVariable *Tmp = AllocaTmps.back();
   2476       AllocaTmps.pop_back();
   2477 
   2478       // If there are still users of the alloca, the program is doing something
   2479       // silly, e.g. storing the address of the alloca somewhere and using it
   2480       // later.  Since this is undefined, we'll just make it be null.
   2481       if (!Tmp->use_empty())
   2482         Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
   2483       delete Tmp;
   2484     }
   2485   }
   2486 
   2487   /// EvaluateFunction - Evaluate a call to function F, returning true if
   2488   /// successful, false if we can't evaluate it.  ActualArgs contains the formal
   2489   /// arguments for the function.
   2490   bool EvaluateFunction(Function *F, Constant *&RetVal,
   2491                         const SmallVectorImpl<Constant*> &ActualArgs);
   2492 
   2493   /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
   2494   /// successful, false if we can't evaluate it.  NewBB returns the next BB that
   2495   /// control flows into, or null upon return.
   2496   bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
   2497 
   2498   Constant *getVal(Value *V) {
   2499     if (Constant *CV = dyn_cast<Constant>(V)) return CV;
   2500     Constant *R = ValueStack.back()->lookup(V);
   2501     assert(R && "Reference to an uncomputed value!");
   2502     return R;
   2503   }
   2504 
   2505   void setVal(Value *V, Constant *C) {
   2506     ValueStack.back()->operator[](V) = C;
   2507   }
   2508 
   2509   const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
   2510     return MutatedMemory;
   2511   }
   2512 
   2513   const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
   2514     return Invariants;
   2515   }
   2516 
   2517 private:
   2518   Constant *ComputeLoadResult(Constant *P);
   2519 
   2520   /// ValueStack - As we compute SSA register values, we store their contents
   2521   /// here. The back of the vector contains the current function and the stack
   2522   /// contains the values in the calling frames.
   2523   SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;
   2524 
   2525   /// CallStack - This is used to detect recursion.  In pathological situations
   2526   /// we could hit exponential behavior, but at least there is nothing
   2527   /// unbounded.
   2528   SmallVector<Function*, 4> CallStack;
   2529 
   2530   /// MutatedMemory - For each store we execute, we update this map.  Loads
   2531   /// check this to get the most up-to-date value.  If evaluation is successful,
   2532   /// this state is committed to the process.
   2533   DenseMap<Constant*, Constant*> MutatedMemory;
   2534 
   2535   /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
   2536   /// to represent its body.  This vector is needed so we can delete the
   2537   /// temporary globals when we are done.
   2538   SmallVector<GlobalVariable*, 32> AllocaTmps;
   2539 
   2540   /// Invariants - These global variables have been marked invariant by the
   2541   /// static constructor.
   2542   SmallPtrSet<GlobalVariable*, 8> Invariants;
   2543 
   2544   /// SimpleConstants - These are constants we have checked and know to be
   2545   /// simple enough to live in a static initializer of a global.
   2546   SmallPtrSet<Constant*, 8> SimpleConstants;
   2547 
   2548   const DataLayout *TD;
   2549   const TargetLibraryInfo *TLI;
   2550 };
   2551 
   2552 }  // anonymous namespace
   2553 
   2554 /// ComputeLoadResult - Return the value that would be computed by a load from
   2555 /// P after the stores reflected by 'memory' have been performed.  If we can't
   2556 /// decide, return null.
   2557 Constant *Evaluator::ComputeLoadResult(Constant *P) {
   2558   // If this memory location has been recently stored, use the stored value: it
   2559   // is the most up-to-date.
   2560   DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
   2561   if (I != MutatedMemory.end()) return I->second;
   2562 
   2563   // Access it.
   2564   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
   2565     if (GV->hasDefinitiveInitializer())
   2566       return GV->getInitializer();
   2567     return 0;
   2568   }
   2569 
   2570   // Handle a constantexpr getelementptr.
   2571   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
   2572     if (CE->getOpcode() == Instruction::GetElementPtr &&
   2573         isa<GlobalVariable>(CE->getOperand(0))) {
   2574       GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
   2575       if (GV->hasDefinitiveInitializer())
   2576         return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
   2577     }
   2578 
   2579   return 0;  // don't know how to evaluate.
   2580 }
   2581 
   2582 /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
   2583 /// successful, false if we can't evaluate it.  NewBB returns the next BB that
   2584 /// control flows into, or null upon return.
   2585 bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
   2586                               BasicBlock *&NextBB) {
   2587   // This is the main evaluation loop.
   2588   while (1) {
   2589     Constant *InstResult = 0;
   2590 
   2591     DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
   2592 
   2593     if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
   2594       if (!SI->isSimple()) {
   2595         DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
   2596         return false;  // no volatile/atomic accesses.
   2597       }
   2598       Constant *Ptr = getVal(SI->getOperand(1));
   2599       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
   2600         DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
   2601         Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
   2602         DEBUG(dbgs() << "; To: " << *Ptr << "\n");
   2603       }
   2604       if (!isSimpleEnoughPointerToCommit(Ptr)) {
   2605         // If this is too complex for us to commit, reject it.
   2606         DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
   2607         return false;
   2608       }
   2609 
   2610       Constant *Val = getVal(SI->getOperand(0));
   2611 
   2612       // If this might be too difficult for the backend to handle (e.g. the addr
   2613       // of one global variable divided by another) then we can't commit it.
   2614       if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) {
   2615         DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
   2616               << "\n");
   2617         return false;
   2618       }
   2619 
   2620       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
   2621         if (CE->getOpcode() == Instruction::BitCast) {
   2622           DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
   2623           // If we're evaluating a store through a bitcast, then we need
   2624           // to pull the bitcast off the pointer type and push it onto the
   2625           // stored value.
   2626           Ptr = CE->getOperand(0);
   2627 
   2628           Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
   2629 
   2630           // In order to push the bitcast onto the stored value, a bitcast
   2631           // from NewTy to Val's type must be legal.  If it's not, we can try
   2632           // introspecting NewTy to find a legal conversion.
   2633           while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
   2634             // If NewTy is a struct, we can convert the pointer to the struct
   2635             // into a pointer to its first member.
   2636             // FIXME: This could be extended to support arrays as well.
   2637             if (StructType *STy = dyn_cast<StructType>(NewTy)) {
   2638               NewTy = STy->getTypeAtIndex(0U);
   2639 
   2640               IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
   2641               Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
   2642               Constant * const IdxList[] = {IdxZero, IdxZero};
   2643 
   2644               Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
   2645               if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
   2646                 Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
   2647 
   2648             // If we can't improve the situation by introspecting NewTy,
   2649             // we have to give up.
   2650             } else {
   2651               DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
   2652                     "evaluate.\n");
   2653               return false;
   2654             }
   2655           }
   2656 
   2657           // If we found compatible types, go ahead and push the bitcast
   2658           // onto the stored value.
   2659           Val = ConstantExpr::getBitCast(Val, NewTy);
   2660 
   2661           DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
   2662         }
   2663       }
   2664 
   2665       MutatedMemory[Ptr] = Val;
   2666     } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
   2667       InstResult = ConstantExpr::get(BO->getOpcode(),
   2668                                      getVal(BO->getOperand(0)),
   2669                                      getVal(BO->getOperand(1)));
   2670       DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
   2671             << "\n");
   2672     } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
   2673       InstResult = ConstantExpr::getCompare(CI->getPredicate(),
   2674                                             getVal(CI->getOperand(0)),
   2675                                             getVal(CI->getOperand(1)));
   2676       DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
   2677             << "\n");
   2678     } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
   2679       InstResult = ConstantExpr::getCast(CI->getOpcode(),
   2680                                          getVal(CI->getOperand(0)),
   2681                                          CI->getType());
   2682       DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
   2683             << "\n");
   2684     } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
   2685       InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
   2686                                            getVal(SI->getOperand(1)),
   2687                                            getVal(SI->getOperand(2)));
   2688       DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
   2689             << "\n");
   2690     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
   2691       Constant *P = getVal(GEP->getOperand(0));
   2692       SmallVector<Constant*, 8> GEPOps;
   2693       for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
   2694            i != e; ++i)
   2695         GEPOps.push_back(getVal(*i));
   2696       InstResult =
   2697         ConstantExpr::getGetElementPtr(P, GEPOps,
   2698                                        cast<GEPOperator>(GEP)->isInBounds());
   2699       DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
   2700             << "\n");
   2701     } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
   2702 
   2703       if (!LI->isSimple()) {
   2704         DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
   2705         return false;  // no volatile/atomic accesses.
   2706       }
   2707 
   2708       Constant *Ptr = getVal(LI->getOperand(0));
   2709       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
   2710         Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
   2711         DEBUG(dbgs() << "Found a constant pointer expression, constant "
   2712               "folding: " << *Ptr << "\n");
   2713       }
   2714       InstResult = ComputeLoadResult(Ptr);
   2715       if (InstResult == 0) {
   2716         DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
   2717               "\n");
   2718         return false; // Could not evaluate load.
   2719       }
   2720 
   2721       DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
   2722     } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
   2723       if (AI->isArrayAllocation()) {
   2724         DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
   2725         return false;  // Cannot handle array allocs.
   2726       }
   2727       Type *Ty = AI->getType()->getElementType();
   2728       AllocaTmps.push_back(new GlobalVariable(Ty, false,
   2729                                               GlobalValue::InternalLinkage,
   2730                                               UndefValue::get(Ty),
   2731                                               AI->getName()));
   2732       InstResult = AllocaTmps.back();
   2733       DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
   2734     } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
   2735       CallSite CS(CurInst);
   2736 
   2737       // Debug info can safely be ignored here.
   2738       if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
   2739         DEBUG(dbgs() << "Ignoring debug info.\n");
   2740         ++CurInst;
   2741         continue;
   2742       }
   2743 
   2744       // Cannot handle inline asm.
   2745       if (isa<InlineAsm>(CS.getCalledValue())) {
   2746         DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
   2747         return false;
   2748       }
   2749 
   2750       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
   2751         if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
   2752           if (MSI->isVolatile()) {
   2753             DEBUG(dbgs() << "Can not optimize a volatile memset " <<
   2754                   "intrinsic.\n");
   2755             return false;
   2756           }
   2757           Constant *Ptr = getVal(MSI->getDest());
   2758           Constant *Val = getVal(MSI->getValue());
   2759           Constant *DestVal = ComputeLoadResult(getVal(Ptr));
   2760           if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
   2761             // This memset is a no-op.
   2762             DEBUG(dbgs() << "Ignoring no-op memset.\n");
   2763             ++CurInst;
   2764             continue;
   2765           }
   2766         }
   2767 
   2768         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
   2769             II->getIntrinsicID() == Intrinsic::lifetime_end) {
   2770           DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
   2771           ++CurInst;
   2772           continue;
   2773         }
   2774 
   2775         if (II->getIntrinsicID() == Intrinsic::invariant_start) {
   2776           // We don't insert an entry into Values, as it doesn't have a
   2777           // meaningful return value.
   2778           if (!II->use_empty()) {
   2779             DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n");
   2780             return false;
   2781           }
   2782           ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
   2783           Value *PtrArg = getVal(II->getArgOperand(1));
   2784           Value *Ptr = PtrArg->stripPointerCasts();
   2785           if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
   2786             Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
   2787             if (!Size->isAllOnesValue() &&
   2788                 Size->getValue().getLimitedValue() >=
   2789                 TD->getTypeStoreSize(ElemTy)) {
   2790               Invariants.insert(GV);
   2791               DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
   2792                     << "\n");
   2793             } else {
   2794               DEBUG(dbgs() << "Found a global var, but can not treat it as an "
   2795                     "invariant.\n");
   2796             }
   2797           }
   2798           // Continue even if we do nothing.
   2799           ++CurInst;
   2800           continue;
   2801         }
   2802 
   2803         DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
   2804         return false;
   2805       }
   2806 
   2807       // Resolve function pointers.
   2808       Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
   2809       if (!Callee || Callee->mayBeOverridden()) {
   2810         DEBUG(dbgs() << "Can not resolve function pointer.\n");
   2811         return false;  // Cannot resolve.
   2812       }
   2813 
   2814       SmallVector<Constant*, 8> Formals;
   2815       for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
   2816         Formals.push_back(getVal(*i));
   2817 
   2818       if (Callee->isDeclaration()) {
   2819         // If this is a function we can constant fold, do it.
   2820         if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
   2821           InstResult = C;
   2822           DEBUG(dbgs() << "Constant folded function call. Result: " <<
   2823                 *InstResult << "\n");
   2824         } else {
   2825           DEBUG(dbgs() << "Can not constant fold function call.\n");
   2826           return false;
   2827         }
   2828       } else {
   2829         if (Callee->getFunctionType()->isVarArg()) {
   2830           DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
   2831           return false;
   2832         }
   2833 
   2834         Constant *RetVal = 0;
   2835         // Execute the call, if successful, use the return value.
   2836         ValueStack.push_back(new DenseMap<Value*, Constant*>);
   2837         if (!EvaluateFunction(Callee, RetVal, Formals)) {
   2838           DEBUG(dbgs() << "Failed to evaluate function.\n");
   2839           return false;
   2840         }
   2841         delete ValueStack.pop_back_val();
   2842         InstResult = RetVal;
   2843 
   2844         if (InstResult != NULL) {
   2845           DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
   2846                 InstResult << "\n\n");
   2847         } else {
   2848           DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
   2849         }
   2850       }
   2851     } else if (isa<TerminatorInst>(CurInst)) {
   2852       DEBUG(dbgs() << "Found a terminator instruction.\n");
   2853 
   2854       if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
   2855         if (BI->isUnconditional()) {
   2856           NextBB = BI->getSuccessor(0);
   2857         } else {
   2858           ConstantInt *Cond =
   2859             dyn_cast<ConstantInt>(getVal(BI->getCondition()));
   2860           if (!Cond) return false;  // Cannot determine.
   2861 
   2862           NextBB = BI->getSuccessor(!Cond->getZExtValue());
   2863         }
   2864       } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
   2865         ConstantInt *Val =
   2866           dyn_cast<ConstantInt>(getVal(SI->getCondition()));
   2867         if (!Val) return false;  // Cannot determine.
   2868         NextBB = SI->findCaseValue(Val).getCaseSuccessor();
   2869       } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
   2870         Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
   2871         if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
   2872           NextBB = BA->getBasicBlock();
   2873         else
   2874           return false;  // Cannot determine.
   2875       } else if (isa<ReturnInst>(CurInst)) {
   2876         NextBB = 0;
   2877       } else {
   2878         // invoke, unwind, resume, unreachable.
   2879         DEBUG(dbgs() << "Can not handle terminator.");
   2880         return false;  // Cannot handle this terminator.
   2881       }
   2882 
   2883       // We succeeded at evaluating this block!
   2884       DEBUG(dbgs() << "Successfully evaluated block.\n");
   2885       return true;
   2886     } else {
   2887       // Did not know how to evaluate this!
   2888       DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
   2889             "\n");
   2890       return false;
   2891     }
   2892 
   2893     if (!CurInst->use_empty()) {
   2894       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
   2895         InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
   2896 
   2897       setVal(CurInst, InstResult);
   2898     }
   2899 
   2900     // If we just processed an invoke, we finished evaluating the block.
   2901     if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
   2902       NextBB = II->getNormalDest();
   2903       DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
   2904       return true;
   2905     }
   2906 
   2907     // Advance program counter.
   2908     ++CurInst;
   2909   }
   2910 }
   2911 
   2912 /// EvaluateFunction - Evaluate a call to function F, returning true if
   2913 /// successful, false if we can't evaluate it.  ActualArgs contains the formal
   2914 /// arguments for the function.
   2915 bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
   2916                                  const SmallVectorImpl<Constant*> &ActualArgs) {
   2917   // Check to see if this function is already executing (recursion).  If so,
   2918   // bail out.  TODO: we might want to accept limited recursion.
   2919   if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
   2920     return false;
   2921 
   2922   CallStack.push_back(F);
   2923 
   2924   // Initialize arguments to the incoming values specified.
   2925   unsigned ArgNo = 0;
   2926   for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
   2927        ++AI, ++ArgNo)
   2928     setVal(AI, ActualArgs[ArgNo]);
   2929 
   2930   // ExecutedBlocks - We only handle non-looping, non-recursive code.  As such,
   2931   // we can only evaluate any one basic block at most once.  This set keeps
   2932   // track of what we have executed so we can detect recursive cases etc.
   2933   SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
   2934 
   2935   // CurBB - The current basic block we're evaluating.
   2936   BasicBlock *CurBB = F->begin();
   2937 
   2938   BasicBlock::iterator CurInst = CurBB->begin();
   2939 
   2940   while (1) {
   2941     BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
   2942     DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
   2943 
   2944     if (!EvaluateBlock(CurInst, NextBB))
   2945       return false;
   2946 
   2947     if (NextBB == 0) {
   2948       // Successfully running until there's no next block means that we found
   2949       // the return.  Fill it the return value and pop the call stack.
   2950       ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
   2951       if (RI->getNumOperands())
   2952         RetVal = getVal(RI->getOperand(0));
   2953       CallStack.pop_back();
   2954       return true;
   2955     }
   2956 
   2957     // Okay, we succeeded in evaluating this control flow.  See if we have
   2958     // executed the new block before.  If so, we have a looping function,
   2959     // which we cannot evaluate in reasonable time.
   2960     if (!ExecutedBlocks.insert(NextBB))
   2961       return false;  // looped!
   2962 
   2963     // Okay, we have never been in this block before.  Check to see if there
   2964     // are any PHI nodes.  If so, evaluate them with information about where
   2965     // we came from.
   2966     PHINode *PN = 0;
   2967     for (CurInst = NextBB->begin();
   2968          (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
   2969       setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
   2970 
   2971     // Advance to the next block.
   2972     CurBB = NextBB;
   2973   }
   2974 }
   2975 
   2976 /// EvaluateStaticConstructor - Evaluate static constructors in the function, if
   2977 /// we can.  Return true if we can, false otherwise.
   2978 static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD,
   2979                                       const TargetLibraryInfo *TLI) {
   2980   // Call the function.
   2981   Evaluator Eval(TD, TLI);
   2982   Constant *RetValDummy;
   2983   bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
   2984                                            SmallVector<Constant*, 0>());
   2985 
   2986   if (EvalSuccess) {
   2987     // We succeeded at evaluation: commit the result.
   2988     DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
   2989           << F->getName() << "' to " << Eval.getMutatedMemory().size()
   2990           << " stores.\n");
   2991     for (DenseMap<Constant*, Constant*>::const_iterator I =
   2992            Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
   2993          I != E; ++I)
   2994       CommitValueTo(I->second, I->first);
   2995     for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
   2996            Eval.getInvariants().begin(), E = Eval.getInvariants().end();
   2997          I != E; ++I)
   2998       (*I)->setConstant(true);
   2999   }
   3000 
   3001   return EvalSuccess;
   3002 }
   3003 
   3004 /// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
   3005 /// Return true if anything changed.
   3006 bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
   3007   std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
   3008   bool MadeChange = false;
   3009   if (Ctors.empty()) return false;
   3010 
   3011   // Loop over global ctors, optimizing them when we can.
   3012   for (unsigned i = 0; i != Ctors.size(); ++i) {
   3013     Function *F = Ctors[i];
   3014     // Found a null terminator in the middle of the list, prune off the rest of
   3015     // the list.
   3016     if (F == 0) {
   3017       if (i != Ctors.size()-1) {
   3018         Ctors.resize(i+1);
   3019         MadeChange = true;
   3020       }
   3021       break;
   3022     }
   3023     DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
   3024 
   3025     // We cannot simplify external ctor functions.
   3026     if (F->empty()) continue;
   3027 
   3028     // If we can evaluate the ctor at compile time, do.
   3029     if (EvaluateStaticConstructor(F, TD, TLI)) {
   3030       Ctors.erase(Ctors.begin()+i);
   3031       MadeChange = true;
   3032       --i;
   3033       ++NumCtorsEvaluated;
   3034       continue;
   3035     }
   3036   }
   3037 
   3038   if (!MadeChange) return false;
   3039 
   3040   GCL = InstallGlobalCtors(GCL, Ctors);
   3041   return true;
   3042 }
   3043 
   3044 bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
   3045   bool Changed = false;
   3046 
   3047   for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
   3048        I != E;) {
   3049     Module::alias_iterator J = I++;
   3050     // Aliases without names cannot be referenced outside this module.
   3051     if (!J->hasName() && !J->isDeclaration())
   3052       J->setLinkage(GlobalValue::InternalLinkage);
   3053     // If the aliasee may change at link time, nothing can be done - bail out.
   3054     if (J->mayBeOverridden())
   3055       continue;
   3056 
   3057     Constant *Aliasee = J->getAliasee();
   3058     GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
   3059     Target->removeDeadConstantUsers();
   3060     bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
   3061 
   3062     // Make all users of the alias use the aliasee instead.
   3063     if (!J->use_empty()) {
   3064       J->replaceAllUsesWith(Aliasee);
   3065       ++NumAliasesResolved;
   3066       Changed = true;
   3067     }
   3068 
   3069     // If the alias is externally visible, we may still be able to simplify it.
   3070     if (!J->hasLocalLinkage()) {
   3071       // If the aliasee has internal linkage, give it the name and linkage
   3072       // of the alias, and delete the alias.  This turns:
   3073       //   define internal ... @f(...)
   3074       //   @a = alias ... @f
   3075       // into:
   3076       //   define ... @a(...)
   3077       if (!Target->hasLocalLinkage())
   3078         continue;
   3079 
   3080       // Do not perform the transform if multiple aliases potentially target the
   3081       // aliasee. This check also ensures that it is safe to replace the section
   3082       // and other attributes of the aliasee with those of the alias.
   3083       if (!hasOneUse)
   3084         continue;
   3085 
   3086       // Give the aliasee the name, linkage and other attributes of the alias.
   3087       Target->takeName(J);
   3088       Target->setLinkage(J->getLinkage());
   3089       Target->GlobalValue::copyAttributesFrom(J);
   3090     }
   3091 
   3092     // Delete the alias.
   3093     M.getAliasList().erase(J);
   3094     ++NumAliasesRemoved;
   3095     Changed = true;
   3096   }
   3097 
   3098   return Changed;
   3099 }
   3100 
   3101 static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
   3102   if (!TLI->has(LibFunc::cxa_atexit))
   3103     return 0;
   3104 
   3105   Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
   3106 
   3107   if (!Fn)
   3108     return 0;
   3109 
   3110   FunctionType *FTy = Fn->getFunctionType();
   3111 
   3112   // Checking that the function has the right return type, the right number of
   3113   // parameters and that they all have pointer types should be enough.
   3114   if (!FTy->getReturnType()->isIntegerTy() ||
   3115       FTy->getNumParams() != 3 ||
   3116       !FTy->getParamType(0)->isPointerTy() ||
   3117       !FTy->getParamType(1)->isPointerTy() ||
   3118       !FTy->getParamType(2)->isPointerTy())
   3119     return 0;
   3120 
   3121   return Fn;
   3122 }
   3123 
   3124 /// cxxDtorIsEmpty - Returns whether the given function is an empty C++
   3125 /// destructor and can therefore be eliminated.
   3126 /// Note that we assume that other optimization passes have already simplified
   3127 /// the code so we only look for a function with a single basic block, where
   3128 /// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
   3129 /// other side-effect free instructions.
   3130 static bool cxxDtorIsEmpty(const Function &Fn,
   3131                            SmallPtrSet<const Function *, 8> &CalledFunctions) {
   3132   // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
   3133   // nounwind, but that doesn't seem worth doing.
   3134   if (Fn.isDeclaration())
   3135     return false;
   3136 
   3137   if (++Fn.begin() != Fn.end())
   3138     return false;
   3139 
   3140   const BasicBlock &EntryBlock = Fn.getEntryBlock();
   3141   for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
   3142        I != E; ++I) {
   3143     if (const CallInst *CI = dyn_cast<CallInst>(I)) {
   3144       // Ignore debug intrinsics.
   3145       if (isa<DbgInfoIntrinsic>(CI))
   3146         continue;
   3147 
   3148       const Function *CalledFn = CI->getCalledFunction();
   3149 
   3150       if (!CalledFn)
   3151         return false;
   3152 
   3153       SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
   3154 
   3155       // Don't treat recursive functions as empty.
   3156       if (!NewCalledFunctions.insert(CalledFn))
   3157         return false;
   3158 
   3159       if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
   3160         return false;
   3161     } else if (isa<ReturnInst>(*I))
   3162       return true; // We're done.
   3163     else if (I->mayHaveSideEffects())
   3164       return false; // Destructor with side effects, bail.
   3165   }
   3166 
   3167   return false;
   3168 }
   3169 
   3170 bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
   3171   /// Itanium C++ ABI p3.3.5:
   3172   ///
   3173   ///   After constructing a global (or local static) object, that will require
   3174   ///   destruction on exit, a termination function is registered as follows:
   3175   ///
   3176   ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
   3177   ///
   3178   ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
   3179   ///   call f(p) when DSO d is unloaded, before all such termination calls
   3180   ///   registered before this one. It returns zero if registration is
   3181   ///   successful, nonzero on failure.
   3182 
   3183   // This pass will look for calls to __cxa_atexit where the function is trivial
   3184   // and remove them.
   3185   bool Changed = false;
   3186 
   3187   for (Function::use_iterator I = CXAAtExitFn->use_begin(),
   3188        E = CXAAtExitFn->use_end(); I != E;) {
   3189     // We're only interested in calls. Theoretically, we could handle invoke
   3190     // instructions as well, but neither llvm-gcc nor clang generate invokes
   3191     // to __cxa_atexit.
   3192     CallInst *CI = dyn_cast<CallInst>(*I++);
   3193     if (!CI)
   3194       continue;
   3195 
   3196     Function *DtorFn =
   3197       dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
   3198     if (!DtorFn)
   3199       continue;
   3200 
   3201     SmallPtrSet<const Function *, 8> CalledFunctions;
   3202     if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
   3203       continue;
   3204 
   3205     // Just remove the call.
   3206     CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
   3207     CI->eraseFromParent();
   3208 
   3209     ++NumCXXDtorsRemoved;
   3210 
   3211     Changed |= true;
   3212   }
   3213 
   3214   return Changed;
   3215 }
   3216 
   3217 bool GlobalOpt::runOnModule(Module &M) {
   3218   bool Changed = false;
   3219 
   3220   TD = getAnalysisIfAvailable<DataLayout>();
   3221   TLI = &getAnalysis<TargetLibraryInfo>();
   3222 
   3223   // Try to find the llvm.globalctors list.
   3224   GlobalVariable *GlobalCtors = FindGlobalCtors(M);
   3225 
   3226   Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
   3227 
   3228   bool LocalChange = true;
   3229   while (LocalChange) {
   3230     LocalChange = false;
   3231 
   3232     // Delete functions that are trivially dead, ccc -> fastcc
   3233     LocalChange |= OptimizeFunctions(M);
   3234 
   3235     // Optimize global_ctors list.
   3236     if (GlobalCtors)
   3237       LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
   3238 
   3239     // Optimize non-address-taken globals.
   3240     LocalChange |= OptimizeGlobalVars(M);
   3241 
   3242     // Resolve aliases, when possible.
   3243     LocalChange |= OptimizeGlobalAliases(M);
   3244 
   3245     // Try to remove trivial global destructors.
   3246     if (CXAAtExitFn)
   3247       LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
   3248 
   3249     Changed |= LocalChange;
   3250   }
   3251 
   3252   // TODO: Move all global ctors functions to the end of the module for code
   3253   // layout.
   3254 
   3255   return Changed;
   3256 }
   3257