Home | History | Annotate | Download | only in IPO
      1 //===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass transforms simple global variables that never have their address
     11 // taken.  If obviously true, it marks read/write globals as constant, deletes
     12 // variables only stored to, etc.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #define DEBUG_TYPE "globalopt"
     17 #include "llvm/Transforms/IPO.h"
     18 #include "llvm/ADT/DenseMap.h"
     19 #include "llvm/ADT/STLExtras.h"
     20 #include "llvm/ADT/SmallPtrSet.h"
     21 #include "llvm/ADT/SmallVector.h"
     22 #include "llvm/ADT/Statistic.h"
     23 #include "llvm/Analysis/ConstantFolding.h"
     24 #include "llvm/Analysis/MemoryBuiltins.h"
     25 #include "llvm/IR/CallingConv.h"
     26 #include "llvm/IR/Constants.h"
     27 #include "llvm/IR/DataLayout.h"
     28 #include "llvm/IR/DerivedTypes.h"
     29 #include "llvm/IR/Instructions.h"
     30 #include "llvm/IR/IntrinsicInst.h"
     31 #include "llvm/IR/Module.h"
     32 #include "llvm/IR/Operator.h"
     33 #include "llvm/Pass.h"
     34 #include "llvm/Support/CallSite.h"
     35 #include "llvm/Support/Debug.h"
     36 #include "llvm/Support/ErrorHandling.h"
     37 #include "llvm/Support/GetElementPtrTypeIterator.h"
     38 #include "llvm/Support/MathExtras.h"
     39 #include "llvm/Support/raw_ostream.h"
     40 #include "llvm/Target/TargetLibraryInfo.h"
     41 #include "llvm/Transforms/Utils/ModuleUtils.h"
     42 #include <algorithm>
     43 using namespace llvm;
     44 
     45 STATISTIC(NumMarked    , "Number of globals marked constant");
     46 STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
     47 STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
     48 STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
     49 STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
     50 STATISTIC(NumDeleted   , "Number of globals deleted");
     51 STATISTIC(NumFnDeleted , "Number of functions deleted");
     52 STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
     53 STATISTIC(NumLocalized , "Number of globals localized");
     54 STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
     55 STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
     56 STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
     57 STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
     58 STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
     59 STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
     60 STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
     61 
     62 namespace {
     63   struct GlobalStatus;
     64   struct GlobalOpt : public ModulePass {
     65     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     66       AU.addRequired<TargetLibraryInfo>();
     67     }
     68     static char ID; // Pass identification, replacement for typeid
     69     GlobalOpt() : ModulePass(ID) {
     70       initializeGlobalOptPass(*PassRegistry::getPassRegistry());
     71     }
     72 
     73     bool runOnModule(Module &M);
     74 
     75   private:
     76     GlobalVariable *FindGlobalCtors(Module &M);
     77     bool OptimizeFunctions(Module &M);
     78     bool OptimizeGlobalVars(Module &M);
     79     bool OptimizeGlobalAliases(Module &M);
     80     bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
     81     bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
     82     bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
     83                                const SmallPtrSet<const PHINode*, 16> &PHIUsers,
     84                                const GlobalStatus &GS);
     85     bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
     86 
     87     DataLayout *TD;
     88     TargetLibraryInfo *TLI;
     89   };
     90 }
     91 
     92 char GlobalOpt::ID = 0;
     93 INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
     94                 "Global Variable Optimizer", false, false)
     95 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
     96 INITIALIZE_PASS_END(GlobalOpt, "globalopt",
     97                 "Global Variable Optimizer", false, false)
     98 
     99 ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
    100 
    101 namespace {
    102 
    103 /// GlobalStatus - As we analyze each global, keep track of some information
    104 /// about it.  If we find out that the address of the global is taken, none of
    105 /// this info will be accurate.
    106 struct GlobalStatus {
    107   /// isCompared - True if the global's address is used in a comparison.
    108   bool isCompared;
    109 
    110   /// isLoaded - True if the global is ever loaded.  If the global isn't ever
    111   /// loaded it can be deleted.
    112   bool isLoaded;
    113 
    114   /// StoredType - Keep track of what stores to the global look like.
    115   ///
    116   enum StoredType {
    117     /// NotStored - There is no store to this global.  It can thus be marked
    118     /// constant.
    119     NotStored,
    120 
    121     /// isInitializerStored - This global is stored to, but the only thing
    122     /// stored is the constant it was initialized with.  This is only tracked
    123     /// for scalar globals.
    124     isInitializerStored,
    125 
    126     /// isStoredOnce - This global is stored to, but only its initializer and
    127     /// one other value is ever stored to it.  If this global isStoredOnce, we
    128     /// track the value stored to it in StoredOnceValue below.  This is only
    129     /// tracked for scalar globals.
    130     isStoredOnce,
    131 
    132     /// isStored - This global is stored to by multiple values or something else
    133     /// that we cannot track.
    134     isStored
    135   } StoredType;
    136 
    137   /// StoredOnceValue - If only one value (besides the initializer constant) is
    138   /// ever stored to this global, keep track of what value it is.
    139   Value *StoredOnceValue;
    140 
    141   /// AccessingFunction/HasMultipleAccessingFunctions - These start out
    142   /// null/false.  When the first accessing function is noticed, it is recorded.
    143   /// When a second different accessing function is noticed,
    144   /// HasMultipleAccessingFunctions is set to true.
    145   const Function *AccessingFunction;
    146   bool HasMultipleAccessingFunctions;
    147 
    148   /// HasNonInstructionUser - Set to true if this global has a user that is not
    149   /// an instruction (e.g. a constant expr or GV initializer).
    150   bool HasNonInstructionUser;
    151 
    152   /// AtomicOrdering - Set to the strongest atomic ordering requirement.
    153   AtomicOrdering Ordering;
    154 
    155   GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
    156                    StoredOnceValue(0), AccessingFunction(0),
    157                    HasMultipleAccessingFunctions(false),
    158                    HasNonInstructionUser(false), Ordering(NotAtomic) {}
    159 };
    160 
    161 }
    162 
    163 /// StrongerOrdering - Return the stronger of the two ordering. If the two
    164 /// orderings are acquire and release, then return AcquireRelease.
    165 ///
    166 static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
    167   if (X == Acquire && Y == Release) return AcquireRelease;
    168   if (Y == Acquire && X == Release) return AcquireRelease;
    169   return (AtomicOrdering)std::max(X, Y);
    170 }
    171 
    172 /// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
    173 /// by constants itself.  Note that constants cannot be cyclic, so this test is
    174 /// pretty easy to implement recursively.
    175 ///
    176 static bool SafeToDestroyConstant(const Constant *C) {
    177   if (isa<GlobalValue>(C)) return false;
    178 
    179   for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
    180        ++UI)
    181     if (const Constant *CU = dyn_cast<Constant>(*UI)) {
    182       if (!SafeToDestroyConstant(CU)) return false;
    183     } else
    184       return false;
    185   return true;
    186 }
    187 
    188 
    189 /// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
    190 /// structure.  If the global has its address taken, return true to indicate we
    191 /// can't do anything with it.
    192 ///
    193 static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
    194                           SmallPtrSet<const PHINode*, 16> &PHIUsers) {
    195   for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
    196        ++UI) {
    197     const User *U = *UI;
    198     if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
    199       GS.HasNonInstructionUser = true;
    200 
    201       // If the result of the constantexpr isn't pointer type, then we won't
    202       // know to expect it in various places.  Just reject early.
    203       if (!isa<PointerType>(CE->getType())) return true;
    204 
    205       if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
    206     } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
    207       if (!GS.HasMultipleAccessingFunctions) {
    208         const Function *F = I->getParent()->getParent();
    209         if (GS.AccessingFunction == 0)
    210           GS.AccessingFunction = F;
    211         else if (GS.AccessingFunction != F)
    212           GS.HasMultipleAccessingFunctions = true;
    213       }
    214       if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
    215         GS.isLoaded = true;
    216         // Don't hack on volatile loads.
    217         if (LI->isVolatile()) return true;
    218         GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering());
    219       } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
    220         // Don't allow a store OF the address, only stores TO the address.
    221         if (SI->getOperand(0) == V) return true;
    222 
    223         // Don't hack on volatile stores.
    224         if (SI->isVolatile()) return true;
    225 
    226         GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());
    227 
    228         // If this is a direct store to the global (i.e., the global is a scalar
    229         // value, not an aggregate), keep more specific information about
    230         // stores.
    231         if (GS.StoredType != GlobalStatus::isStored) {
    232           if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
    233                                                            SI->getOperand(1))) {
    234             Value *StoredVal = SI->getOperand(0);
    235 
    236             if (Constant *C = dyn_cast<Constant>(StoredVal)) {
    237               if (C->isThreadDependent()) {
    238                 // The stored value changes between threads; don't track it.
    239                 return true;
    240               }
    241             }
    242 
    243             if (StoredVal == GV->getInitializer()) {
    244               if (GS.StoredType < GlobalStatus::isInitializerStored)
    245                 GS.StoredType = GlobalStatus::isInitializerStored;
    246             } else if (isa<LoadInst>(StoredVal) &&
    247                        cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
    248               if (GS.StoredType < GlobalStatus::isInitializerStored)
    249                 GS.StoredType = GlobalStatus::isInitializerStored;
    250             } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
    251               GS.StoredType = GlobalStatus::isStoredOnce;
    252               GS.StoredOnceValue = StoredVal;
    253             } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
    254                        GS.StoredOnceValue == StoredVal) {
    255               // noop.
    256             } else {
    257               GS.StoredType = GlobalStatus::isStored;
    258             }
    259           } else {
    260             GS.StoredType = GlobalStatus::isStored;
    261           }
    262         }
    263       } else if (isa<BitCastInst>(I)) {
    264         if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
    265       } else if (isa<GetElementPtrInst>(I)) {
    266         if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
    267       } else if (isa<SelectInst>(I)) {
    268         if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
    269       } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
    270         // PHI nodes we can check just like select or GEP instructions, but we
    271         // have to be careful about infinite recursion.
    272         if (PHIUsers.insert(PN))  // Not already visited.
    273           if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
    274       } else if (isa<CmpInst>(I)) {
    275         GS.isCompared = true;
    276       } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
    277         if (MTI->isVolatile()) return true;
    278         if (MTI->getArgOperand(0) == V)
    279           GS.StoredType = GlobalStatus::isStored;
    280         if (MTI->getArgOperand(1) == V)
    281           GS.isLoaded = true;
    282       } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
    283         assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
    284         if (MSI->isVolatile()) return true;
    285         GS.StoredType = GlobalStatus::isStored;
    286       } else {
    287         return true;  // Any other non-load instruction might take address!
    288       }
    289     } else if (const Constant *C = dyn_cast<Constant>(U)) {
    290       GS.HasNonInstructionUser = true;
    291       // We might have a dead and dangling constant hanging off of here.
    292       if (!SafeToDestroyConstant(C))
    293         return true;
    294     } else {
    295       GS.HasNonInstructionUser = true;
    296       // Otherwise must be some other user.
    297       return true;
    298     }
    299   }
    300 
    301   return false;
    302 }
    303 
    304 /// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
    305 /// as a root?  If so, we might not really want to eliminate the stores to it.
    306 static bool isLeakCheckerRoot(GlobalVariable *GV) {
    307   // A global variable is a root if it is a pointer, or could plausibly contain
    308   // a pointer.  There are two challenges; one is that we could have a struct
    309   // the has an inner member which is a pointer.  We recurse through the type to
    310   // detect these (up to a point).  The other is that we may actually be a union
    311   // of a pointer and another type, and so our LLVM type is an integer which
    312   // gets converted into a pointer, or our type is an [i8 x #] with a pointer
    313   // potentially contained here.
    314 
    315   if (GV->hasPrivateLinkage())
    316     return false;
    317 
    318   SmallVector<Type *, 4> Types;
    319   Types.push_back(cast<PointerType>(GV->getType())->getElementType());
    320 
    321   unsigned Limit = 20;
    322   do {
    323     Type *Ty = Types.pop_back_val();
    324     switch (Ty->getTypeID()) {
    325       default: break;
    326       case Type::PointerTyID: return true;
    327       case Type::ArrayTyID:
    328       case Type::VectorTyID: {
    329         SequentialType *STy = cast<SequentialType>(Ty);
    330         Types.push_back(STy->getElementType());
    331         break;
    332       }
    333       case Type::StructTyID: {
    334         StructType *STy = cast<StructType>(Ty);
    335         if (STy->isOpaque()) return true;
    336         for (StructType::element_iterator I = STy->element_begin(),
    337                  E = STy->element_end(); I != E; ++I) {
    338           Type *InnerTy = *I;
    339           if (isa<PointerType>(InnerTy)) return true;
    340           if (isa<CompositeType>(InnerTy))
    341             Types.push_back(InnerTy);
    342         }
    343         break;
    344       }
    345     }
    346     if (--Limit == 0) return true;
    347   } while (!Types.empty());
    348   return false;
    349 }
    350 
    351 /// Given a value that is stored to a global but never read, determine whether
    352 /// it's safe to remove the store and the chain of computation that feeds the
    353 /// store.
    354 static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
    355   do {
    356     if (isa<Constant>(V))
    357       return true;
    358     if (!V->hasOneUse())
    359       return false;
    360     if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
    361         isa<GlobalValue>(V))
    362       return false;
    363     if (isAllocationFn(V, TLI))
    364       return true;
    365 
    366     Instruction *I = cast<Instruction>(V);
    367     if (I->mayHaveSideEffects())
    368       return false;
    369     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
    370       if (!GEP->hasAllConstantIndices())
    371         return false;
    372     } else if (I->getNumOperands() != 1) {
    373       return false;
    374     }
    375 
    376     V = I->getOperand(0);
    377   } while (1);
    378 }
    379 
    380 /// CleanupPointerRootUsers - This GV is a pointer root.  Loop over all users
    381 /// of the global and clean up any that obviously don't assign the global a
    382 /// value that isn't dynamically allocated.
    383 ///
    384 static bool CleanupPointerRootUsers(GlobalVariable *GV,
    385                                     const TargetLibraryInfo *TLI) {
    386   // A brief explanation of leak checkers.  The goal is to find bugs where
    387   // pointers are forgotten, causing an accumulating growth in memory
    388   // usage over time.  The common strategy for leak checkers is to whitelist the
    389   // memory pointed to by globals at exit.  This is popular because it also
    390   // solves another problem where the main thread of a C++ program may shut down
    391   // before other threads that are still expecting to use those globals.  To
    392   // handle that case, we expect the program may create a singleton and never
    393   // destroy it.
    394 
    395   bool Changed = false;
    396 
    397   // If Dead[n].first is the only use of a malloc result, we can delete its
    398   // chain of computation and the store to the global in Dead[n].second.
    399   SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
    400 
    401   // Constants can't be pointers to dynamically allocated memory.
    402   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
    403        UI != E;) {
    404     User *U = *UI++;
    405     if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
    406       Value *V = SI->getValueOperand();
    407       if (isa<Constant>(V)) {
    408         Changed = true;
    409         SI->eraseFromParent();
    410       } else if (Instruction *I = dyn_cast<Instruction>(V)) {
    411         if (I->hasOneUse())
    412           Dead.push_back(std::make_pair(I, SI));
    413       }
    414     } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
    415       if (isa<Constant>(MSI->getValue())) {
    416         Changed = true;
    417         MSI->eraseFromParent();
    418       } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
    419         if (I->hasOneUse())
    420           Dead.push_back(std::make_pair(I, MSI));
    421       }
    422     } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
    423       GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
    424       if (MemSrc && MemSrc->isConstant()) {
    425         Changed = true;
    426         MTI->eraseFromParent();
    427       } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
    428         if (I->hasOneUse())
    429           Dead.push_back(std::make_pair(I, MTI));
    430       }
    431     } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
    432       if (CE->use_empty()) {
    433         CE->destroyConstant();
    434         Changed = true;
    435       }
    436     } else if (Constant *C = dyn_cast<Constant>(U)) {
    437       if (SafeToDestroyConstant(C)) {
    438         C->destroyConstant();
    439         // This could have invalidated UI, start over from scratch.
    440         Dead.clear();
    441         CleanupPointerRootUsers(GV, TLI);
    442         return true;
    443       }
    444     }
    445   }
    446 
    447   for (int i = 0, e = Dead.size(); i != e; ++i) {
    448     if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
    449       Dead[i].second->eraseFromParent();
    450       Instruction *I = Dead[i].first;
    451       do {
    452         if (isAllocationFn(I, TLI))
    453           break;
    454         Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
    455         if (!J)
    456           break;
    457         I->eraseFromParent();
    458         I = J;
    459       } while (1);
    460       I->eraseFromParent();
    461     }
    462   }
    463 
    464   return Changed;
    465 }
    466 
    467 /// CleanupConstantGlobalUsers - We just marked GV constant.  Loop over all
    468 /// users of the global, cleaning up the obvious ones.  This is largely just a
    469 /// quick scan over the use list to clean up the easy and obvious cruft.  This
    470 /// returns true if it made a change.
    471 static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
    472                                        DataLayout *TD, TargetLibraryInfo *TLI) {
    473   bool Changed = false;
    474   SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end());
    475   while (!WorkList.empty()) {
    476     User *U = WorkList.pop_back_val();
    477 
    478     if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
    479       if (Init) {
    480         // Replace the load with the initializer.
    481         LI->replaceAllUsesWith(Init);
    482         LI->eraseFromParent();
    483         Changed = true;
    484       }
    485     } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
    486       // Store must be unreachable or storing Init into the global.
    487       SI->eraseFromParent();
    488       Changed = true;
    489     } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
    490       if (CE->getOpcode() == Instruction::GetElementPtr) {
    491         Constant *SubInit = 0;
    492         if (Init)
    493           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
    494         Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI);
    495       } else if (CE->getOpcode() == Instruction::BitCast &&
    496                  CE->getType()->isPointerTy()) {
    497         // Pointer cast, delete any stores and memsets to the global.
    498         Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI);
    499       }
    500 
    501       if (CE->use_empty()) {
    502         CE->destroyConstant();
    503         Changed = true;
    504       }
    505     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
    506       // Do not transform "gepinst (gep constexpr (GV))" here, because forming
    507       // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
    508       // and will invalidate our notion of what Init is.
    509       Constant *SubInit = 0;
    510       if (!isa<ConstantExpr>(GEP->getOperand(0))) {
    511         ConstantExpr *CE =
    512           dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI));
    513         if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
    514           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
    515 
    516         // If the initializer is an all-null value and we have an inbounds GEP,
    517         // we already know what the result of any load from that GEP is.
    518         // TODO: Handle splats.
    519         if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
    520           SubInit = Constant::getNullValue(GEP->getType()->getElementType());
    521       }
    522       Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI);
    523 
    524       if (GEP->use_empty()) {
    525         GEP->eraseFromParent();
    526         Changed = true;
    527       }
    528     } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
    529       if (MI->getRawDest() == V) {
    530         MI->eraseFromParent();
    531         Changed = true;
    532       }
    533 
    534     } else if (Constant *C = dyn_cast<Constant>(U)) {
    535       // If we have a chain of dead constantexprs or other things dangling from
    536       // us, and if they are all dead, nuke them without remorse.
    537       if (SafeToDestroyConstant(C)) {
    538         C->destroyConstant();
    539         CleanupConstantGlobalUsers(V, Init, TD, TLI);
    540         return true;
    541       }
    542     }
    543   }
    544   return Changed;
    545 }
    546 
    547 /// isSafeSROAElementUse - Return true if the specified instruction is a safe
    548 /// user of a derived expression from a global that we want to SROA.
    549 static bool isSafeSROAElementUse(Value *V) {
    550   // We might have a dead and dangling constant hanging off of here.
    551   if (Constant *C = dyn_cast<Constant>(V))
    552     return SafeToDestroyConstant(C);
    553 
    554   Instruction *I = dyn_cast<Instruction>(V);
    555   if (!I) return false;
    556 
    557   // Loads are ok.
    558   if (isa<LoadInst>(I)) return true;
    559 
    560   // Stores *to* the pointer are ok.
    561   if (StoreInst *SI = dyn_cast<StoreInst>(I))
    562     return SI->getOperand(0) != V;
    563 
    564   // Otherwise, it must be a GEP.
    565   GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
    566   if (GEPI == 0) return false;
    567 
    568   if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
    569       !cast<Constant>(GEPI->getOperand(1))->isNullValue())
    570     return false;
    571 
    572   for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
    573        I != E; ++I)
    574     if (!isSafeSROAElementUse(*I))
    575       return false;
    576   return true;
    577 }
    578 
    579 
    580 /// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
    581 /// Look at it and its uses and decide whether it is safe to SROA this global.
    582 ///
    583 static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
    584   // The user of the global must be a GEP Inst or a ConstantExpr GEP.
    585   if (!isa<GetElementPtrInst>(U) &&
    586       (!isa<ConstantExpr>(U) ||
    587        cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
    588     return false;
    589 
    590   // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
    591   // don't like < 3 operand CE's, and we don't like non-constant integer
    592   // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
    593   // value of C.
    594   if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
    595       !cast<Constant>(U->getOperand(1))->isNullValue() ||
    596       !isa<ConstantInt>(U->getOperand(2)))
    597     return false;
    598 
    599   gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
    600   ++GEPI;  // Skip over the pointer index.
    601 
    602   // If this is a use of an array allocation, do a bit more checking for sanity.
    603   if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
    604     uint64_t NumElements = AT->getNumElements();
    605     ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
    606 
    607     // Check to make sure that index falls within the array.  If not,
    608     // something funny is going on, so we won't do the optimization.
    609     //
    610     if (Idx->getZExtValue() >= NumElements)
    611       return false;
    612 
    613     // We cannot scalar repl this level of the array unless any array
    614     // sub-indices are in-range constants.  In particular, consider:
    615     // A[0][i].  We cannot know that the user isn't doing invalid things like
    616     // allowing i to index an out-of-range subscript that accesses A[1].
    617     //
    618     // Scalar replacing *just* the outer index of the array is probably not
    619     // going to be a win anyway, so just give up.
    620     for (++GEPI; // Skip array index.
    621          GEPI != E;
    622          ++GEPI) {
    623       uint64_t NumElements;
    624       if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
    625         NumElements = SubArrayTy->getNumElements();
    626       else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
    627         NumElements = SubVectorTy->getNumElements();
    628       else {
    629         assert((*GEPI)->isStructTy() &&
    630                "Indexed GEP type is not array, vector, or struct!");
    631         continue;
    632       }
    633 
    634       ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
    635       if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
    636         return false;
    637     }
    638   }
    639 
    640   for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
    641     if (!isSafeSROAElementUse(*I))
    642       return false;
    643   return true;
    644 }
    645 
    646 /// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
    647 /// is safe for us to perform this transformation.
    648 ///
    649 static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
    650   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
    651        UI != E; ++UI) {
    652     if (!IsUserOfGlobalSafeForSRA(*UI, GV))
    653       return false;
    654   }
    655   return true;
    656 }
    657 
    658 
    659 /// SRAGlobal - Perform scalar replacement of aggregates on the specified global
    660 /// variable.  This opens the door for other optimizations by exposing the
    661 /// behavior of the program in a more fine-grained way.  We have determined that
    662 /// this transformation is safe already.  We return the first global variable we
    663 /// insert so that the caller can reprocess it.
    664 static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) {
    665   // Make sure this global only has simple uses that we can SRA.
    666   if (!GlobalUsersSafeToSRA(GV))
    667     return 0;
    668 
    669   assert(GV->hasLocalLinkage() && !GV->isConstant());
    670   Constant *Init = GV->getInitializer();
    671   Type *Ty = Init->getType();
    672 
    673   std::vector<GlobalVariable*> NewGlobals;
    674   Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
    675 
    676   // Get the alignment of the global, either explicit or target-specific.
    677   unsigned StartAlignment = GV->getAlignment();
    678   if (StartAlignment == 0)
    679     StartAlignment = TD.getABITypeAlignment(GV->getType());
    680 
    681   if (StructType *STy = dyn_cast<StructType>(Ty)) {
    682     NewGlobals.reserve(STy->getNumElements());
    683     const StructLayout &Layout = *TD.getStructLayout(STy);
    684     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
    685       Constant *In = Init->getAggregateElement(i);
    686       assert(In && "Couldn't get element of initializer?");
    687       GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
    688                                                GlobalVariable::InternalLinkage,
    689                                                In, GV->getName()+"."+Twine(i),
    690                                                GV->getThreadLocalMode(),
    691                                               GV->getType()->getAddressSpace());
    692       Globals.insert(GV, NGV);
    693       NewGlobals.push_back(NGV);
    694 
    695       // Calculate the known alignment of the field.  If the original aggregate
    696       // had 256 byte alignment for example, something might depend on that:
    697       // propagate info to each field.
    698       uint64_t FieldOffset = Layout.getElementOffset(i);
    699       unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
    700       if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
    701         NGV->setAlignment(NewAlign);
    702     }
    703   } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    704     unsigned NumElements = 0;
    705     if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
    706       NumElements = ATy->getNumElements();
    707     else
    708       NumElements = cast<VectorType>(STy)->getNumElements();
    709 
    710     if (NumElements > 16 && GV->hasNUsesOrMore(16))
    711       return 0; // It's not worth it.
    712     NewGlobals.reserve(NumElements);
    713 
    714     uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
    715     unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
    716     for (unsigned i = 0, e = NumElements; i != e; ++i) {
    717       Constant *In = Init->getAggregateElement(i);
    718       assert(In && "Couldn't get element of initializer?");
    719 
    720       GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
    721                                                GlobalVariable::InternalLinkage,
    722                                                In, GV->getName()+"."+Twine(i),
    723                                                GV->getThreadLocalMode(),
    724                                               GV->getType()->getAddressSpace());
    725       Globals.insert(GV, NGV);
    726       NewGlobals.push_back(NGV);
    727 
    728       // Calculate the known alignment of the field.  If the original aggregate
    729       // had 256 byte alignment for example, something might depend on that:
    730       // propagate info to each field.
    731       unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
    732       if (NewAlign > EltAlign)
    733         NGV->setAlignment(NewAlign);
    734     }
    735   }
    736 
    737   if (NewGlobals.empty())
    738     return 0;
    739 
    740   DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
    741 
    742   Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
    743 
    744   // Loop over all of the uses of the global, replacing the constantexpr geps,
    745   // with smaller constantexpr geps or direct references.
    746   while (!GV->use_empty()) {
    747     User *GEP = GV->use_back();
    748     assert(((isa<ConstantExpr>(GEP) &&
    749              cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
    750             isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
    751 
    752     // Ignore the 1th operand, which has to be zero or else the program is quite
    753     // broken (undefined).  Get the 2nd operand, which is the structure or array
    754     // index.
    755     unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
    756     if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.
    757 
    758     Value *NewPtr = NewGlobals[Val];
    759 
    760     // Form a shorter GEP if needed.
    761     if (GEP->getNumOperands() > 3) {
    762       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
    763         SmallVector<Constant*, 8> Idxs;
    764         Idxs.push_back(NullInt);
    765         for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
    766           Idxs.push_back(CE->getOperand(i));
    767         NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
    768       } else {
    769         GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
    770         SmallVector<Value*, 8> Idxs;
    771         Idxs.push_back(NullInt);
    772         for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
    773           Idxs.push_back(GEPI->getOperand(i));
    774         NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
    775                                            GEPI->getName()+"."+Twine(Val),GEPI);
    776       }
    777     }
    778     GEP->replaceAllUsesWith(NewPtr);
    779 
    780     if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
    781       GEPI->eraseFromParent();
    782     else
    783       cast<ConstantExpr>(GEP)->destroyConstant();
    784   }
    785 
    786   // Delete the old global, now that it is dead.
    787   Globals.erase(GV);
    788   ++NumSRA;
    789 
    790   // Loop over the new globals array deleting any globals that are obviously
    791   // dead.  This can arise due to scalarization of a structure or an array that
    792   // has elements that are dead.
    793   unsigned FirstGlobal = 0;
    794   for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
    795     if (NewGlobals[i]->use_empty()) {
    796       Globals.erase(NewGlobals[i]);
    797       if (FirstGlobal == i) ++FirstGlobal;
    798     }
    799 
    800   return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
    801 }
    802 
    803 /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
    804 /// value will trap if the value is dynamically null.  PHIs keeps track of any
    805 /// phi nodes we've seen to avoid reprocessing them.
    806 static bool AllUsesOfValueWillTrapIfNull(const Value *V,
    807                                          SmallPtrSet<const PHINode*, 8> &PHIs) {
    808   for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
    809        ++UI) {
    810     const User *U = *UI;
    811 
    812     if (isa<LoadInst>(U)) {
    813       // Will trap.
    814     } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
    815       if (SI->getOperand(0) == V) {
    816         //cerr << "NONTRAPPING USE: " << *U;
    817         return false;  // Storing the value.
    818       }
    819     } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
    820       if (CI->getCalledValue() != V) {
    821         //cerr << "NONTRAPPING USE: " << *U;
    822         return false;  // Not calling the ptr
    823       }
    824     } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
    825       if (II->getCalledValue() != V) {
    826         //cerr << "NONTRAPPING USE: " << *U;
    827         return false;  // Not calling the ptr
    828       }
    829     } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
    830       if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
    831     } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
    832       if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    833     } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
    834       // If we've already seen this phi node, ignore it, it has already been
    835       // checked.
    836       if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
    837         return false;
    838     } else if (isa<ICmpInst>(U) &&
    839                isa<ConstantPointerNull>(UI->getOperand(1))) {
    840       // Ignore icmp X, null
    841     } else {
    842       //cerr << "NONTRAPPING USE: " << *U;
    843       return false;
    844     }
    845   }
    846   return true;
    847 }
    848 
    849 /// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
    850 /// from GV will trap if the loaded value is null.  Note that this also permits
    851 /// comparisons of the loaded value against null, as a special case.
    852 static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
    853   for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
    854        UI != E; ++UI) {
    855     const User *U = *UI;
    856 
    857     if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
    858       SmallPtrSet<const PHINode*, 8> PHIs;
    859       if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
    860         return false;
    861     } else if (isa<StoreInst>(U)) {
    862       // Ignore stores to the global.
    863     } else {
    864       // We don't know or understand this user, bail out.
    865       //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
    866       return false;
    867     }
    868   }
    869   return true;
    870 }
    871 
    872 static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
    873   bool Changed = false;
    874   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
    875     Instruction *I = cast<Instruction>(*UI++);
    876     if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    877       LI->setOperand(0, NewV);
    878       Changed = true;
    879     } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    880       if (SI->getOperand(1) == V) {
    881         SI->setOperand(1, NewV);
    882         Changed = true;
    883       }
    884     } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
    885       CallSite CS(I);
    886       if (CS.getCalledValue() == V) {
    887         // Calling through the pointer!  Turn into a direct call, but be careful
    888         // that the pointer is not also being passed as an argument.
    889         CS.setCalledFunction(NewV);
    890         Changed = true;
    891         bool PassedAsArg = false;
    892         for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
    893           if (CS.getArgument(i) == V) {
    894             PassedAsArg = true;
    895             CS.setArgument(i, NewV);
    896           }
    897 
    898         if (PassedAsArg) {
    899           // Being passed as an argument also.  Be careful to not invalidate UI!
    900           UI = V->use_begin();
    901         }
    902       }
    903     } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
    904       Changed |= OptimizeAwayTrappingUsesOfValue(CI,
    905                                 ConstantExpr::getCast(CI->getOpcode(),
    906                                                       NewV, CI->getType()));
    907       if (CI->use_empty()) {
    908         Changed = true;
    909         CI->eraseFromParent();
    910       }
    911     } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
    912       // Should handle GEP here.
    913       SmallVector<Constant*, 8> Idxs;
    914       Idxs.reserve(GEPI->getNumOperands()-1);
    915       for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
    916            i != e; ++i)
    917         if (Constant *C = dyn_cast<Constant>(*i))
    918           Idxs.push_back(C);
    919         else
    920           break;
    921       if (Idxs.size() == GEPI->getNumOperands()-1)
    922         Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
    923                           ConstantExpr::getGetElementPtr(NewV, Idxs));
    924       if (GEPI->use_empty()) {
    925         Changed = true;
    926         GEPI->eraseFromParent();
    927       }
    928     }
    929   }
    930 
    931   return Changed;
    932 }
    933 
    934 
    935 /// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null
    936 /// value stored into it.  If there are uses of the loaded value that would trap
    937 /// if the loaded value is dynamically null, then we know that they cannot be
    938 /// reachable with a null optimize away the load.
    939 static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
    940                                             DataLayout *TD,
    941                                             TargetLibraryInfo *TLI) {
    942   bool Changed = false;
    943 
    944   // Keep track of whether we are able to remove all the uses of the global
    945   // other than the store that defines it.
    946   bool AllNonStoreUsesGone = true;
    947 
    948   // Replace all uses of loads with uses of uses of the stored value.
    949   for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
    950     User *GlobalUser = *GUI++;
    951     if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
    952       Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
    953       // If we were able to delete all uses of the loads
    954       if (LI->use_empty()) {
    955         LI->eraseFromParent();
    956         Changed = true;
    957       } else {
    958         AllNonStoreUsesGone = false;
    959       }
    960     } else if (isa<StoreInst>(GlobalUser)) {
    961       // Ignore the store that stores "LV" to the global.
    962       assert(GlobalUser->getOperand(1) == GV &&
    963              "Must be storing *to* the global");
    964     } else {
    965       AllNonStoreUsesGone = false;
    966 
    967       // If we get here we could have other crazy uses that are transitively
    968       // loaded.
    969       assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
    970               isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
    971               isa<BitCastInst>(GlobalUser) ||
    972               isa<GetElementPtrInst>(GlobalUser)) &&
    973              "Only expect load and stores!");
    974     }
    975   }
    976 
    977   if (Changed) {
    978     DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
    979     ++NumGlobUses;
    980   }
    981 
    982   // If we nuked all of the loads, then none of the stores are needed either,
    983   // nor is the global.
    984   if (AllNonStoreUsesGone) {
    985     if (isLeakCheckerRoot(GV)) {
    986       Changed |= CleanupPointerRootUsers(GV, TLI);
    987     } else {
    988       Changed = true;
    989       CleanupConstantGlobalUsers(GV, 0, TD, TLI);
    990     }
    991     if (GV->use_empty()) {
    992       DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
    993       Changed = true;
    994       GV->eraseFromParent();
    995       ++NumDeleted;
    996     }
    997   }
    998   return Changed;
    999 }
   1000 
   1001 /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
   1002 /// instructions that are foldable.
   1003 static void ConstantPropUsersOf(Value *V,
   1004                                 DataLayout *TD, TargetLibraryInfo *TLI) {
   1005   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
   1006     if (Instruction *I = dyn_cast<Instruction>(*UI++))
   1007       if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) {
   1008         I->replaceAllUsesWith(NewC);
   1009 
   1010         // Advance UI to the next non-I use to avoid invalidating it!
   1011         // Instructions could multiply use V.
   1012         while (UI != E && *UI == I)
   1013           ++UI;
   1014         I->eraseFromParent();
   1015       }
   1016 }
   1017 
   1018 /// OptimizeGlobalAddressOfMalloc - This function takes the specified global
   1019 /// variable, and transforms the program as if it always contained the result of
   1020 /// the specified malloc.  Because it is always the result of the specified
   1021 /// malloc, there is no reason to actually DO the malloc.  Instead, turn the
   1022 /// malloc into a global, and any loads of GV as uses of the new global.
   1023 static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
   1024                                                      CallInst *CI,
   1025                                                      Type *AllocTy,
   1026                                                      ConstantInt *NElements,
   1027                                                      DataLayout *TD,
   1028                                                      TargetLibraryInfo *TLI) {
   1029   DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');
   1030 
   1031   Type *GlobalType;
   1032   if (NElements->getZExtValue() == 1)
   1033     GlobalType = AllocTy;
   1034   else
   1035     // If we have an array allocation, the global variable is of an array.
   1036     GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
   1037 
   1038   // Create the new global variable.  The contents of the malloc'd memory is
   1039   // undefined, so initialize with an undef value.
   1040   GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
   1041                                              GlobalType, false,
   1042                                              GlobalValue::InternalLinkage,
   1043                                              UndefValue::get(GlobalType),
   1044                                              GV->getName()+".body",
   1045                                              GV,
   1046                                              GV->getThreadLocalMode());
   1047 
   1048   // If there are bitcast users of the malloc (which is typical, usually we have
   1049   // a malloc + bitcast) then replace them with uses of the new global.  Update
   1050   // other users to use the global as well.
   1051   BitCastInst *TheBC = 0;
   1052   while (!CI->use_empty()) {
   1053     Instruction *User = cast<Instruction>(CI->use_back());
   1054     if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
   1055       if (BCI->getType() == NewGV->getType()) {
   1056         BCI->replaceAllUsesWith(NewGV);
   1057         BCI->eraseFromParent();
   1058       } else {
   1059         BCI->setOperand(0, NewGV);
   1060       }
   1061     } else {
   1062       if (TheBC == 0)
   1063         TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
   1064       User->replaceUsesOfWith(CI, TheBC);
   1065     }
   1066   }
   1067 
   1068   Constant *RepValue = NewGV;
   1069   if (NewGV->getType() != GV->getType()->getElementType())
   1070     RepValue = ConstantExpr::getBitCast(RepValue,
   1071                                         GV->getType()->getElementType());
   1072 
   1073   // If there is a comparison against null, we will insert a global bool to
   1074   // keep track of whether the global was initialized yet or not.
   1075   GlobalVariable *InitBool =
   1076     new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
   1077                        GlobalValue::InternalLinkage,
   1078                        ConstantInt::getFalse(GV->getContext()),
   1079                        GV->getName()+".init", GV->getThreadLocalMode());
   1080   bool InitBoolUsed = false;
   1081 
   1082   // Loop over all uses of GV, processing them in turn.
   1083   while (!GV->use_empty()) {
   1084     if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
   1085       // The global is initialized when the store to it occurs.
   1086       new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
   1087                     SI->getOrdering(), SI->getSynchScope(), SI);
   1088       SI->eraseFromParent();
   1089       continue;
   1090     }
   1091 
   1092     LoadInst *LI = cast<LoadInst>(GV->use_back());
   1093     while (!LI->use_empty()) {
   1094       Use &LoadUse = LI->use_begin().getUse();
   1095       if (!isa<ICmpInst>(LoadUse.getUser())) {
   1096         LoadUse = RepValue;
   1097         continue;
   1098       }
   1099 
   1100       ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
   1101       // Replace the cmp X, 0 with a use of the bool value.
   1102       // Sink the load to where the compare was, if atomic rules allow us to.
   1103       Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
   1104                                LI->getOrdering(), LI->getSynchScope(),
   1105                                LI->isUnordered() ? (Instruction*)ICI : LI);
   1106       InitBoolUsed = true;
   1107       switch (ICI->getPredicate()) {
   1108       default: llvm_unreachable("Unknown ICmp Predicate!");
   1109       case ICmpInst::ICMP_ULT:
   1110       case ICmpInst::ICMP_SLT:   // X < null -> always false
   1111         LV = ConstantInt::getFalse(GV->getContext());
   1112         break;
   1113       case ICmpInst::ICMP_ULE:
   1114       case ICmpInst::ICMP_SLE:
   1115       case ICmpInst::ICMP_EQ:
   1116         LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
   1117         break;
   1118       case ICmpInst::ICMP_NE:
   1119       case ICmpInst::ICMP_UGE:
   1120       case ICmpInst::ICMP_SGE:
   1121       case ICmpInst::ICMP_UGT:
   1122       case ICmpInst::ICMP_SGT:
   1123         break;  // no change.
   1124       }
   1125       ICI->replaceAllUsesWith(LV);
   1126       ICI->eraseFromParent();
   1127     }
   1128     LI->eraseFromParent();
   1129   }
   1130 
   1131   // If the initialization boolean was used, insert it, otherwise delete it.
   1132   if (!InitBoolUsed) {
   1133     while (!InitBool->use_empty())  // Delete initializations
   1134       cast<StoreInst>(InitBool->use_back())->eraseFromParent();
   1135     delete InitBool;
   1136   } else
   1137     GV->getParent()->getGlobalList().insert(GV, InitBool);
   1138 
   1139   // Now the GV is dead, nuke it and the malloc..
   1140   GV->eraseFromParent();
   1141   CI->eraseFromParent();
   1142 
   1143   // To further other optimizations, loop over all users of NewGV and try to
   1144   // constant prop them.  This will promote GEP instructions with constant
   1145   // indices into GEP constant-exprs, which will allow global-opt to hack on it.
   1146   ConstantPropUsersOf(NewGV, TD, TLI);
   1147   if (RepValue != NewGV)
   1148     ConstantPropUsersOf(RepValue, TD, TLI);
   1149 
   1150   return NewGV;
   1151 }
   1152 
   1153 /// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
   1154 /// to make sure that there are no complex uses of V.  We permit simple things
   1155 /// like dereferencing the pointer, but not storing through the address, unless
   1156 /// it is to the specified global.
   1157 static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
   1158                                                       const GlobalVariable *GV,
   1159                                          SmallPtrSet<const PHINode*, 8> &PHIs) {
   1160   for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
   1161        UI != E; ++UI) {
   1162     const Instruction *Inst = cast<Instruction>(*UI);
   1163 
   1164     if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
   1165       continue; // Fine, ignore.
   1166     }
   1167 
   1168     if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
   1169       if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
   1170         return false;  // Storing the pointer itself... bad.
   1171       continue; // Otherwise, storing through it, or storing into GV... fine.
   1172     }
   1173 
   1174     // Must index into the array and into the struct.
   1175     if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
   1176       if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
   1177         return false;
   1178       continue;
   1179     }
   1180 
   1181     if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
   1182       // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
   1183       // cycles.
   1184       if (PHIs.insert(PN))
   1185         if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
   1186           return false;
   1187       continue;
   1188     }
   1189 
   1190     if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
   1191       if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
   1192         return false;
   1193       continue;
   1194     }
   1195 
   1196     return false;
   1197   }
   1198   return true;
   1199 }
   1200 
   1201 /// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
   1202 /// somewhere.  Transform all uses of the allocation into loads from the
   1203 /// global and uses of the resultant pointer.  Further, delete the store into
   1204 /// GV.  This assumes that these value pass the
   1205 /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
   1206 static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
   1207                                           GlobalVariable *GV) {
   1208   while (!Alloc->use_empty()) {
   1209     Instruction *U = cast<Instruction>(*Alloc->use_begin());
   1210     Instruction *InsertPt = U;
   1211     if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
   1212       // If this is the store of the allocation into the global, remove it.
   1213       if (SI->getOperand(1) == GV) {
   1214         SI->eraseFromParent();
   1215         continue;
   1216       }
   1217     } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
   1218       // Insert the load in the corresponding predecessor, not right before the
   1219       // PHI.
   1220       InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
   1221     } else if (isa<BitCastInst>(U)) {
   1222       // Must be bitcast between the malloc and store to initialize the global.
   1223       ReplaceUsesOfMallocWithGlobal(U, GV);
   1224       U->eraseFromParent();
   1225       continue;
   1226     } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
   1227       // If this is a "GEP bitcast" and the user is a store to the global, then
   1228       // just process it as a bitcast.
   1229       if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
   1230         if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
   1231           if (SI->getOperand(1) == GV) {
   1232             // Must be bitcast GEP between the malloc and store to initialize
   1233             // the global.
   1234             ReplaceUsesOfMallocWithGlobal(GEPI, GV);
   1235             GEPI->eraseFromParent();
   1236             continue;
   1237           }
   1238     }
   1239 
   1240     // Insert a load from the global, and use it instead of the malloc.
   1241     Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
   1242     U->replaceUsesOfWith(Alloc, NL);
   1243   }
   1244 }
   1245 
   1246 /// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
   1247 /// of a load) are simple enough to perform heap SRA on.  This permits GEP's
   1248 /// that index through the array and struct field, icmps of null, and PHIs.
   1249 static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
   1250                         SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
   1251                         SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
   1252   // We permit two users of the load: setcc comparing against the null
   1253   // pointer, and a getelementptr of a specific form.
   1254   for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
   1255        ++UI) {
   1256     const Instruction *User = cast<Instruction>(*UI);
   1257 
   1258     // Comparison against null is ok.
   1259     if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
   1260       if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
   1261         return false;
   1262       continue;
   1263     }
   1264 
   1265     // getelementptr is also ok, but only a simple form.
   1266     if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
   1267       // Must index into the array and into the struct.
   1268       if (GEPI->getNumOperands() < 3)
   1269         return false;
   1270 
   1271       // Otherwise the GEP is ok.
   1272       continue;
   1273     }
   1274 
   1275     if (const PHINode *PN = dyn_cast<PHINode>(User)) {
   1276       if (!LoadUsingPHIsPerLoad.insert(PN))
   1277         // This means some phi nodes are dependent on each other.
   1278         // Avoid infinite looping!
   1279         return false;
   1280       if (!LoadUsingPHIs.insert(PN))
   1281         // If we have already analyzed this PHI, then it is safe.
   1282         continue;
   1283 
   1284       // Make sure all uses of the PHI are simple enough to transform.
   1285       if (!LoadUsesSimpleEnoughForHeapSRA(PN,
   1286                                           LoadUsingPHIs, LoadUsingPHIsPerLoad))
   1287         return false;
   1288 
   1289       continue;
   1290     }
   1291 
   1292     // Otherwise we don't know what this is, not ok.
   1293     return false;
   1294   }
   1295 
   1296   return true;
   1297 }
   1298 
   1299 
   1300 /// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
   1301 /// GV are simple enough to perform HeapSRA, return true.
   1302 static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
   1303                                                     Instruction *StoredVal) {
   1304   SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
   1305   SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
   1306   for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
   1307        UI != E; ++UI)
   1308     if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
   1309       if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
   1310                                           LoadUsingPHIsPerLoad))
   1311         return false;
   1312       LoadUsingPHIsPerLoad.clear();
   1313     }
   1314 
   1315   // If we reach here, we know that all uses of the loads and transitive uses
   1316   // (through PHI nodes) are simple enough to transform.  However, we don't know
   1317   // that all inputs the to the PHI nodes are in the same equivalence sets.
   1318   // Check to verify that all operands of the PHIs are either PHIS that can be
   1319   // transformed, loads from GV, or MI itself.
   1320   for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
   1321        , E = LoadUsingPHIs.end(); I != E; ++I) {
   1322     const PHINode *PN = *I;
   1323     for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
   1324       Value *InVal = PN->getIncomingValue(op);
   1325 
   1326       // PHI of the stored value itself is ok.
   1327       if (InVal == StoredVal) continue;
   1328 
   1329       if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
   1330         // One of the PHIs in our set is (optimistically) ok.
   1331         if (LoadUsingPHIs.count(InPN))
   1332           continue;
   1333         return false;
   1334       }
   1335 
   1336       // Load from GV is ok.
   1337       if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
   1338         if (LI->getOperand(0) == GV)
   1339           continue;
   1340 
   1341       // UNDEF? NULL?
   1342 
   1343       // Anything else is rejected.
   1344       return false;
   1345     }
   1346   }
   1347 
   1348   return true;
   1349 }
   1350 
   1351 static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
   1352                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
   1353                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   1354   std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
   1355 
   1356   if (FieldNo >= FieldVals.size())
   1357     FieldVals.resize(FieldNo+1);
   1358 
   1359   // If we already have this value, just reuse the previously scalarized
   1360   // version.
   1361   if (Value *FieldVal = FieldVals[FieldNo])
   1362     return FieldVal;
   1363 
   1364   // Depending on what instruction this is, we have several cases.
   1365   Value *Result;
   1366   if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
   1367     // This is a scalarized version of the load from the global.  Just create
   1368     // a new Load of the scalarized global.
   1369     Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
   1370                                            InsertedScalarizedValues,
   1371                                            PHIsToRewrite),
   1372                           LI->getName()+".f"+Twine(FieldNo), LI);
   1373   } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
   1374     // PN's type is pointer to struct.  Make a new PHI of pointer to struct
   1375     // field.
   1376     StructType *ST =
   1377       cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
   1378 
   1379     PHINode *NewPN =
   1380      PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
   1381                      PN->getNumIncomingValues(),
   1382                      PN->getName()+".f"+Twine(FieldNo), PN);
   1383     Result = NewPN;
   1384     PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
   1385   } else {
   1386     llvm_unreachable("Unknown usable value");
   1387   }
   1388 
   1389   return FieldVals[FieldNo] = Result;
   1390 }
   1391 
   1392 /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
   1393 /// the load, rewrite the derived value to use the HeapSRoA'd load.
   1394 static void RewriteHeapSROALoadUser(Instruction *LoadUser,
   1395              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
   1396                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   1397   // If this is a comparison against null, handle it.
   1398   if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
   1399     assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
   1400     // If we have a setcc of the loaded pointer, we can use a setcc of any
   1401     // field.
   1402     Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
   1403                                    InsertedScalarizedValues, PHIsToRewrite);
   1404 
   1405     Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
   1406                               Constant::getNullValue(NPtr->getType()),
   1407                               SCI->getName());
   1408     SCI->replaceAllUsesWith(New);
   1409     SCI->eraseFromParent();
   1410     return;
   1411   }
   1412 
   1413   // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
   1414   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
   1415     assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
   1416            && "Unexpected GEPI!");
   1417 
   1418     // Load the pointer for this field.
   1419     unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
   1420     Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
   1421                                      InsertedScalarizedValues, PHIsToRewrite);
   1422 
   1423     // Create the new GEP idx vector.
   1424     SmallVector<Value*, 8> GEPIdx;
   1425     GEPIdx.push_back(GEPI->getOperand(1));
   1426     GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
   1427 
   1428     Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
   1429                                              GEPI->getName(), GEPI);
   1430     GEPI->replaceAllUsesWith(NGEPI);
   1431     GEPI->eraseFromParent();
   1432     return;
   1433   }
   1434 
   1435   // Recursively transform the users of PHI nodes.  This will lazily create the
   1436   // PHIs that are needed for individual elements.  Keep track of what PHIs we
   1437   // see in InsertedScalarizedValues so that we don't get infinite loops (very
   1438   // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
   1439   // already been seen first by another load, so its uses have already been
   1440   // processed.
   1441   PHINode *PN = cast<PHINode>(LoadUser);
   1442   if (!InsertedScalarizedValues.insert(std::make_pair(PN,
   1443                                               std::vector<Value*>())).second)
   1444     return;
   1445 
   1446   // If this is the first time we've seen this PHI, recursively process all
   1447   // users.
   1448   for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
   1449     Instruction *User = cast<Instruction>(*UI++);
   1450     RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
   1451   }
   1452 }
   1453 
   1454 /// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
   1455 /// is a value loaded from the global.  Eliminate all uses of Ptr, making them
   1456 /// use FieldGlobals instead.  All uses of loaded values satisfy
   1457 /// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
   1458 static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
   1459                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
   1460                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   1461   for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
   1462        UI != E; ) {
   1463     Instruction *User = cast<Instruction>(*UI++);
   1464     RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
   1465   }
   1466 
   1467   if (Load->use_empty()) {
   1468     Load->eraseFromParent();
   1469     InsertedScalarizedValues.erase(Load);
   1470   }
   1471 }
   1472 
   1473 /// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
   1474 /// it up into multiple allocations of arrays of the fields.
   1475 static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
   1476                                             Value *NElems, DataLayout *TD,
   1477                                             const TargetLibraryInfo *TLI) {
   1478   DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
   1479   Type *MAT = getMallocAllocatedType(CI, TLI);
   1480   StructType *STy = cast<StructType>(MAT);
   1481 
   1482   // There is guaranteed to be at least one use of the malloc (storing
   1483   // it into GV).  If there are other uses, change them to be uses of
   1484   // the global to simplify later code.  This also deletes the store
   1485   // into GV.
   1486   ReplaceUsesOfMallocWithGlobal(CI, GV);
   1487 
   1488   // Okay, at this point, there are no users of the malloc.  Insert N
   1489   // new mallocs at the same place as CI, and N globals.
   1490   std::vector<Value*> FieldGlobals;
   1491   std::vector<Value*> FieldMallocs;
   1492 
   1493   for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
   1494     Type *FieldTy = STy->getElementType(FieldNo);
   1495     PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
   1496 
   1497     GlobalVariable *NGV =
   1498       new GlobalVariable(*GV->getParent(),
   1499                          PFieldTy, false, GlobalValue::InternalLinkage,
   1500                          Constant::getNullValue(PFieldTy),
   1501                          GV->getName() + ".f" + Twine(FieldNo), GV,
   1502                          GV->getThreadLocalMode());
   1503     FieldGlobals.push_back(NGV);
   1504 
   1505     unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
   1506     if (StructType *ST = dyn_cast<StructType>(FieldTy))
   1507       TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
   1508     Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
   1509     Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
   1510                                         ConstantInt::get(IntPtrTy, TypeSize),
   1511                                         NElems, 0,
   1512                                         CI->getName() + ".f" + Twine(FieldNo));
   1513     FieldMallocs.push_back(NMI);
   1514     new StoreInst(NMI, NGV, CI);
   1515   }
   1516 
   1517   // The tricky aspect of this transformation is handling the case when malloc
   1518   // fails.  In the original code, malloc failing would set the result pointer
   1519   // of malloc to null.  In this case, some mallocs could succeed and others
   1520   // could fail.  As such, we emit code that looks like this:
   1521   //    F0 = malloc(field0)
   1522   //    F1 = malloc(field1)
   1523   //    F2 = malloc(field2)
   1524   //    if (F0 == 0 || F1 == 0 || F2 == 0) {
   1525   //      if (F0) { free(F0); F0 = 0; }
   1526   //      if (F1) { free(F1); F1 = 0; }
   1527   //      if (F2) { free(F2); F2 = 0; }
   1528   //    }
   1529   // The malloc can also fail if its argument is too large.
   1530   Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
   1531   Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
   1532                                   ConstantZero, "isneg");
   1533   for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
   1534     Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
   1535                              Constant::getNullValue(FieldMallocs[i]->getType()),
   1536                                "isnull");
   1537     RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
   1538   }
   1539 
   1540   // Split the basic block at the old malloc.
   1541   BasicBlock *OrigBB = CI->getParent();
   1542   BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
   1543 
   1544   // Create the block to check the first condition.  Put all these blocks at the
   1545   // end of the function as they are unlikely to be executed.
   1546   BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
   1547                                                 "malloc_ret_null",
   1548                                                 OrigBB->getParent());
   1549 
   1550   // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
   1551   // branch on RunningOr.
   1552   OrigBB->getTerminator()->eraseFromParent();
   1553   BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
   1554 
   1555   // Within the NullPtrBlock, we need to emit a comparison and branch for each
   1556   // pointer, because some may be null while others are not.
   1557   for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
   1558     Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
   1559     Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
   1560                               Constant::getNullValue(GVVal->getType()));
   1561     BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
   1562                                                OrigBB->getParent());
   1563     BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
   1564                                                OrigBB->getParent());
   1565     Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
   1566                                          Cmp, NullPtrBlock);
   1567 
   1568     // Fill in FreeBlock.
   1569     CallInst::CreateFree(GVVal, BI);
   1570     new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
   1571                   FreeBlock);
   1572     BranchInst::Create(NextBlock, FreeBlock);
   1573 
   1574     NullPtrBlock = NextBlock;
   1575   }
   1576 
   1577   BranchInst::Create(ContBB, NullPtrBlock);
   1578 
   1579   // CI is no longer needed, remove it.
   1580   CI->eraseFromParent();
   1581 
   1582   /// InsertedScalarizedLoads - As we process loads, if we can't immediately
   1583   /// update all uses of the load, keep track of what scalarized loads are
   1584   /// inserted for a given load.
   1585   DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
   1586   InsertedScalarizedValues[GV] = FieldGlobals;
   1587 
   1588   std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
   1589 
   1590   // Okay, the malloc site is completely handled.  All of the uses of GV are now
   1591   // loads, and all uses of those loads are simple.  Rewrite them to use loads
   1592   // of the per-field globals instead.
   1593   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
   1594     Instruction *User = cast<Instruction>(*UI++);
   1595 
   1596     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
   1597       RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
   1598       continue;
   1599     }
   1600 
   1601     // Must be a store of null.
   1602     StoreInst *SI = cast<StoreInst>(User);
   1603     assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
   1604            "Unexpected heap-sra user!");
   1605 
   1606     // Insert a store of null into each global.
   1607     for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
   1608       PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
   1609       Constant *Null = Constant::getNullValue(PT->getElementType());
   1610       new StoreInst(Null, FieldGlobals[i], SI);
   1611     }
   1612     // Erase the original store.
   1613     SI->eraseFromParent();
   1614   }
   1615 
   1616   // While we have PHIs that are interesting to rewrite, do it.
   1617   while (!PHIsToRewrite.empty()) {
   1618     PHINode *PN = PHIsToRewrite.back().first;
   1619     unsigned FieldNo = PHIsToRewrite.back().second;
   1620     PHIsToRewrite.pop_back();
   1621     PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
   1622     assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
   1623 
   1624     // Add all the incoming values.  This can materialize more phis.
   1625     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
   1626       Value *InVal = PN->getIncomingValue(i);
   1627       InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
   1628                                PHIsToRewrite);
   1629       FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
   1630     }
   1631   }
   1632 
   1633   // Drop all inter-phi links and any loads that made it this far.
   1634   for (DenseMap<Value*, std::vector<Value*> >::iterator
   1635        I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
   1636        I != E; ++I) {
   1637     if (PHINode *PN = dyn_cast<PHINode>(I->first))
   1638       PN->dropAllReferences();
   1639     else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
   1640       LI->dropAllReferences();
   1641   }
   1642 
   1643   // Delete all the phis and loads now that inter-references are dead.
   1644   for (DenseMap<Value*, std::vector<Value*> >::iterator
   1645        I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
   1646        I != E; ++I) {
   1647     if (PHINode *PN = dyn_cast<PHINode>(I->first))
   1648       PN->eraseFromParent();
   1649     else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
   1650       LI->eraseFromParent();
   1651   }
   1652 
   1653   // The old global is now dead, remove it.
   1654   GV->eraseFromParent();
   1655 
   1656   ++NumHeapSRA;
   1657   return cast<GlobalVariable>(FieldGlobals[0]);
   1658 }
   1659 
   1660 /// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
   1661 /// pointer global variable with a single value stored it that is a malloc or
   1662 /// cast of malloc.
   1663 static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
   1664                                                CallInst *CI,
   1665                                                Type *AllocTy,
   1666                                                AtomicOrdering Ordering,
   1667                                                Module::global_iterator &GVI,
   1668                                                DataLayout *TD,
   1669                                                TargetLibraryInfo *TLI) {
   1670   if (!TD)
   1671     return false;
   1672 
   1673   // If this is a malloc of an abstract type, don't touch it.
   1674   if (!AllocTy->isSized())
   1675     return false;
   1676 
   1677   // We can't optimize this global unless all uses of it are *known* to be
   1678   // of the malloc value, not of the null initializer value (consider a use
   1679   // that compares the global's value against zero to see if the malloc has
   1680   // been reached).  To do this, we check to see if all uses of the global
   1681   // would trap if the global were null: this proves that they must all
   1682   // happen after the malloc.
   1683   if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
   1684     return false;
   1685 
   1686   // We can't optimize this if the malloc itself is used in a complex way,
   1687   // for example, being stored into multiple globals.  This allows the
   1688   // malloc to be stored into the specified global, loaded icmp'd, and
   1689   // GEP'd.  These are all things we could transform to using the global
   1690   // for.
   1691   SmallPtrSet<const PHINode*, 8> PHIs;
   1692   if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
   1693     return false;
   1694 
   1695   // If we have a global that is only initialized with a fixed size malloc,
   1696   // transform the program to use global memory instead of malloc'd memory.
   1697   // This eliminates dynamic allocation, avoids an indirection accessing the
   1698   // data, and exposes the resultant global to further GlobalOpt.
   1699   // We cannot optimize the malloc if we cannot determine malloc array size.
   1700   Value *NElems = getMallocArraySize(CI, TD, TLI, true);
   1701   if (!NElems)
   1702     return false;
   1703 
   1704   if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
   1705     // Restrict this transformation to only working on small allocations
   1706     // (2048 bytes currently), as we don't want to introduce a 16M global or
   1707     // something.
   1708     if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
   1709       GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD, TLI);
   1710       return true;
   1711     }
   1712 
   1713   // If the allocation is an array of structures, consider transforming this
   1714   // into multiple malloc'd arrays, one for each field.  This is basically
   1715   // SRoA for malloc'd memory.
   1716 
   1717   if (Ordering != NotAtomic)
   1718     return false;
   1719 
   1720   // If this is an allocation of a fixed size array of structs, analyze as a
   1721   // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
   1722   if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
   1723     if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
   1724       AllocTy = AT->getElementType();
   1725 
   1726   StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
   1727   if (!AllocSTy)
   1728     return false;
   1729 
   1730   // This the structure has an unreasonable number of fields, leave it
   1731   // alone.
   1732   if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
   1733       AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
   1734 
   1735     // If this is a fixed size array, transform the Malloc to be an alloc of
   1736     // structs.  malloc [100 x struct],1 -> malloc struct, 100
   1737     if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
   1738       Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
   1739       unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
   1740       Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
   1741       Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
   1742       Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
   1743                                                    AllocSize, NumElements,
   1744                                                    0, CI->getName());
   1745       Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
   1746       CI->replaceAllUsesWith(Cast);
   1747       CI->eraseFromParent();
   1748       if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
   1749         CI = cast<CallInst>(BCI->getOperand(0));
   1750       else
   1751         CI = cast<CallInst>(Malloc);
   1752     }
   1753 
   1754     GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true),
   1755                                TD, TLI);
   1756     return true;
   1757   }
   1758 
   1759   return false;
   1760 }
   1761 
   1762 // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
   1763 // that only one value (besides its initializer) is ever stored to the global.
   1764 static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
   1765                                      AtomicOrdering Ordering,
   1766                                      Module::global_iterator &GVI,
   1767                                      DataLayout *TD, TargetLibraryInfo *TLI) {
   1768   // Ignore no-op GEPs and bitcasts.
   1769   StoredOnceVal = StoredOnceVal->stripPointerCasts();
   1770 
   1771   // If we are dealing with a pointer global that is initialized to null and
   1772   // only has one (non-null) value stored into it, then we can optimize any
   1773   // users of the loaded value (often calls and loads) that would trap if the
   1774   // value was null.
   1775   if (GV->getInitializer()->getType()->isPointerTy() &&
   1776       GV->getInitializer()->isNullValue()) {
   1777     if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
   1778       if (GV->getInitializer()->getType() != SOVC->getType())
   1779         SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
   1780 
   1781       // Optimize away any trapping uses of the loaded value.
   1782       if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
   1783         return true;
   1784     } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
   1785       Type *MallocType = getMallocAllocatedType(CI, TLI);
   1786       if (MallocType &&
   1787           TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
   1788                                              TD, TLI))
   1789         return true;
   1790     }
   1791   }
   1792 
   1793   return false;
   1794 }
   1795 
   1796 /// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
   1797 /// two values ever stored into GV are its initializer and OtherVal.  See if we
   1798 /// can shrink the global into a boolean and select between the two values
   1799 /// whenever it is used.  This exposes the values to other scalar optimizations.
   1800 static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
   1801   Type *GVElType = GV->getType()->getElementType();
   1802 
   1803   // If GVElType is already i1, it is already shrunk.  If the type of the GV is
   1804   // an FP value, pointer or vector, don't do this optimization because a select
   1805   // between them is very expensive and unlikely to lead to later
   1806   // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
   1807   // where v1 and v2 both require constant pool loads, a big loss.
   1808   if (GVElType == Type::getInt1Ty(GV->getContext()) ||
   1809       GVElType->isFloatingPointTy() ||
   1810       GVElType->isPointerTy() || GVElType->isVectorTy())
   1811     return false;
   1812 
   1813   // Walk the use list of the global seeing if all the uses are load or store.
   1814   // If there is anything else, bail out.
   1815   for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
   1816     User *U = *I;
   1817     if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
   1818       return false;
   1819   }
   1820 
   1821   DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV);
   1822 
   1823   // Create the new global, initializing it to false.
   1824   GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
   1825                                              false,
   1826                                              GlobalValue::InternalLinkage,
   1827                                         ConstantInt::getFalse(GV->getContext()),
   1828                                              GV->getName()+".b",
   1829                                              GV->getThreadLocalMode(),
   1830                                              GV->getType()->getAddressSpace());
   1831   GV->getParent()->getGlobalList().insert(GV, NewGV);
   1832 
   1833   Constant *InitVal = GV->getInitializer();
   1834   assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
   1835          "No reason to shrink to bool!");
   1836 
   1837   // If initialized to zero and storing one into the global, we can use a cast
   1838   // instead of a select to synthesize the desired value.
   1839   bool IsOneZero = false;
   1840   if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
   1841     IsOneZero = InitVal->isNullValue() && CI->isOne();
   1842 
   1843   while (!GV->use_empty()) {
   1844     Instruction *UI = cast<Instruction>(GV->use_back());
   1845     if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
   1846       // Change the store into a boolean store.
   1847       bool StoringOther = SI->getOperand(0) == OtherVal;
   1848       // Only do this if we weren't storing a loaded value.
   1849       Value *StoreVal;
   1850       if (StoringOther || SI->getOperand(0) == InitVal) {
   1851         StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
   1852                                     StoringOther);
   1853       } else {
   1854         // Otherwise, we are storing a previously loaded copy.  To do this,
   1855         // change the copy from copying the original value to just copying the
   1856         // bool.
   1857         Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
   1858 
   1859         // If we've already replaced the input, StoredVal will be a cast or
   1860         // select instruction.  If not, it will be a load of the original
   1861         // global.
   1862         if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
   1863           assert(LI->getOperand(0) == GV && "Not a copy!");
   1864           // Insert a new load, to preserve the saved value.
   1865           StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
   1866                                   LI->getOrdering(), LI->getSynchScope(), LI);
   1867         } else {
   1868           assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
   1869                  "This is not a form that we understand!");
   1870           StoreVal = StoredVal->getOperand(0);
   1871           assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
   1872         }
   1873       }
   1874       new StoreInst(StoreVal, NewGV, false, 0,
   1875                     SI->getOrdering(), SI->getSynchScope(), SI);
   1876     } else {
   1877       // Change the load into a load of bool then a select.
   1878       LoadInst *LI = cast<LoadInst>(UI);
   1879       LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
   1880                                    LI->getOrdering(), LI->getSynchScope(), LI);
   1881       Value *NSI;
   1882       if (IsOneZero)
   1883         NSI = new ZExtInst(NLI, LI->getType(), "", LI);
   1884       else
   1885         NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
   1886       NSI->takeName(LI);
   1887       LI->replaceAllUsesWith(NSI);
   1888     }
   1889     UI->eraseFromParent();
   1890   }
   1891 
   1892   // Retain the name of the old global variable. People who are debugging their
   1893   // programs may expect these variables to be named the same.
   1894   NewGV->takeName(GV);
   1895   GV->eraseFromParent();
   1896   return true;
   1897 }
   1898 
   1899 
   1900 /// ProcessGlobal - Analyze the specified global variable and optimize it if
   1901 /// possible.  If we make a change, return true.
   1902 bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
   1903                               Module::global_iterator &GVI) {
   1904   if (!GV->isDiscardableIfUnused())
   1905     return false;
   1906 
   1907   // Do more involved optimizations if the global is internal.
   1908   GV->removeDeadConstantUsers();
   1909 
   1910   if (GV->use_empty()) {
   1911     DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
   1912     GV->eraseFromParent();
   1913     ++NumDeleted;
   1914     return true;
   1915   }
   1916 
   1917   if (!GV->hasLocalLinkage())
   1918     return false;
   1919 
   1920   SmallPtrSet<const PHINode*, 16> PHIUsers;
   1921   GlobalStatus GS;
   1922 
   1923   if (AnalyzeGlobal(GV, GS, PHIUsers))
   1924     return false;
   1925 
   1926   if (!GS.isCompared && !GV->hasUnnamedAddr()) {
   1927     GV->setUnnamedAddr(true);
   1928     NumUnnamed++;
   1929   }
   1930 
   1931   if (GV->isConstant() || !GV->hasInitializer())
   1932     return false;
   1933 
   1934   return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
   1935 }
   1936 
   1937 /// ProcessInternalGlobal - Analyze the specified global variable and optimize
   1938 /// it if possible.  If we make a change, return true.
   1939 bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
   1940                                       Module::global_iterator &GVI,
   1941                                 const SmallPtrSet<const PHINode*, 16> &PHIUsers,
   1942                                       const GlobalStatus &GS) {
   1943   // If this is a first class global and has only one accessing function
   1944   // and this function is main (which we know is not recursive), we replace
   1945   // the global with a local alloca in this function.
   1946   //
   1947   // NOTE: It doesn't make sense to promote non single-value types since we
   1948   // are just replacing static memory to stack memory.
   1949   //
   1950   // If the global is in different address space, don't bring it to stack.
   1951   if (!GS.HasMultipleAccessingFunctions &&
   1952       GS.AccessingFunction && !GS.HasNonInstructionUser &&
   1953       GV->getType()->getElementType()->isSingleValueType() &&
   1954       GS.AccessingFunction->getName() == "main" &&
   1955       GS.AccessingFunction->hasExternalLinkage() &&
   1956       GV->getType()->getAddressSpace() == 0) {
   1957     DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
   1958     Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
   1959                                                    ->getEntryBlock().begin());
   1960     Type *ElemTy = GV->getType()->getElementType();
   1961     // FIXME: Pass Global's alignment when globals have alignment
   1962     AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
   1963     if (!isa<UndefValue>(GV->getInitializer()))
   1964       new StoreInst(GV->getInitializer(), Alloca, &FirstI);
   1965 
   1966     GV->replaceAllUsesWith(Alloca);
   1967     GV->eraseFromParent();
   1968     ++NumLocalized;
   1969     return true;
   1970   }
   1971 
   1972   // If the global is never loaded (but may be stored to), it is dead.
   1973   // Delete it now.
   1974   if (!GS.isLoaded) {
   1975     DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
   1976 
   1977     bool Changed;
   1978     if (isLeakCheckerRoot(GV)) {
   1979       // Delete any constant stores to the global.
   1980       Changed = CleanupPointerRootUsers(GV, TLI);
   1981     } else {
   1982       // Delete any stores we can find to the global.  We may not be able to
   1983       // make it completely dead though.
   1984       Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
   1985     }
   1986 
   1987     // If the global is dead now, delete it.
   1988     if (GV->use_empty()) {
   1989       GV->eraseFromParent();
   1990       ++NumDeleted;
   1991       Changed = true;
   1992     }
   1993     return Changed;
   1994 
   1995   } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
   1996     DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
   1997     GV->setConstant(true);
   1998 
   1999     // Clean up any obviously simplifiable users now.
   2000     CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
   2001 
   2002     // If the global is dead now, just nuke it.
   2003     if (GV->use_empty()) {
   2004       DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
   2005             << "all users and delete global!\n");
   2006       GV->eraseFromParent();
   2007       ++NumDeleted;
   2008     }
   2009 
   2010     ++NumMarked;
   2011     return true;
   2012   } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
   2013     if (DataLayout *TD = getAnalysisIfAvailable<DataLayout>())
   2014       if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
   2015         GVI = FirstNewGV;  // Don't skip the newly produced globals!
   2016         return true;
   2017       }
   2018   } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
   2019     // If the initial value for the global was an undef value, and if only
   2020     // one other value was stored into it, we can just change the
   2021     // initializer to be the stored value, then delete all stores to the
   2022     // global.  This allows us to mark it constant.
   2023     if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
   2024       if (isa<UndefValue>(GV->getInitializer())) {
   2025         // Change the initial value here.
   2026         GV->setInitializer(SOVConstant);
   2027 
   2028         // Clean up any obviously simplifiable users now.
   2029         CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
   2030 
   2031         if (GV->use_empty()) {
   2032           DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
   2033                        << "simplify all users and delete global!\n");
   2034           GV->eraseFromParent();
   2035           ++NumDeleted;
   2036         } else {
   2037           GVI = GV;
   2038         }
   2039         ++NumSubstitute;
   2040         return true;
   2041       }
   2042 
   2043     // Try to optimize globals based on the knowledge that only one value
   2044     // (besides its initializer) is ever stored to the global.
   2045     if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
   2046                                  TD, TLI))
   2047       return true;
   2048 
   2049     // Otherwise, if the global was not a boolean, we can shrink it to be a
   2050     // boolean.
   2051     if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
   2052       if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
   2053         ++NumShrunkToBool;
   2054         return true;
   2055       }
   2056   }
   2057 
   2058   return false;
   2059 }
   2060 
   2061 /// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
   2062 /// function, changing them to FastCC.
   2063 static void ChangeCalleesToFastCall(Function *F) {
   2064   for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
   2065     if (isa<BlockAddress>(*UI))
   2066       continue;
   2067     CallSite User(cast<Instruction>(*UI));
   2068     User.setCallingConv(CallingConv::Fast);
   2069   }
   2070 }
   2071 
   2072 static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
   2073   for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
   2074     unsigned Index = Attrs.getSlotIndex(i);
   2075     if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
   2076       continue;
   2077 
   2078     // There can be only one.
   2079     return Attrs.removeAttribute(C, Index, Attribute::Nest);
   2080   }
   2081 
   2082   return Attrs;
   2083 }
   2084 
   2085 static void RemoveNestAttribute(Function *F) {
   2086   F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
   2087   for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
   2088     if (isa<BlockAddress>(*UI))
   2089       continue;
   2090     CallSite User(cast<Instruction>(*UI));
   2091     User.setAttributes(StripNest(F->getContext(), User.getAttributes()));
   2092   }
   2093 }
   2094 
   2095 bool GlobalOpt::OptimizeFunctions(Module &M) {
   2096   bool Changed = false;
   2097   // Optimize functions.
   2098   for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
   2099     Function *F = FI++;
   2100     // Functions without names cannot be referenced outside this module.
   2101     if (!F->hasName() && !F->isDeclaration())
   2102       F->setLinkage(GlobalValue::InternalLinkage);
   2103     F->removeDeadConstantUsers();
   2104     if (F->isDefTriviallyDead()) {
   2105       F->eraseFromParent();
   2106       Changed = true;
   2107       ++NumFnDeleted;
   2108     } else if (F->hasLocalLinkage()) {
   2109       if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
   2110           !F->hasAddressTaken()) {
   2111         // If this function has C calling conventions, is not a varargs
   2112         // function, and is only called directly, promote it to use the Fast
   2113         // calling convention.
   2114         F->setCallingConv(CallingConv::Fast);
   2115         ChangeCalleesToFastCall(F);
   2116         ++NumFastCallFns;
   2117         Changed = true;
   2118       }
   2119 
   2120       if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
   2121           !F->hasAddressTaken()) {
   2122         // The function is not used by a trampoline intrinsic, so it is safe
   2123         // to remove the 'nest' attribute.
   2124         RemoveNestAttribute(F);
   2125         ++NumNestRemoved;
   2126         Changed = true;
   2127       }
   2128     }
   2129   }
   2130   return Changed;
   2131 }
   2132 
   2133 bool GlobalOpt::OptimizeGlobalVars(Module &M) {
   2134   bool Changed = false;
   2135   for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
   2136        GVI != E; ) {
   2137     GlobalVariable *GV = GVI++;
   2138     // Global variables without names cannot be referenced outside this module.
   2139     if (!GV->hasName() && !GV->isDeclaration())
   2140       GV->setLinkage(GlobalValue::InternalLinkage);
   2141     // Simplify the initializer.
   2142     if (GV->hasInitializer())
   2143       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
   2144         Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
   2145         if (New && New != CE)
   2146           GV->setInitializer(New);
   2147       }
   2148 
   2149     Changed |= ProcessGlobal(GV, GVI);
   2150   }
   2151   return Changed;
   2152 }
   2153 
   2154 /// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
   2155 /// initializers have an init priority of 65535.
   2156 GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
   2157   GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
   2158   if (GV == 0) return 0;
   2159 
   2160   // Verify that the initializer is simple enough for us to handle. We are
   2161   // only allowed to optimize the initializer if it is unique.
   2162   if (!GV->hasUniqueInitializer()) return 0;
   2163 
   2164   if (isa<ConstantAggregateZero>(GV->getInitializer()))
   2165     return GV;
   2166   ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
   2167 
   2168   for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
   2169     if (isa<ConstantAggregateZero>(*i))
   2170       continue;
   2171     ConstantStruct *CS = cast<ConstantStruct>(*i);
   2172     if (isa<ConstantPointerNull>(CS->getOperand(1)))
   2173       continue;
   2174 
   2175     // Must have a function or null ptr.
   2176     if (!isa<Function>(CS->getOperand(1)))
   2177       return 0;
   2178 
   2179     // Init priority must be standard.
   2180     ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
   2181     if (CI->getZExtValue() != 65535)
   2182       return 0;
   2183   }
   2184 
   2185   return GV;
   2186 }
   2187 
   2188 /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
   2189 /// return a list of the functions and null terminator as a vector.
   2190 static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
   2191   if (GV->getInitializer()->isNullValue())
   2192     return std::vector<Function*>();
   2193   ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
   2194   std::vector<Function*> Result;
   2195   Result.reserve(CA->getNumOperands());
   2196   for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
   2197     ConstantStruct *CS = cast<ConstantStruct>(*i);
   2198     Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
   2199   }
   2200   return Result;
   2201 }
   2202 
   2203 /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
   2204 /// specified array, returning the new global to use.
   2205 static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
   2206                                           const std::vector<Function*> &Ctors) {
   2207   // If we made a change, reassemble the initializer list.
   2208   Constant *CSVals[2];
   2209   CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
   2210   CSVals[1] = 0;
   2211 
   2212   StructType *StructTy =
   2213     cast <StructType>(
   2214     cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
   2215 
   2216   // Create the new init list.
   2217   std::vector<Constant*> CAList;
   2218   for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
   2219     if (Ctors[i]) {
   2220       CSVals[1] = Ctors[i];
   2221     } else {
   2222       Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
   2223                                           false);
   2224       PointerType *PFTy = PointerType::getUnqual(FTy);
   2225       CSVals[1] = Constant::getNullValue(PFTy);
   2226       CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
   2227                                    0x7fffffff);
   2228     }
   2229     CAList.push_back(ConstantStruct::get(StructTy, CSVals));
   2230   }
   2231 
   2232   // Create the array initializer.
   2233   Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
   2234                                                    CAList.size()), CAList);
   2235 
   2236   // If we didn't change the number of elements, don't create a new GV.
   2237   if (CA->getType() == GCL->getInitializer()->getType()) {
   2238     GCL->setInitializer(CA);
   2239     return GCL;
   2240   }
   2241 
   2242   // Create the new global and insert it next to the existing list.
   2243   GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
   2244                                            GCL->getLinkage(), CA, "",
   2245                                            GCL->getThreadLocalMode());
   2246   GCL->getParent()->getGlobalList().insert(GCL, NGV);
   2247   NGV->takeName(GCL);
   2248 
   2249   // Nuke the old list, replacing any uses with the new one.
   2250   if (!GCL->use_empty()) {
   2251     Constant *V = NGV;
   2252     if (V->getType() != GCL->getType())
   2253       V = ConstantExpr::getBitCast(V, GCL->getType());
   2254     GCL->replaceAllUsesWith(V);
   2255   }
   2256   GCL->eraseFromParent();
   2257 
   2258   if (Ctors.size())
   2259     return NGV;
   2260   else
   2261     return 0;
   2262 }
   2263 
   2264 
   2265 static inline bool
   2266 isSimpleEnoughValueToCommit(Constant *C,
   2267                             SmallPtrSet<Constant*, 8> &SimpleConstants,
   2268                             const DataLayout *TD);
   2269 
   2270 
   2271 /// isSimpleEnoughValueToCommit - Return true if the specified constant can be
   2272 /// handled by the code generator.  We don't want to generate something like:
   2273 ///   void *X = &X/42;
   2274 /// because the code generator doesn't have a relocation that can handle that.
   2275 ///
   2276 /// This function should be called if C was not found (but just got inserted)
   2277 /// in SimpleConstants to avoid having to rescan the same constants all the
   2278 /// time.
   2279 static bool isSimpleEnoughValueToCommitHelper(Constant *C,
   2280                                    SmallPtrSet<Constant*, 8> &SimpleConstants,
   2281                                    const DataLayout *TD) {
   2282   // Simple integer, undef, constant aggregate zero, global addresses, etc are
   2283   // all supported.
   2284   if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
   2285       isa<GlobalValue>(C))
   2286     return true;
   2287 
   2288   // Aggregate values are safe if all their elements are.
   2289   if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
   2290       isa<ConstantVector>(C)) {
   2291     for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
   2292       Constant *Op = cast<Constant>(C->getOperand(i));
   2293       if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD))
   2294         return false;
   2295     }
   2296     return true;
   2297   }
   2298 
   2299   // We don't know exactly what relocations are allowed in constant expressions,
   2300   // so we allow &global+constantoffset, which is safe and uniformly supported
   2301   // across targets.
   2302   ConstantExpr *CE = cast<ConstantExpr>(C);
   2303   switch (CE->getOpcode()) {
   2304   case Instruction::BitCast:
   2305     // Bitcast is fine if the casted value is fine.
   2306     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2307 
   2308   case Instruction::IntToPtr:
   2309   case Instruction::PtrToInt:
   2310     // int <=> ptr is fine if the int type is the same size as the
   2311     // pointer type.
   2312     if (!TD || TD->getTypeSizeInBits(CE->getType()) !=
   2313                TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
   2314       return false;
   2315     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2316 
   2317   // GEP is fine if it is simple + constant offset.
   2318   case Instruction::GetElementPtr:
   2319     for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
   2320       if (!isa<ConstantInt>(CE->getOperand(i)))
   2321         return false;
   2322     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2323 
   2324   case Instruction::Add:
   2325     // We allow simple+cst.
   2326     if (!isa<ConstantInt>(CE->getOperand(1)))
   2327       return false;
   2328     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2329   }
   2330   return false;
   2331 }
   2332 
   2333 static inline bool
   2334 isSimpleEnoughValueToCommit(Constant *C,
   2335                             SmallPtrSet<Constant*, 8> &SimpleConstants,
   2336                             const DataLayout *TD) {
   2337   // If we already checked this constant, we win.
   2338   if (!SimpleConstants.insert(C)) return true;
   2339   // Check the constant.
   2340   return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD);
   2341 }
   2342 
   2343 
   2344 /// isSimpleEnoughPointerToCommit - Return true if this constant is simple
   2345 /// enough for us to understand.  In particular, if it is a cast to anything
   2346 /// other than from one pointer type to another pointer type, we punt.
   2347 /// We basically just support direct accesses to globals and GEP's of
   2348 /// globals.  This should be kept up to date with CommitValueTo.
   2349 static bool isSimpleEnoughPointerToCommit(Constant *C) {
   2350   // Conservatively, avoid aggregate types. This is because we don't
   2351   // want to worry about them partially overlapping other stores.
   2352   if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
   2353     return false;
   2354 
   2355   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
   2356     // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
   2357     // external globals.
   2358     return GV->hasUniqueInitializer();
   2359 
   2360   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
   2361     // Handle a constantexpr gep.
   2362     if (CE->getOpcode() == Instruction::GetElementPtr &&
   2363         isa<GlobalVariable>(CE->getOperand(0)) &&
   2364         cast<GEPOperator>(CE)->isInBounds()) {
   2365       GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
   2366       // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
   2367       // external globals.
   2368       if (!GV->hasUniqueInitializer())
   2369         return false;
   2370 
   2371       // The first index must be zero.
   2372       ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
   2373       if (!CI || !CI->isZero()) return false;
   2374 
   2375       // The remaining indices must be compile-time known integers within the
   2376       // notional bounds of the corresponding static array types.
   2377       if (!CE->isGEPWithNoNotionalOverIndexing())
   2378         return false;
   2379 
   2380       return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
   2381 
   2382     // A constantexpr bitcast from a pointer to another pointer is a no-op,
   2383     // and we know how to evaluate it by moving the bitcast from the pointer
   2384     // operand to the value operand.
   2385     } else if (CE->getOpcode() == Instruction::BitCast &&
   2386                isa<GlobalVariable>(CE->getOperand(0))) {
   2387       // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
   2388       // external globals.
   2389       return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
   2390     }
   2391   }
   2392 
   2393   return false;
   2394 }
   2395 
   2396 /// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
   2397 /// initializer.  This returns 'Init' modified to reflect 'Val' stored into it.
   2398 /// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
   2399 static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
   2400                                    ConstantExpr *Addr, unsigned OpNo) {
   2401   // Base case of the recursion.
   2402   if (OpNo == Addr->getNumOperands()) {
   2403     assert(Val->getType() == Init->getType() && "Type mismatch!");
   2404     return Val;
   2405   }
   2406 
   2407   SmallVector<Constant*, 32> Elts;
   2408   if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
   2409     // Break up the constant into its elements.
   2410     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
   2411       Elts.push_back(Init->getAggregateElement(i));
   2412 
   2413     // Replace the element that we are supposed to.
   2414     ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
   2415     unsigned Idx = CU->getZExtValue();
   2416     assert(Idx < STy->getNumElements() && "Struct index out of range!");
   2417     Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
   2418 
   2419     // Return the modified struct.
   2420     return ConstantStruct::get(STy, Elts);
   2421   }
   2422 
   2423   ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
   2424   SequentialType *InitTy = cast<SequentialType>(Init->getType());
   2425 
   2426   uint64_t NumElts;
   2427   if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
   2428     NumElts = ATy->getNumElements();
   2429   else
   2430     NumElts = InitTy->getVectorNumElements();
   2431 
   2432   // Break up the array into elements.
   2433   for (uint64_t i = 0, e = NumElts; i != e; ++i)
   2434     Elts.push_back(Init->getAggregateElement(i));
   2435 
   2436   assert(CI->getZExtValue() < NumElts);
   2437   Elts[CI->getZExtValue()] =
   2438     EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
   2439 
   2440   if (Init->getType()->isArrayTy())
   2441     return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
   2442   return ConstantVector::get(Elts);
   2443 }
   2444 
   2445 /// CommitValueTo - We have decided that Addr (which satisfies the predicate
   2446 /// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
   2447 static void CommitValueTo(Constant *Val, Constant *Addr) {
   2448   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
   2449     assert(GV->hasInitializer());
   2450     GV->setInitializer(Val);
   2451     return;
   2452   }
   2453 
   2454   ConstantExpr *CE = cast<ConstantExpr>(Addr);
   2455   GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
   2456   GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
   2457 }
   2458 
   2459 namespace {
   2460 
   2461 /// Evaluator - This class evaluates LLVM IR, producing the Constant
   2462 /// representing each SSA instruction.  Changes to global variables are stored
   2463 /// in a mapping that can be iterated over after the evaluation is complete.
   2464 /// Once an evaluation call fails, the evaluation object should not be reused.
   2465 class Evaluator {
   2466 public:
   2467   Evaluator(const DataLayout *TD, const TargetLibraryInfo *TLI)
   2468     : TD(TD), TLI(TLI) {
   2469     ValueStack.push_back(new DenseMap<Value*, Constant*>);
   2470   }
   2471 
   2472   ~Evaluator() {
   2473     DeleteContainerPointers(ValueStack);
   2474     while (!AllocaTmps.empty()) {
   2475       GlobalVariable *Tmp = AllocaTmps.back();
   2476       AllocaTmps.pop_back();
   2477 
   2478       // If there are still users of the alloca, the program is doing something
   2479       // silly, e.g. storing the address of the alloca somewhere and using it
   2480       // later.  Since this is undefined, we'll just make it be null.
   2481       if (!Tmp->use_empty())
   2482         Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
   2483       delete Tmp;
   2484     }
   2485   }
   2486 
   2487   /// EvaluateFunction - Evaluate a call to function F, returning true if
   2488   /// successful, false if we can't evaluate it.  ActualArgs contains the formal
   2489   /// arguments for the function.
   2490   bool EvaluateFunction(Function *F, Constant *&RetVal,
   2491                         const SmallVectorImpl<Constant*> &ActualArgs);
   2492 
   2493   /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
   2494   /// successful, false if we can't evaluate it.  NewBB returns the next BB that
   2495   /// control flows into, or null upon return.
   2496   bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
   2497 
   2498   Constant *getVal(Value *V) {
   2499     if (Constant *CV = dyn_cast<Constant>(V)) return CV;
   2500     Constant *R = ValueStack.back()->lookup(V);
   2501     assert(R && "Reference to an uncomputed value!");
   2502     return R;
   2503   }
   2504 
   2505   void setVal(Value *V, Constant *C) {
   2506     ValueStack.back()->operator[](V) = C;
   2507   }
   2508 
   2509   const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
   2510     return MutatedMemory;
   2511   }
   2512 
   2513   const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
   2514     return Invariants;
   2515   }
   2516 
   2517 private:
   2518   Constant *ComputeLoadResult(Constant *P);
   2519 
   2520   /// ValueStack - As we compute SSA register values, we store their contents
   2521   /// here. The back of the vector contains the current function and the stack
   2522   /// contains the values in the calling frames.
   2523   SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;
   2524 
   2525   /// CallStack - This is used to detect recursion.  In pathological situations
   2526   /// we could hit exponential behavior, but at least there is nothing
   2527   /// unbounded.
   2528   SmallVector<Function*, 4> CallStack;
   2529 
   2530   /// MutatedMemory - For each store we execute, we update this map.  Loads
   2531   /// check this to get the most up-to-date value.  If evaluation is successful,
   2532   /// this state is committed to the process.
   2533   DenseMap<Constant*, Constant*> MutatedMemory;
   2534 
   2535   /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
   2536   /// to represent its body.  This vector is needed so we can delete the
   2537   /// temporary globals when we are done.
   2538   SmallVector<GlobalVariable*, 32> AllocaTmps;
   2539 
   2540   /// Invariants - These global variables have been marked invariant by the
   2541   /// static constructor.
   2542   SmallPtrSet<GlobalVariable*, 8> Invariants;
   2543 
   2544   /// SimpleConstants - These are constants we have checked and know to be
   2545   /// simple enough to live in a static initializer of a global.
   2546   SmallPtrSet<Constant*, 8> SimpleConstants;
   2547 
   2548   const DataLayout *TD;
   2549   const TargetLibraryInfo *TLI;
   2550 };
   2551 
   2552 }  // anonymous namespace
   2553 
   2554 /// ComputeLoadResult - Return the value that would be computed by a load from
   2555 /// P after the stores reflected by 'memory' have been performed.  If we can't
   2556 /// decide, return null.
   2557 Constant *Evaluator::ComputeLoadResult(Constant *P) {
   2558   // If this memory location has been recently stored, use the stored value: it
   2559   // is the most up-to-date.
   2560   DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
   2561   if (I != MutatedMemory.end()) return I->second;
   2562 
   2563   // Access it.
   2564   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
   2565     if (GV->hasDefinitiveInitializer())
   2566       return GV->getInitializer();
   2567     return 0;
   2568   }
   2569 
   2570   // Handle a constantexpr getelementptr.
   2571   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
   2572     if (CE->getOpcode() == Instruction::GetElementPtr &&
   2573         isa<GlobalVariable>(CE->getOperand(0))) {
   2574       GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
   2575       if (GV->hasDefinitiveInitializer())
   2576         return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
   2577     }
   2578 
   2579   return 0;  // don't know how to evaluate.
   2580 }
   2581 
   2582 /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
   2583 /// successful, false if we can't evaluate it.  NewBB returns the next BB that
   2584 /// control flows into, or null upon return.
   2585 bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
   2586                               BasicBlock *&NextBB) {
   2587   // This is the main evaluation loop.
   2588   while (1) {
   2589     Constant *InstResult = 0;
   2590 
   2591     DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
   2592 
   2593     if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
   2594       if (!SI->isSimple()) {
   2595         DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
   2596         return false;  // no volatile/atomic accesses.
   2597       }
   2598       Constant *Ptr = getVal(SI->getOperand(1));
   2599       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
   2600         DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
   2601         Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
   2602         DEBUG(dbgs() << "; To: " << *Ptr << "\n");
   2603       }
   2604       if (!isSimpleEnoughPointerToCommit(Ptr)) {
   2605         // If this is too complex for us to commit, reject it.
   2606         DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
   2607         return false;
   2608       }
   2609 
   2610       Constant *Val = getVal(SI->getOperand(0));
   2611 
   2612       // If this might be too difficult for the backend to handle (e.g. the addr
   2613       // of one global variable divided by another) then we can't commit it.
   2614       if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) {
   2615         DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
   2616               << "\n");
   2617         return false;
   2618       }
   2619 
   2620       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
   2621         if (CE->getOpcode() == Instruction::BitCast) {
   2622           DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
   2623           // If we're evaluating a store through a bitcast, then we need
   2624           // to pull the bitcast off the pointer type and push it onto the
   2625           // stored value.
   2626           Ptr = CE->getOperand(0);
   2627 
   2628           Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
   2629 
   2630           // In order to push the bitcast onto the stored value, a bitcast
   2631           // from NewTy to Val's type must be legal.  If it's not, we can try
   2632           // introspecting NewTy to find a legal conversion.
   2633           while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
   2634             // If NewTy is a struct, we can convert the pointer to the struct
   2635             // into a pointer to its first member.
   2636             // FIXME: This could be extended to support arrays as well.
   2637             if (StructType *STy = dyn_cast<StructType>(NewTy)) {
   2638               NewTy = STy->getTypeAtIndex(0U);
   2639 
   2640               IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
   2641               Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
   2642               Constant * const IdxList[] = {IdxZero, IdxZero};
   2643 
   2644               Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
   2645               if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
   2646                 Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
   2647 
   2648             // If we can't improve the situation by introspecting NewTy,
   2649             // we have to give up.
   2650             } else {
   2651               DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
   2652                     "evaluate.\n");
   2653               return false;
   2654             }
   2655           }
   2656 
   2657           // If we found compatible types, go ahead and push the bitcast
   2658           // onto the stored value.
   2659           Val = ConstantExpr::getBitCast(Val, NewTy);
   2660 
   2661           DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
   2662         }
   2663       }
   2664 
   2665       MutatedMemory[Ptr] = Val;
   2666     } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
   2667       InstResult = ConstantExpr::get(BO->getOpcode(),
   2668                                      getVal(BO->getOperand(0)),
   2669                                      getVal(BO->getOperand(1)));
   2670       DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
   2671             << "\n");
   2672     } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
   2673       InstResult = ConstantExpr::getCompare(CI->getPredicate(),
   2674                                             getVal(CI->getOperand(0)),
   2675                                             getVal(CI->getOperand(1)));
   2676       DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
   2677             << "\n");
   2678     } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
   2679       InstResult = ConstantExpr::getCast(CI->getOpcode(),
   2680                                          getVal(CI->getOperand(0)),
   2681                                          CI->getType());
   2682       DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
   2683             << "\n");
   2684     } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
   2685       InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
   2686                                            getVal(SI->getOperand(1)),
   2687                                            getVal(SI->getOperand(2)));
   2688       DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
   2689             << "\n");
   2690     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
   2691       Constant *P = getVal(GEP->getOperand(0));
   2692       SmallVector<Constant*, 8> GEPOps;
   2693       for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
   2694            i != e; ++i)
   2695         GEPOps.push_back(getVal(*i));
   2696       InstResult =
   2697         ConstantExpr::getGetElementPtr(P, GEPOps,
   2698                                        cast<GEPOperator>(GEP)->isInBounds());
   2699       DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
   2700             << "\n");
   2701     } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
   2702 
   2703       if (!LI->isSimple()) {
   2704         DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
   2705         return false;  // no volatile/atomic accesses.
   2706       }
   2707 
   2708       Constant *Ptr = getVal(LI->getOperand(0));
   2709       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
   2710         Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
   2711         DEBUG(dbgs() << "Found a constant pointer expression, constant "
   2712               "folding: " << *Ptr << "\n");
   2713       }
   2714       InstResult = ComputeLoadResult(Ptr);
   2715       if (InstResult == 0) {
   2716         DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
   2717               "\n");
   2718         return false; // Could not evaluate load.
   2719       }
   2720 
   2721       DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
   2722     } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
   2723       if (AI->isArrayAllocation()) {
   2724         DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
   2725         return false;  // Cannot handle array allocs.
   2726       }
   2727       Type *Ty = AI->getType()->getElementType();
   2728       AllocaTmps.push_back(new GlobalVariable(Ty, false,
   2729                                               GlobalValue::InternalLinkage,
   2730                                               UndefValue::get(Ty),
   2731                                               AI->getName()));
   2732       InstResult = AllocaTmps.back();
   2733       DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
   2734     } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
   2735       CallSite CS(CurInst);
   2736 
   2737       // Debug info can safely be ignored here.
   2738       if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
   2739         DEBUG(dbgs() << "Ignoring debug info.\n");
   2740         ++CurInst;
   2741         continue;
   2742       }
   2743 
   2744       // Cannot handle inline asm.
   2745       if (isa<InlineAsm>(CS.getCalledValue())) {
   2746         DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
   2747         return false;
   2748       }
   2749 
   2750       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
   2751         if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
   2752           if (MSI->isVolatile()) {
   2753             DEBUG(dbgs() << "Can not optimize a volatile memset " <<
   2754                   "intrinsic.\n");
   2755             return false;
   2756           }
   2757           Constant *Ptr = getVal(MSI->getDest());
   2758           Constant *Val = getVal(MSI->getValue());
   2759           Constant *DestVal = ComputeLoadResult(getVal(Ptr));
   2760           if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
   2761             // This memset is a no-op.
   2762             DEBUG(dbgs() << "Ignoring no-op memset.\n");
   2763             ++CurInst;
   2764             continue;
   2765           }
   2766         }
   2767 
   2768         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
   2769             II->getIntrinsicID() == Intrinsic::lifetime_end) {
   2770           DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
   2771           ++CurInst;
   2772           continue;
   2773         }
   2774 
   2775         if (II->getIntrinsicID() == Intrinsic::invariant_start) {
   2776           // We don't insert an entry into Values, as it doesn't have a
   2777           // meaningful return value.
   2778           if (!II->use_empty()) {
   2779             DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n");
   2780             return false;
   2781           }
   2782           ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
   2783           Value *PtrArg = getVal(II->getArgOperand(1));
   2784           Value *Ptr = PtrArg->stripPointerCasts();
   2785           if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
   2786             Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
   2787             if (TD && !Size->isAllOnesValue() &&
   2788                 Size->getValue().getLimitedValue() >=
   2789                 TD->getTypeStoreSize(ElemTy)) {
   2790               Invariants.insert(GV);
   2791               DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
   2792                     << "\n");
   2793             } else {
   2794               DEBUG(dbgs() << "Found a global var, but can not treat it as an "
   2795                     "invariant.\n");
   2796             }
   2797           }
   2798           // Continue even if we do nothing.
   2799           ++CurInst;
   2800           continue;
   2801         }
   2802 
   2803         DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
   2804         return false;
   2805       }
   2806 
   2807       // Resolve function pointers.
   2808       Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
   2809       if (!Callee || Callee->mayBeOverridden()) {
   2810         DEBUG(dbgs() << "Can not resolve function pointer.\n");
   2811         return false;  // Cannot resolve.
   2812       }
   2813 
   2814       SmallVector<Constant*, 8> Formals;
   2815       for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
   2816         Formals.push_back(getVal(*i));
   2817 
   2818       if (Callee->isDeclaration()) {
   2819         // If this is a function we can constant fold, do it.
   2820         if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
   2821           InstResult = C;
   2822           DEBUG(dbgs() << "Constant folded function call. Result: " <<
   2823                 *InstResult << "\n");
   2824         } else {
   2825           DEBUG(dbgs() << "Can not constant fold function call.\n");
   2826           return false;
   2827         }
   2828       } else {
   2829         if (Callee->getFunctionType()->isVarArg()) {
   2830           DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
   2831           return false;
   2832         }
   2833 
   2834         Constant *RetVal = 0;
   2835         // Execute the call, if successful, use the return value.
   2836         ValueStack.push_back(new DenseMap<Value*, Constant*>);
   2837         if (!EvaluateFunction(Callee, RetVal, Formals)) {
   2838           DEBUG(dbgs() << "Failed to evaluate function.\n");
   2839           return false;
   2840         }
   2841         delete ValueStack.pop_back_val();
   2842         InstResult = RetVal;
   2843 
   2844         if (InstResult != NULL) {
   2845           DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
   2846                 InstResult << "\n\n");
   2847         } else {
   2848           DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
   2849         }
   2850       }
   2851     } else if (isa<TerminatorInst>(CurInst)) {
   2852       DEBUG(dbgs() << "Found a terminator instruction.\n");
   2853 
   2854       if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
   2855         if (BI->isUnconditional()) {
   2856           NextBB = BI->getSuccessor(0);
   2857         } else {
   2858           ConstantInt *Cond =
   2859             dyn_cast<ConstantInt>(getVal(BI->getCondition()));
   2860           if (!Cond) return false;  // Cannot determine.
   2861 
   2862           NextBB = BI->getSuccessor(!Cond->getZExtValue());
   2863         }
   2864       } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
   2865         ConstantInt *Val =
   2866           dyn_cast<ConstantInt>(getVal(SI->getCondition()));
   2867         if (!Val) return false;  // Cannot determine.
   2868         NextBB = SI->findCaseValue(Val).getCaseSuccessor();
   2869       } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
   2870         Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
   2871         if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
   2872           NextBB = BA->getBasicBlock();
   2873         else
   2874           return false;  // Cannot determine.
   2875       } else if (isa<ReturnInst>(CurInst)) {
   2876         NextBB = 0;
   2877       } else {
   2878         // invoke, unwind, resume, unreachable.
   2879         DEBUG(dbgs() << "Can not handle terminator.");
   2880         return false;  // Cannot handle this terminator.
   2881       }
   2882 
   2883       // We succeeded at evaluating this block!
   2884       DEBUG(dbgs() << "Successfully evaluated block.\n");
   2885       return true;
   2886     } else {
   2887       // Did not know how to evaluate this!
   2888       DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
   2889             "\n");
   2890       return false;
   2891     }
   2892 
   2893     if (!CurInst->use_empty()) {
   2894       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
   2895         InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
   2896 
   2897       setVal(CurInst, InstResult);
   2898     }
   2899 
   2900     // If we just processed an invoke, we finished evaluating the block.
   2901     if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
   2902       NextBB = II->getNormalDest();
   2903       DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
   2904       return true;
   2905     }
   2906 
   2907     // Advance program counter.
   2908     ++CurInst;
   2909   }
   2910 }
   2911 
   2912 /// EvaluateFunction - Evaluate a call to function F, returning true if
   2913 /// successful, false if we can't evaluate it.  ActualArgs contains the formal
   2914 /// arguments for the function.
   2915 bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
   2916                                  const SmallVectorImpl<Constant*> &ActualArgs) {
   2917   // Check to see if this function is already executing (recursion).  If so,
   2918   // bail out.  TODO: we might want to accept limited recursion.
   2919   if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
   2920     return false;
   2921 
   2922   CallStack.push_back(F);
   2923 
   2924   // Initialize arguments to the incoming values specified.
   2925   unsigned ArgNo = 0;
   2926   for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
   2927        ++AI, ++ArgNo)
   2928     setVal(AI, ActualArgs[ArgNo]);
   2929 
   2930   // ExecutedBlocks - We only handle non-looping, non-recursive code.  As such,
   2931   // we can only evaluate any one basic block at most once.  This set keeps
   2932   // track of what we have executed so we can detect recursive cases etc.
   2933   SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
   2934 
   2935   // CurBB - The current basic block we're evaluating.
   2936   BasicBlock *CurBB = F->begin();
   2937 
   2938   BasicBlock::iterator CurInst = CurBB->begin();
   2939 
   2940   while (1) {
   2941     BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
   2942     DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
   2943 
   2944     if (!EvaluateBlock(CurInst, NextBB))
   2945       return false;
   2946 
   2947     if (NextBB == 0) {
   2948       // Successfully running until there's no next block means that we found
   2949       // the return.  Fill it the return value and pop the call stack.
   2950       ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
   2951       if (RI->getNumOperands())
   2952         RetVal = getVal(RI->getOperand(0));
   2953       CallStack.pop_back();
   2954       return true;
   2955     }
   2956 
   2957     // Okay, we succeeded in evaluating this control flow.  See if we have
   2958     // executed the new block before.  If so, we have a looping function,
   2959     // which we cannot evaluate in reasonable time.
   2960     if (!ExecutedBlocks.insert(NextBB))
   2961       return false;  // looped!
   2962 
   2963     // Okay, we have never been in this block before.  Check to see if there
   2964     // are any PHI nodes.  If so, evaluate them with information about where
   2965     // we came from.
   2966     PHINode *PN = 0;
   2967     for (CurInst = NextBB->begin();
   2968          (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
   2969       setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
   2970 
   2971     // Advance to the next block.
   2972     CurBB = NextBB;
   2973   }
   2974 }
   2975 
   2976 /// EvaluateStaticConstructor - Evaluate static constructors in the function, if
   2977 /// we can.  Return true if we can, false otherwise.
   2978 static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD,
   2979                                       const TargetLibraryInfo *TLI) {
   2980   // Call the function.
   2981   Evaluator Eval(TD, TLI);
   2982   Constant *RetValDummy;
   2983   bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
   2984                                            SmallVector<Constant*, 0>());
   2985 
   2986   if (EvalSuccess) {
   2987     // We succeeded at evaluation: commit the result.
   2988     DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
   2989           << F->getName() << "' to " << Eval.getMutatedMemory().size()
   2990           << " stores.\n");
   2991     for (DenseMap<Constant*, Constant*>::const_iterator I =
   2992            Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
   2993          I != E; ++I)
   2994       CommitValueTo(I->second, I->first);
   2995     for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
   2996            Eval.getInvariants().begin(), E = Eval.getInvariants().end();
   2997          I != E; ++I)
   2998       (*I)->setConstant(true);
   2999   }
   3000 
   3001   return EvalSuccess;
   3002 }
   3003 
   3004 /// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
   3005 /// Return true if anything changed.
   3006 bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
   3007   std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
   3008   bool MadeChange = false;
   3009   if (Ctors.empty()) return false;
   3010 
   3011   // Loop over global ctors, optimizing them when we can.
   3012   for (unsigned i = 0; i != Ctors.size(); ++i) {
   3013     Function *F = Ctors[i];
   3014     // Found a null terminator in the middle of the list, prune off the rest of
   3015     // the list.
   3016     if (F == 0) {
   3017       if (i != Ctors.size()-1) {
   3018         Ctors.resize(i+1);
   3019         MadeChange = true;
   3020       }
   3021       break;
   3022     }
   3023     DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
   3024 
   3025     // We cannot simplify external ctor functions.
   3026     if (F->empty()) continue;
   3027 
   3028     // If we can evaluate the ctor at compile time, do.
   3029     if (EvaluateStaticConstructor(F, TD, TLI)) {
   3030       Ctors.erase(Ctors.begin()+i);
   3031       MadeChange = true;
   3032       --i;
   3033       ++NumCtorsEvaluated;
   3034       continue;
   3035     }
   3036   }
   3037 
   3038   if (!MadeChange) return false;
   3039 
   3040   GCL = InstallGlobalCtors(GCL, Ctors);
   3041   return true;
   3042 }
   3043 
   3044 static int compareNames(const void *A, const void *B) {
   3045   const GlobalValue *VA = *reinterpret_cast<GlobalValue* const*>(A);
   3046   const GlobalValue *VB = *reinterpret_cast<GlobalValue* const*>(B);
   3047   if (VA->getName() < VB->getName())
   3048     return -1;
   3049   if (VB->getName() < VA->getName())
   3050     return 1;
   3051   return 0;
   3052 }
   3053 
   3054 static void setUsedInitializer(GlobalVariable &V,
   3055                                SmallPtrSet<GlobalValue *, 8> Init) {
   3056   if (Init.empty()) {
   3057     V.eraseFromParent();
   3058     return;
   3059   }
   3060 
   3061   SmallVector<llvm::Constant *, 8> UsedArray;
   3062   PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext());
   3063 
   3064   for (SmallPtrSet<GlobalValue *, 8>::iterator I = Init.begin(), E = Init.end();
   3065        I != E; ++I) {
   3066     Constant *Cast = llvm::ConstantExpr::getBitCast(*I, Int8PtrTy);
   3067     UsedArray.push_back(Cast);
   3068   }
   3069   // Sort to get deterministic order.
   3070   array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
   3071   ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
   3072 
   3073   Module *M = V.getParent();
   3074   V.removeFromParent();
   3075   GlobalVariable *NV =
   3076       new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage,
   3077                          llvm::ConstantArray::get(ATy, UsedArray), "");
   3078   NV->takeName(&V);
   3079   NV->setSection("llvm.metadata");
   3080   delete &V;
   3081 }
   3082 
   3083 namespace {
   3084 /// \brief An easy to access representation of llvm.used and llvm.compiler.used.
   3085 class LLVMUsed {
   3086   SmallPtrSet<GlobalValue *, 8> Used;
   3087   SmallPtrSet<GlobalValue *, 8> CompilerUsed;
   3088   GlobalVariable *UsedV;
   3089   GlobalVariable *CompilerUsedV;
   3090 
   3091 public:
   3092   LLVMUsed(Module &M) {
   3093     UsedV = collectUsedGlobalVariables(M, Used, false);
   3094     CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
   3095   }
   3096   typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator;
   3097   iterator usedBegin() { return Used.begin(); }
   3098   iterator usedEnd() { return Used.end(); }
   3099   iterator compilerUsedBegin() { return CompilerUsed.begin(); }
   3100   iterator compilerUsedEnd() { return CompilerUsed.end(); }
   3101   bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
   3102   bool compilerUsedCount(GlobalValue *GV) const {
   3103     return CompilerUsed.count(GV);
   3104   }
   3105   bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
   3106   bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
   3107   bool usedInsert(GlobalValue *GV) { return Used.insert(GV); }
   3108   bool compilerUsedInsert(GlobalValue *GV) { return CompilerUsed.insert(GV); }
   3109 
   3110   void syncVariablesAndSets() {
   3111     if (UsedV)
   3112       setUsedInitializer(*UsedV, Used);
   3113     if (CompilerUsedV)
   3114       setUsedInitializer(*CompilerUsedV, CompilerUsed);
   3115   }
   3116 };
   3117 }
   3118 
   3119 static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
   3120   if (GA.use_empty()) // No use at all.
   3121     return false;
   3122 
   3123   assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
   3124          "We should have removed the duplicated "
   3125          "element from llvm.compiler.used");
   3126   if (!GA.hasOneUse())
   3127     // Strictly more than one use. So at least one is not in llvm.used and
   3128     // llvm.compiler.used.
   3129     return true;
   3130 
   3131   // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
   3132   return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
   3133 }
   3134 
   3135 static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
   3136                                                const LLVMUsed &U) {
   3137   unsigned N = 2;
   3138   assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
   3139          "We should have removed the duplicated "
   3140          "element from llvm.compiler.used");
   3141   if (U.usedCount(&V) || U.compilerUsedCount(&V))
   3142     ++N;
   3143   return V.hasNUsesOrMore(N);
   3144 }
   3145 
   3146 static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
   3147   if (!GA.hasLocalLinkage())
   3148     return true;
   3149 
   3150   return U.usedCount(&GA) || U.compilerUsedCount(&GA);
   3151 }
   3152 
   3153 static bool hasUsesToReplace(GlobalAlias &GA, LLVMUsed &U, bool &RenameTarget) {
   3154   RenameTarget = false;
   3155   bool Ret = false;
   3156   if (hasUseOtherThanLLVMUsed(GA, U))
   3157     Ret = true;
   3158 
   3159   // If the alias is externally visible, we may still be able to simplify it.
   3160   if (!mayHaveOtherReferences(GA, U))
   3161     return Ret;
   3162 
   3163   // If the aliasee has internal linkage, give it the name and linkage
   3164   // of the alias, and delete the alias.  This turns:
   3165   //   define internal ... @f(...)
   3166   //   @a = alias ... @f
   3167   // into:
   3168   //   define ... @a(...)
   3169   Constant *Aliasee = GA.getAliasee();
   3170   GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
   3171   if (!Target->hasLocalLinkage())
   3172     return Ret;
   3173 
   3174   // Do not perform the transform if multiple aliases potentially target the
   3175   // aliasee. This check also ensures that it is safe to replace the section
   3176   // and other attributes of the aliasee with those of the alias.
   3177   if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
   3178     return Ret;
   3179 
   3180   RenameTarget = true;
   3181   return true;
   3182 }
   3183 
   3184 bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
   3185   bool Changed = false;
   3186   LLVMUsed Used(M);
   3187 
   3188   for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.usedBegin(),
   3189                                                E = Used.usedEnd();
   3190        I != E; ++I)
   3191     Used.compilerUsedErase(*I);
   3192 
   3193   for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
   3194        I != E;) {
   3195     Module::alias_iterator J = I++;
   3196     // Aliases without names cannot be referenced outside this module.
   3197     if (!J->hasName() && !J->isDeclaration())
   3198       J->setLinkage(GlobalValue::InternalLinkage);
   3199     // If the aliasee may change at link time, nothing can be done - bail out.
   3200     if (J->mayBeOverridden())
   3201       continue;
   3202 
   3203     Constant *Aliasee = J->getAliasee();
   3204     GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
   3205     Target->removeDeadConstantUsers();
   3206 
   3207     // Make all users of the alias use the aliasee instead.
   3208     bool RenameTarget;
   3209     if (!hasUsesToReplace(*J, Used, RenameTarget))
   3210       continue;
   3211 
   3212     J->replaceAllUsesWith(Aliasee);
   3213     ++NumAliasesResolved;
   3214     Changed = true;
   3215 
   3216     if (RenameTarget) {
   3217       // Give the aliasee the name, linkage and other attributes of the alias.
   3218       Target->takeName(J);
   3219       Target->setLinkage(J->getLinkage());
   3220       Target->GlobalValue::copyAttributesFrom(J);
   3221 
   3222       if (Used.usedErase(J))
   3223         Used.usedInsert(Target);
   3224 
   3225       if (Used.compilerUsedErase(J))
   3226         Used.compilerUsedInsert(Target);
   3227     } else if (mayHaveOtherReferences(*J, Used))
   3228       continue;
   3229 
   3230     // Delete the alias.
   3231     M.getAliasList().erase(J);
   3232     ++NumAliasesRemoved;
   3233     Changed = true;
   3234   }
   3235 
   3236   Used.syncVariablesAndSets();
   3237 
   3238   return Changed;
   3239 }
   3240 
   3241 static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
   3242   if (!TLI->has(LibFunc::cxa_atexit))
   3243     return 0;
   3244 
   3245   Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
   3246 
   3247   if (!Fn)
   3248     return 0;
   3249 
   3250   FunctionType *FTy = Fn->getFunctionType();
   3251 
   3252   // Checking that the function has the right return type, the right number of
   3253   // parameters and that they all have pointer types should be enough.
   3254   if (!FTy->getReturnType()->isIntegerTy() ||
   3255       FTy->getNumParams() != 3 ||
   3256       !FTy->getParamType(0)->isPointerTy() ||
   3257       !FTy->getParamType(1)->isPointerTy() ||
   3258       !FTy->getParamType(2)->isPointerTy())
   3259     return 0;
   3260 
   3261   return Fn;
   3262 }
   3263 
   3264 /// cxxDtorIsEmpty - Returns whether the given function is an empty C++
   3265 /// destructor and can therefore be eliminated.
   3266 /// Note that we assume that other optimization passes have already simplified
   3267 /// the code so we only look for a function with a single basic block, where
   3268 /// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
   3269 /// other side-effect free instructions.
   3270 static bool cxxDtorIsEmpty(const Function &Fn,
   3271                            SmallPtrSet<const Function *, 8> &CalledFunctions) {
   3272   // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
   3273   // nounwind, but that doesn't seem worth doing.
   3274   if (Fn.isDeclaration())
   3275     return false;
   3276 
   3277   if (++Fn.begin() != Fn.end())
   3278     return false;
   3279 
   3280   const BasicBlock &EntryBlock = Fn.getEntryBlock();
   3281   for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
   3282        I != E; ++I) {
   3283     if (const CallInst *CI = dyn_cast<CallInst>(I)) {
   3284       // Ignore debug intrinsics.
   3285       if (isa<DbgInfoIntrinsic>(CI))
   3286         continue;
   3287 
   3288       const Function *CalledFn = CI->getCalledFunction();
   3289 
   3290       if (!CalledFn)
   3291         return false;
   3292 
   3293       SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
   3294 
   3295       // Don't treat recursive functions as empty.
   3296       if (!NewCalledFunctions.insert(CalledFn))
   3297         return false;
   3298 
   3299       if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
   3300         return false;
   3301     } else if (isa<ReturnInst>(*I))
   3302       return true; // We're done.
   3303     else if (I->mayHaveSideEffects())
   3304       return false; // Destructor with side effects, bail.
   3305   }
   3306 
   3307   return false;
   3308 }
   3309 
   3310 bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
   3311   /// Itanium C++ ABI p3.3.5:
   3312   ///
   3313   ///   After constructing a global (or local static) object, that will require
   3314   ///   destruction on exit, a termination function is registered as follows:
   3315   ///
   3316   ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
   3317   ///
   3318   ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
   3319   ///   call f(p) when DSO d is unloaded, before all such termination calls
   3320   ///   registered before this one. It returns zero if registration is
   3321   ///   successful, nonzero on failure.
   3322 
   3323   // This pass will look for calls to __cxa_atexit where the function is trivial
   3324   // and remove them.
   3325   bool Changed = false;
   3326 
   3327   for (Function::use_iterator I = CXAAtExitFn->use_begin(),
   3328        E = CXAAtExitFn->use_end(); I != E;) {
   3329     // We're only interested in calls. Theoretically, we could handle invoke
   3330     // instructions as well, but neither llvm-gcc nor clang generate invokes
   3331     // to __cxa_atexit.
   3332     CallInst *CI = dyn_cast<CallInst>(*I++);
   3333     if (!CI)
   3334       continue;
   3335 
   3336     Function *DtorFn =
   3337       dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
   3338     if (!DtorFn)
   3339       continue;
   3340 
   3341     SmallPtrSet<const Function *, 8> CalledFunctions;
   3342     if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
   3343       continue;
   3344 
   3345     // Just remove the call.
   3346     CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
   3347     CI->eraseFromParent();
   3348 
   3349     ++NumCXXDtorsRemoved;
   3350 
   3351     Changed |= true;
   3352   }
   3353 
   3354   return Changed;
   3355 }
   3356 
   3357 bool GlobalOpt::runOnModule(Module &M) {
   3358   bool Changed = false;
   3359 
   3360   TD = getAnalysisIfAvailable<DataLayout>();
   3361   TLI = &getAnalysis<TargetLibraryInfo>();
   3362 
   3363   // Try to find the llvm.globalctors list.
   3364   GlobalVariable *GlobalCtors = FindGlobalCtors(M);
   3365 
   3366   bool LocalChange = true;
   3367   while (LocalChange) {
   3368     LocalChange = false;
   3369 
   3370     // Delete functions that are trivially dead, ccc -> fastcc
   3371     LocalChange |= OptimizeFunctions(M);
   3372 
   3373     // Optimize global_ctors list.
   3374     if (GlobalCtors)
   3375       LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
   3376 
   3377     // Optimize non-address-taken globals.
   3378     LocalChange |= OptimizeGlobalVars(M);
   3379 
   3380     // Resolve aliases, when possible.
   3381     LocalChange |= OptimizeGlobalAliases(M);
   3382 
   3383     // Try to remove trivial global destructors if they are not removed
   3384     // already.
   3385     Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
   3386     if (CXAAtExitFn)
   3387       LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
   3388 
   3389     Changed |= LocalChange;
   3390   }
   3391 
   3392   // TODO: Move all global ctors functions to the end of the module for code
   3393   // layout.
   3394 
   3395   return Changed;
   3396 }
   3397