//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken.  When this is obviously true, it marks read/write globals as
// constant, deletes variables that are only stored to, etc.
//
//===----------------------------------------------------------------------===//
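
// Illustrative sketch (not part of the original source): given IR such as
//
//   @g = internal global i32 42
//   %v = load i32* @g       ; @g is only ever read after initialization
//
// this pass can mark @g constant, fold loads of @g to 42, and, once no uses
// remain, delete @g entirely.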

#define DEBUG_TYPE "globalopt"
#include "llvm/Transforms/IPO.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NumMarked    , "Number of globals marked constant");
STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted   , "Number of globals deleted");
STATISTIC(NumFnDeleted , "Number of functions deleted");
STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");

namespace {
  struct GlobalStatus;
  struct GlobalOpt : public ModulePass {
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<TargetLibraryInfo>();
    }
    static char ID; // Pass identification, replacement for typeid
    GlobalOpt() : ModulePass(ID) {
      initializeGlobalOptPass(*PassRegistry::getPassRegistry());
    }

    bool runOnModule(Module &M);

  private:
    GlobalVariable *FindGlobalCtors(Module &M);
    bool OptimizeFunctions(Module &M);
    bool OptimizeGlobalVars(Module &M);
    bool OptimizeGlobalAliases(Module &M);
    bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
    bool ProcessGlobal(GlobalVariable *GV, Module::global_iterator &GVI);
    bool ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI,
                               const SmallPtrSet<const PHINode*, 16> &PHIUsers,
                               const GlobalStatus &GS);
    bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);

    TargetData *TD;
    TargetLibraryInfo *TLI;
  };
}

char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
                "Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
                "Global Variable Optimizer", false, false)

ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }

namespace {

/// GlobalStatus - As we analyze each global, keep track of some information
/// about it.  If we find out that the address of the global is taken, none of
/// this info will be accurate.
struct GlobalStatus {
  /// isCompared - True if the global's address is used in a comparison.
  bool isCompared;

  /// isLoaded - True if the global is ever loaded.  If the global isn't ever
  /// loaded it can be deleted.
  bool isLoaded;

  /// StoredType - Keep track of what stores to the global look like.
  ///
  enum StoredType {
    /// NotStored - There is no store to this global.  It can thus be marked
    /// constant.
    NotStored,

    /// isInitializerStored - This global is stored to, but the only thing
    /// stored is the constant it was initialized with.  This is only tracked
    /// for scalar globals.
    isInitializerStored,

    /// isStoredOnce - This global is stored to, but only its initializer and
    /// one other value are ever stored to it.  If this global isStoredOnce, we
    /// track the value stored to it in StoredOnceValue below.  This is only
    /// tracked for scalar globals.
    isStoredOnce,

    /// isStored - This global is stored to by multiple values or something else
    /// that we cannot track.
    isStored
  } StoredType;
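
  // Note (added for clarity): the enumerators above are ordered from weakest
  // to strongest form of "stored-ness"; AnalyzeGlobal below relies on this
  // ordering via comparisons such as
  // "GS.StoredType < GlobalStatus::isInitializerStored".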

  /// StoredOnceValue - If only one value (besides the initializer constant) is
  /// ever stored to this global, keep track of what value it is.
  Value *StoredOnceValue;

  /// AccessingFunction/HasMultipleAccessingFunctions - These start out
  /// null/false.  When the first accessing function is noticed, it is recorded.
  /// When a second different accessing function is noticed,
  /// HasMultipleAccessingFunctions is set to true.
  const Function *AccessingFunction;
  bool HasMultipleAccessingFunctions;

  /// HasNonInstructionUser - Set to true if this global has a user that is not
  /// an instruction (e.g. a constant expr or GV initializer).
  bool HasNonInstructionUser;

  /// HasPHIUser - Set to true if this global has a user that is a PHI node.
  bool HasPHIUser;

  /// AtomicOrdering - Set to the strongest atomic ordering requirement.
  AtomicOrdering Ordering;

  GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
                   StoredOnceValue(0), AccessingFunction(0),
                   HasMultipleAccessingFunctions(false),
                   HasNonInstructionUser(false), HasPHIUser(false),
                   Ordering(NotAtomic) {}
};

}

/// StrongerOrdering - Return the stronger of the two orderings. If the two
/// orderings are acquire and release, then return AcquireRelease.
///
static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
  if (X == Acquire && Y == Release) return AcquireRelease;
  if (Y == Acquire && X == Release) return AcquireRelease;
  return (AtomicOrdering)std::max(X, Y);
}
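
// For example (illustrative): StrongerOrdering(Acquire, Monotonic) yields
// Acquire via std::max on the enum values, while the special cases above map
// {Acquire, Release} to AcquireRelease, since neither of those two orderings
// subsumes the other.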

/// SafeToDestroyConstant - It is safe to destroy a constant iff it is only
/// used by other constants.  Note that constants cannot be cyclic, so this
/// test is pretty easy to implement recursively.
///
static bool SafeToDestroyConstant(const Constant *C) {
  if (isa<GlobalValue>(C)) return false;

  for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
       ++UI)
    if (const Constant *CU = dyn_cast<Constant>(*UI)) {
      if (!SafeToDestroyConstant(CU)) return false;
    } else
      return false;
  return true;
}
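
// For example (illustrative): a dangling "getelementptr" ConstantExpr whose
// only users are other dead ConstantExprs can be destroyed, whereas anything
// that is itself a GlobalValue never can.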


/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
/// structure.  If the global has its address taken, return true to indicate we
/// can't do anything with it.
///
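/// Illustrative example (not part of the original source):
///
///   @g = internal global i32 0
///   store i32* @g, i32** @p     ; the address of @g escapes here
///
/// makes this function return true, because the store's value operand is the
/// global itself rather than a value stored *to* the global.
///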
static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
                          SmallPtrSet<const PHINode*, 16> &PHIUsers) {
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
       ++UI) {
    const User *U = *UI;
    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      GS.HasNonInstructionUser = true;

      // If the result of the constantexpr isn't a pointer type, then we won't
      // know to expect it in various places.  Just reject early.
      if (!isa<PointerType>(CE->getType())) return true;

      if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
    } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
      if (!GS.HasMultipleAccessingFunctions) {
        const Function *F = I->getParent()->getParent();
        if (GS.AccessingFunction == 0)
          GS.AccessingFunction = F;
        else if (GS.AccessingFunction != F)
          GS.HasMultipleAccessingFunctions = true;
      }
      if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
        GS.isLoaded = true;
        // Don't hack on volatile loads.
        if (LI->isVolatile()) return true;
        GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering());
      } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
        // Don't allow a store OF the address, only stores TO the address.
        if (SI->getOperand(0) == V) return true;

        // Don't hack on volatile stores.
        if (SI->isVolatile()) return true;
        GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());

        // If this is a direct store to the global (i.e., the global is a scalar
        // value, not an aggregate), keep more specific information about
        // stores.
        if (GS.StoredType != GlobalStatus::isStored) {
          if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
                                                           SI->getOperand(1))) {
            Value *StoredVal = SI->getOperand(0);
            if (StoredVal == GV->getInitializer()) {
              if (GS.StoredType < GlobalStatus::isInitializerStored)
                GS.StoredType = GlobalStatus::isInitializerStored;
            } else if (isa<LoadInst>(StoredVal) &&
                       cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
              if (GS.StoredType < GlobalStatus::isInitializerStored)
                GS.StoredType = GlobalStatus::isInitializerStored;
            } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
              GS.StoredType = GlobalStatus::isStoredOnce;
              GS.StoredOnceValue = StoredVal;
            } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
                       GS.StoredOnceValue == StoredVal) {
              // noop.
            } else {
              GS.StoredType = GlobalStatus::isStored;
            }
          } else {
            GS.StoredType = GlobalStatus::isStored;
          }
        }
      } else if (isa<GetElementPtrInst>(I)) {
        if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
      } else if (isa<SelectInst>(I)) {
        if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
      } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
        // PHI nodes we can check just like select or GEP instructions, but we
        // have to be careful about infinite recursion.
        if (PHIUsers.insert(PN))  // Not already visited.
          if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
        GS.HasPHIUser = true;
      } else if (isa<CmpInst>(I)) {
        GS.isCompared = true;
      } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
        if (MTI->isVolatile()) return true;
        if (MTI->getArgOperand(0) == V)
          GS.StoredType = GlobalStatus::isStored;
        if (MTI->getArgOperand(1) == V)
          GS.isLoaded = true;
      } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
        assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
        if (MSI->isVolatile()) return true;
        GS.StoredType = GlobalStatus::isStored;
      } else {
        return true;  // Any other non-load instruction might take address!
      }
    } else if (const Constant *C = dyn_cast<Constant>(U)) {
      GS.HasNonInstructionUser = true;
      // We might have a dead and dangling constant hanging off of here.
      if (!SafeToDestroyConstant(C))
        return true;
    } else {
      GS.HasNonInstructionUser = true;
      // Otherwise must be some other user.
      return true;
    }
  }

  return false;
}

/// CleanupConstantGlobalUsers - We just marked GV constant.  Loop over all
/// users of the global, cleaning up the obvious ones.  This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft.  This
/// returns true if it made a change.
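/// For example (illustrative): once @g is known to be constant, any reachable
/// "load i32* @g" is replaced by @g's initializer, and a remaining
/// "store i32 %v, i32* @g" must be unreachable, so it is simply deleted.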
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
                                       TargetData *TD, TargetLibraryInfo *TLI) {
  bool Changed = false;
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
    User *U = *UI++;

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (Init) {
        // Replace the load with the initializer.
        LI->replaceAllUsesWith(Init);
        LI->eraseFromParent();
        Changed = true;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // Store must be unreachable or storing Init into the global.
      SI->eraseFromParent();
      Changed = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->getOpcode() == Instruction::GetElementPtr) {
        Constant *SubInit = 0;
        if (Init)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
        Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI);
      } else if (CE->getOpcode() == Instruction::BitCast &&
                 CE->getType()->isPointerTy()) {
        // Pointer cast, delete any stores and memsets to the global.
        Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI);
      }

      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // Do not transform "gepinst (gep constexpr (GV))" here, because forming
      // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
      // and will invalidate our notion of what Init is.
      Constant *SubInit = 0;
      if (!isa<ConstantExpr>(GEP->getOperand(0))) {
        ConstantExpr *CE =
          dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI));
        if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);

        // If the initializer is an all-null value and we have an inbounds GEP,
        // we already know what the result of any load from that GEP is.
        // TODO: Handle splats.
        if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
          SubInit = Constant::getNullValue(GEP->getType()->getElementType());
      }
      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI);

      if (GEP->use_empty()) {
        GEP->eraseFromParent();
        Changed = true;
      }
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
      if (MI->getRawDest() == V) {
        MI->eraseFromParent();
        Changed = true;
      }

    } else if (Constant *C = dyn_cast<Constant>(U)) {
      // If we have a chain of dead constantexprs or other things dangling from
      // us, and if they are all dead, nuke them without remorse.
      if (SafeToDestroyConstant(C)) {
        C->destroyConstant();
        // This could have invalidated UI, start over from scratch.
        CleanupConstantGlobalUsers(V, Init, TD, TLI);
        return true;
      }
    }
  }
  return Changed;
}

/// isSafeSROAElementUse - Return true if the specified instruction is a safe
/// user of a derived expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
  // We might have a dead and dangling constant hanging off of here.
  if (Constant *C = dyn_cast<Constant>(V))
    return SafeToDestroyConstant(C);

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Loads are ok.
  if (isa<LoadInst>(I)) return true;

  // Stores *to* the pointer are ok.
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getOperand(0) != V;

  // Otherwise, it must be a GEP.
  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
  if (GEPI == 0) return false;

  if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
      !cast<Constant>(GEPI->getOperand(1))->isNullValue())
    return false;

  for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
       I != E; ++I)
    if (!isSafeSROAElementUse(*I))
      return false;
  return true;
}


/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value.
/// Look at it and its uses and decide whether it is safe to SROA this global.
///
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
  if (!isa<GetElementPtrInst>(U) &&
      (!isa<ConstantExpr>(U) ||
       cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
    return false;

  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
  // don't like < 3 operand CE's, and we don't like non-constant integer
  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
  // value of C.
  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
      !cast<Constant>(U->getOperand(1))->isNullValue() ||
      !isa<ConstantInt>(U->getOperand(2)))
    return false;

  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
  ++GEPI;  // Skip over the pointer index.

  // If this is a use of an array allocation, do a bit more checking for sanity.
  if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
    uint64_t NumElements = AT->getNumElements();
    ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));

    // Check to make sure that the index falls within the array.  If not,
    // something funny is going on, so we won't do the optimization.
    //
    if (Idx->getZExtValue() >= NumElements)
      return false;

    // We cannot scalar repl this level of the array unless all array
    // sub-indices are in-range constants.  In particular, consider:
    // A[0][i].  We cannot know that the user isn't doing invalid things like
    // allowing i to index an out-of-range subscript that accesses A[1].
    //
    // Scalar replacing *just* the outer index of the array is probably not
    // going to be a win anyway, so just give up.
    for (++GEPI; // Skip array index.
         GEPI != E;
         ++GEPI) {
      uint64_t NumElements;
      if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
        NumElements = SubArrayTy->getNumElements();
      else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
        NumElements = SubVectorTy->getNumElements();
      else {
        assert((*GEPI)->isStructTy() &&
               "Indexed GEP type is not array, vector, or struct!");
        continue;
      }

      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
      if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
        return false;
    }
  }

  for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
    if (!isSafeSROAElementUse(*I))
      return false;
  return true;
}

/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it
/// is safe for us to perform this transformation.
///
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
       UI != E; ++UI) {
    if (!IsUserOfGlobalSafeForSRA(*UI, GV))
      return false;
  }
  return true;
}


/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
/// variable.  This opens the door for other optimizations by exposing the
/// behavior of the program in a more fine-grained way.  We have determined that
/// this transformation is safe already.  We return the first global variable we
/// insert so that the caller can reprocess it.
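/// Illustrative sketch (not part of the original source):
///
///   @g = internal global { i32, float } { i32 1, float 2.0 }
///
/// becomes two scalar globals @g.0 and @g.1, and each use of the form
/// "getelementptr @g, 0, C, ..." is rewritten to refer to the new global for
/// element C instead.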
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
  // Make sure this global only has simple uses that we can SRA.
  if (!GlobalUsersSafeToSRA(GV))
    return 0;

  assert(GV->hasLocalLinkage() && !GV->isConstant());
  Constant *Init = GV->getInitializer();
  Type *Ty = Init->getType();

  std::vector<GlobalVariable*> NewGlobals;
  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();

  // Get the alignment of the global, either explicit or target-specific.
  unsigned StartAlignment = GV->getAlignment();
  if (StartAlignment == 0)
    StartAlignment = TD.getABITypeAlignment(GV->getType());

  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    NewGlobals.reserve(STy->getNumElements());
    const StructLayout &Layout = *TD.getStructLayout(STy);
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");
      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->isThreadLocal(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      uint64_t FieldOffset = Layout.getElementOffset(i);
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
      if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
        NGV->setAlignment(NewAlign);
    }
  } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    unsigned NumElements = 0;
    if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
      NumElements = ATy->getNumElements();
    else
      NumElements = cast<VectorType>(STy)->getNumElements();

    if (NumElements > 16 && GV->hasNUsesOrMore(16))
      return 0; // It's not worth it.
    NewGlobals.reserve(NumElements);

    uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
    unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
    for (unsigned i = 0, e = NumElements; i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");

      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->isThreadLocal(),
                                              GV->getType()->getAddressSpace());
      Globals.insert(GV, NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
      if (NewAlign > EltAlign)
        NGV->setAlignment(NewAlign);
    }
  }

  if (NewGlobals.empty())
    return 0;

  DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);

  Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));

  // Loop over all of the uses of the global, replacing the constantexpr geps
  // with smaller constantexpr geps or direct references.
  while (!GV->use_empty()) {
    User *GEP = GV->use_back();
    assert(((isa<ConstantExpr>(GEP) &&
             cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");

    // Ignore the first index operand, which has to be zero or else the program
    // is quite broken (undefined).  Get the second operand, which is the
    // structure or array index.
    unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
    if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.

    Value *NewPtr = NewGlobals[Val];

    // Form a shorter GEP if needed.
    if (GEP->getNumOperands() > 3) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
        SmallVector<Constant*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
          Idxs.push_back(CE->getOperand(i));
        NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs);
      } else {
        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
        SmallVector<Value*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
          Idxs.push_back(GEPI->getOperand(i));
        NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
                                           GEPI->getName()+"."+Twine(Val),GEPI);
      }
    }
    GEP->replaceAllUsesWith(NewPtr);

    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
      GEPI->eraseFromParent();
    else
      cast<ConstantExpr>(GEP)->destroyConstant();
  }

  // Delete the old global, now that it is dead.
  Globals.erase(GV);
  ++NumSRA;

  // Loop over the new globals array deleting any globals that are obviously
  // dead.  This can arise due to scalarization of a structure or an array that
  // has elements that are dead.
  unsigned FirstGlobal = 0;
  for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
    if (NewGlobals[i]->use_empty()) {
      Globals.erase(NewGlobals[i]);
      if (FirstGlobal == i) ++FirstGlobal;
    }

  return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
}

/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
/// value will trap if the value is dynamically null.  PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
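/// For example (illustrative): a load of the pointer or a call *through* the
/// pointer traps when the pointer is null, whereas storing the pointer
/// somewhere or passing it as a call argument does not, so those uses make
/// this function return false.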
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
       ++UI) {
    const User *U = *UI;

    if (isa<LoadInst>(U)) {
      // Will trap.
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Storing the value.
      }
    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
      if (CI->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
      if (II->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
      // If we've already seen this phi node, ignore it, it has already been
      // checked.
      if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
        return false;
    } else if (isa<ICmpInst>(U) &&
               isa<ConstantPointerNull>(UI->getOperand(1))) {
      // Ignore icmp X, null
    } else {
      //cerr << "NONTRAPPING USE: " << *U;
      return false;
    }
  }
  return true;
}

/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads
/// from GV will trap if the loaded value is null.  Note that this also permits
/// comparisons of the loaded value against null, as a special case.
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
       UI != E; ++UI) {
    const User *U = *UI;

    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      SmallPtrSet<const PHINode*, 8> PHIs;
      if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
        return false;
    } else if (isa<StoreInst>(U)) {
      // Ignore stores to the global.
    } else {
      // We don't know or understand this user, bail out.
      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
      return false;
    }
  }
  return true;
}

static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  bool Changed = false;
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallSite CS(I);
      if (CS.getCalledValue() == V) {
        // Calling through the pointer!  Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        CS.setCalledFunction(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
          if (CS.getArgument(i) == V) {
            PassedAsArg = true;
            CS.setArgument(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also.  Be careful to not invalidate UI!
          UI = V->use_begin();
        }
      }
    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
                                ConstantExpr::getCast(CI->getOpcode(),
                                                      NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant*, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
                          ConstantExpr::getGetElementPtr(NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}


/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one
/// non-null value stored into it.  If there are uses of the loaded value that
/// would trap if the loaded value is dynamically null, then we know that those
/// uses cannot be reached with a null value, so we can optimize away the load.
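/// Illustrative example (not part of the original source): if @g is only ever
/// stored one non-null pointer, then in
///
///   %p = load i32** @g
///   %v = load i32* %p        ; would trap if %p were null
///
/// the dereference guarantees %p is non-null on every path where it executes,
/// so the load of @g can be replaced by the stored value.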
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
                                            TargetData *TD,
                                            TargetLibraryInfo *TLI) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of the loads with uses of the stored value.
  for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
    User *GlobalUser = *GUI++;
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) &&
             "Only expect loads and stores!");
    }
  }

  if (Changed) {
    DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
    CleanupConstantGlobalUsers(GV, 0, TD, TLI);
    if (GV->use_empty()) {
      GV->eraseFromParent();
      ++NumDeleted;
    }
    Changed = true;
  }
  return Changed;
}

/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
static void ConstantPropUsersOf(Value *V,
                                TargetData *TD, TargetLibraryInfo *TLI) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        I->eraseFromParent();
      }
}

/// OptimizeGlobalAddressOfMalloc - This function takes the specified global
/// variable, and transforms the program as if it always contained the result of
/// the specified malloc.  Because it is always the result of the specified
/// malloc, there is no reason to actually DO the malloc.  Instead, turn the
/// malloc into a global, and rewrite any loads of GV as uses of the new global.
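/// Illustrative sketch (not part of the original source):
///
///   @g = internal global i32* null
///   %m = call i8* @malloc(i64 4)
///   store i32* %mc, i32** @g    ; %mc is a bitcast of %m, the only store
///
/// becomes a new global "@g.body" initialized to undef, plus, if @g is ever
/// compared against null, a companion i1 flag "@g.init" recording whether
/// the initializing store has executed yet.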
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                                     CallInst *CI,
                                                     Type *AllocTy,
                                                     ConstantInt *NElements,
                                                     TargetData *TD,
                                                     TargetLibraryInfo *TLI) {
  DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');

  Type *GlobalType;
  if (NElements->getZExtValue() == 1)
    GlobalType = AllocTy;
  else
    // If we have an array allocation, the global variable holds an array.
    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());

  // Create the new global variable.  The contents of the malloc'd memory are
  // undefined, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
                                             GlobalType, false,
                                             GlobalValue::InternalLinkage,
                                             UndefValue::get(GlobalType),
                                             GV->getName()+".body",
                                             GV,
                                             GV->isThreadLocal());

  // If there are bitcast users of the malloc (which is typical, usually we have
  // a malloc + bitcast) then replace them with uses of the new global.  Update
  // other users to use the global as well.
  BitCastInst *TheBC = 0;
  while (!CI->use_empty()) {
    Instruction *User = cast<Instruction>(CI->use_back());
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (BCI->getType() == NewGV->getType()) {
        BCI->replaceAllUsesWith(NewGV);
        BCI->eraseFromParent();
      } else {
        BCI->setOperand(0, NewGV);
      }
    } else {
      if (TheBC == 0)
        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
      User->replaceUsesOfWith(CI, TheBC);
    }
  }

  Constant *RepValue = NewGV;
  if (NewGV->getType() != GV->getType()->getElementType())
    RepValue = ConstantExpr::getBitCast(RepValue,
                                        GV->getType()->getElementType());

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool =
    new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
                       GlobalValue::InternalLinkage,
                       ConstantInt::getFalse(GV->getContext()),
                       GV->getName()+".init", GV->isThreadLocal());
  bool InitBoolUsed = false;

  // Loop over all uses of GV, processing them in turn.
  while (!GV->use_empty()) {
    if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
      // The global is initialized when the store to it occurs.
      new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
                    SI->getOrdering(), SI->getSynchScope(), SI);
      SI->eraseFromParent();
      continue;
    }

    LoadInst *LI = cast<LoadInst>(GV->use_back());
    while (!LI->use_empty()) {
      Use &LoadUse = LI->use_begin().getUse();
      if (!isa<ICmpInst>(LoadUse.getUser())) {
        LoadUse = RepValue;
        continue;
      }

      ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
      // Replace the cmp X, 0 with a use of the bool value.
      // Sink the load to where the compare was, if atomic rules allow us to.
      Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
                               LI->getOrdering(), LI->getSynchScope(),
                               LI->isUnordered() ? (Instruction*)ICI : LI);
      InitBoolUsed = true;
      switch (ICI->getPredicate()) {
      default: llvm_unreachable("Unknown ICmp Predicate!");
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_SLT:   // X < null -> always false
        LV = ConstantInt::getFalse(GV->getContext());
        break;
      case ICmpInst::ICMP_ULE:
      case ICmpInst::ICMP_SLE:
      case ICmpInst::ICMP_EQ:
        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
        break;
      case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_UGE:
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_UGT:
      case ICmpInst::ICMP_SGT:
        break;  // no change.
      }
      ICI->replaceAllUsesWith(LV);
      ICI->eraseFromParent();
    }
    LI->eraseFromParent();
  }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty())  // Delete initializations
      cast<StoreInst>(InitBool->use_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->getGlobalList().insert(GV, InitBool);

  // Now the GV is dead, nuke it and the malloc.
  GV->eraseFromParent();
  CI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
  // constant prop them.  This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  ConstantPropUsersOf(NewGV, TD, TLI);
  if (RepValue != NewGV)
    ConstantPropUsersOf(RepValue, TD, TLI);

  return NewGV;
}

/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking
/// to make sure that there are no complex uses of V.  We permit simple things
/// like dereferencing the pointer, but not storing through the address, unless
/// it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
                                                      const GlobalVariable *GV,
                                         SmallPtrSet<const PHINode*, 8> &PHIs) {
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
       UI != E; ++UI) {
    const Instruction *Inst = cast<Instruction>(*UI);

    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
      continue; // Fine, ignore.
    }

    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
        return false;  // Storing the pointer itself... bad.
      continue; // Otherwise, storing through it, or storing into GV... fine.
    }

    // Must index into the array and into the struct.
    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
        return false;
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
      // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
      // cycles.
      if (PHIs.insert(PN))
        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
          return false;
      continue;
    }

    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
        return false;
      continue;
    }

    return false;
  }
  return true;
}

/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
/// somewhere.  Transform all uses of the allocation into loads from the
/// global and uses of the resultant pointer.  Further, delete the store into
/// GV.  This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
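/// For example (illustrative): each remaining use of the allocation is
/// rewritten to use a fresh "load GV" inserted just before the user (or at
/// the end of the predecessor block for PHI uses), and the single store of
/// the allocation into GV is deleted.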
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
                                          GlobalVariable *GV) {
  while (!Alloc->use_empty()) {
    Instruction *U = cast<Instruction>(*Alloc->use_begin());
    Instruction *InsertPt = U;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // If this is the store of the allocation into the global, remove it.
      if (SI->getOperand(1) == GV) {
        SI->eraseFromParent();
        continue;
      }
    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
      // Insert the load in the corresponding predecessor, not right before the
      // PHI.
      InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
    } else if (isa<BitCastInst>(U)) {
      // Must be bitcast between the malloc and store to initialize the global.
      ReplaceUsesOfMallocWithGlobal(U, GV);
      U->eraseFromParent();
      continue;
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // If this is a "GEP bitcast" and the user is a store to the global, then
      // just process it as a bitcast.
      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
          if (SI->getOperand(1) == GV) {
            // Must be bitcast GEP between the malloc and store to initialize
            // the global.
            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
            GEPI->eraseFromParent();
            continue;
          }
    }

    // Insert a load from the global, and use it instead of the malloc.
    Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
    U->replaceUsesOfWith(Alloc, NL);
  }
}

/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
/// of a load) are simple enough to perform heap SRA on.  This permits GEPs
/// that index through the array and struct field, icmps against null, and
/// PHIs.
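/// For example (illustrative): "getelementptr %p, i32 %i, i32 2" and
/// "icmp eq %p, null" are acceptable uses of a loaded pointer %p, while a
/// store of %p or a call that takes %p would reject the transformation.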
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
                        SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
  // We permit two users of the load: setcc comparing against the null
  // pointer, and a getelementptr of a specific form.
  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
       ++UI) {
    const Instruction *User = cast<Instruction>(*UI);

    // Comparison against null is ok.
    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
        return false;
      continue;
    }

    // getelementptr is also ok, but only a simple form.
    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
      // Must index into the array and into the struct.
      if (GEPI->getNumOperands() < 3)
        return false;

      // Otherwise the GEP is ok.
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(User)) {
      if (!LoadUsingPHIsPerLoad.insert(PN))
        // This means some phi nodes are dependent on each other.
        // Avoid infinite looping!
        return false;
      if (!LoadUsingPHIs.insert(PN))
        // If we have already analyzed this PHI, then it is safe.
        continue;

      // Make sure all uses of the PHI are simple enough to transform.
      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
        return false;

      continue;
    }

    // Otherwise we don't know what this is, not ok.
    return false;
  }

  return true;
}


/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
/// GV are simple enough to perform HeapSRA, return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
                                                    Instruction *StoredVal) {
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
  for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
       UI != E; ++UI)
    if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
                                          LoadUsingPHIsPerLoad))
        return false;
      LoadUsingPHIsPerLoad.clear();
    }

  // If we reach here, we know that all uses of the loads and transitive uses
  // (through PHI nodes) are simple enough to transform.  However, we don't know
  // that all inputs to the PHI nodes are in the same equivalence sets.
  // Check to verify that all operands of the PHIs are either PHIs that can be
  // transformed, loads from GV, or StoredVal itself.
   1143   for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
   1144        , E = LoadUsingPHIs.end(); I != E; ++I) {
   1145     const PHINode *PN = *I;
   1146     for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
   1147       Value *InVal = PN->getIncomingValue(op);
   1148 
   1149       // PHI of the stored value itself is ok.
   1150       if (InVal == StoredVal) continue;
   1151 
   1152       if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
   1153         // One of the PHIs in our set is (optimistically) ok.
   1154         if (LoadUsingPHIs.count(InPN))
   1155           continue;
   1156         return false;
   1157       }
   1158 
   1159       // Load from GV is ok.
   1160       if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
   1161         if (LI->getOperand(0) == GV)
   1162           continue;
   1163 
   1164       // UNDEF? NULL?
   1165 
   1166       // Anything else is rejected.
   1167       return false;
   1168     }
   1169   }
   1170 
   1171   return true;
   1172 }
   1173 
   1174 static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
   1175                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
   1176                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   1177   std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
   1178 
   1179   if (FieldNo >= FieldVals.size())
   1180     FieldVals.resize(FieldNo+1);
   1181 
   1182   // If we already have this value, just reuse the previously scalarized
   1183   // version.
   1184   if (Value *FieldVal = FieldVals[FieldNo])
   1185     return FieldVal;
   1186 
   1187   // Depending on what instruction this is, we have several cases.
   1188   Value *Result;
   1189   if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
   1190     // This is a scalarized version of the load from the global.  Just create
   1191     // a new Load of the scalarized global.
   1192     Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
   1193                                            InsertedScalarizedValues,
   1194                                            PHIsToRewrite),
   1195                           LI->getName()+".f"+Twine(FieldNo), LI);
   1196   } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
   1197     // PN's type is pointer to struct.  Make a new PHI of pointer to struct
   1198     // field.
   1199     StructType *ST =
   1200       cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
   1201 
   1202     PHINode *NewPN =
   1203      PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
   1204                      PN->getNumIncomingValues(),
   1205                      PN->getName()+".f"+Twine(FieldNo), PN);
   1206     Result = NewPN;
   1207     PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
   1208   } else {
   1209     llvm_unreachable("Unknown usable value");
   1210   }
   1211 
   1212   return FieldVals[FieldNo] = Result;
   1213 }
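         // Editorial note: the ".f<No>" naming above mirrors the per-field
         // globals created in PerformHeapAllocSRoA.  For a PHI %p of type
         // %struct.pair* where %struct.pair = { i32, i32* }, the scalarized
         // values are %p.f0 of type i32* and %p.f1 of type i32**; their
         // incoming values are filled in later by the PHIsToRewrite loop.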
   1214 
   1215 /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
   1216 /// the load, rewrite the derived value to use the HeapSRoA'd load.
   1217 static void RewriteHeapSROALoadUser(Instruction *LoadUser,
   1218              DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
   1219                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   1220   // If this is a comparison against null, handle it.
   1221   if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
   1222     assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
    1223     // If we have a comparison of the loaded pointer, we can use a
    1224     // comparison of any field.
   1225     Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
   1226                                    InsertedScalarizedValues, PHIsToRewrite);
   1227 
   1228     Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
   1229                               Constant::getNullValue(NPtr->getType()),
   1230                               SCI->getName());
   1231     SCI->replaceAllUsesWith(New);
   1232     SCI->eraseFromParent();
   1233     return;
   1234   }
   1235 
   1236   // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
   1237   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
   1238     assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
   1239            && "Unexpected GEPI!");
   1240 
   1241     // Load the pointer for this field.
   1242     unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
   1243     Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
   1244                                      InsertedScalarizedValues, PHIsToRewrite);
   1245 
   1246     // Create the new GEP idx vector.
   1247     SmallVector<Value*, 8> GEPIdx;
   1248     GEPIdx.push_back(GEPI->getOperand(1));
   1249     GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
   1250 
   1251     Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
   1252                                              GEPI->getName(), GEPI);
   1253     GEPI->replaceAllUsesWith(NGEPI);
   1254     GEPI->eraseFromParent();
   1255     return;
   1256   }
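           // Editorial example of the GEP rewrite above (hypothetical names):
           //   %f = getelementptr %struct.pair* %ld, i32 %i, i32 1
           // becomes a GEP off the scalarized pointer for field 1:
           //   %f = getelementptr i32** %ld.f1, i32 %i
           // The constant field index (operand 2) is consumed; the array index
           // and any trailing indices are preserved.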
   1257 
   1258   // Recursively transform the users of PHI nodes.  This will lazily create the
   1259   // PHIs that are needed for individual elements.  Keep track of what PHIs we
   1260   // see in InsertedScalarizedValues so that we don't get infinite loops (very
   1261   // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
   1262   // already been seen first by another load, so its uses have already been
   1263   // processed.
   1264   PHINode *PN = cast<PHINode>(LoadUser);
   1265   if (!InsertedScalarizedValues.insert(std::make_pair(PN,
   1266                                               std::vector<Value*>())).second)
   1267     return;
   1268 
   1269   // If this is the first time we've seen this PHI, recursively process all
   1270   // users.
   1271   for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
   1272     Instruction *User = cast<Instruction>(*UI++);
   1273     RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
   1274   }
   1275 }
   1276 
   1277 /// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
   1278 /// is a value loaded from the global.  Eliminate all uses of Ptr, making them
   1279 /// use FieldGlobals instead.  All uses of loaded values satisfy
   1280 /// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
   1281 static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
   1282                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
   1283                    std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
   1284   for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
   1285        UI != E; ) {
   1286     Instruction *User = cast<Instruction>(*UI++);
   1287     RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
   1288   }
   1289 
   1290   if (Load->use_empty()) {
   1291     Load->eraseFromParent();
   1292     InsertedScalarizedValues.erase(Load);
   1293   }
   1294 }
   1295 
   1296 /// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
   1297 /// it up into multiple allocations of arrays of the fields.
   1298 static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
   1299                                             Value *NElems, TargetData *TD) {
   1300   DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
   1301   Type *MAT = getMallocAllocatedType(CI);
   1302   StructType *STy = cast<StructType>(MAT);
   1303 
   1304   // There is guaranteed to be at least one use of the malloc (storing
   1305   // it into GV).  If there are other uses, change them to be uses of
   1306   // the global to simplify later code.  This also deletes the store
   1307   // into GV.
   1308   ReplaceUsesOfMallocWithGlobal(CI, GV);
   1309 
   1310   // Okay, at this point, there are no users of the malloc.  Insert N
   1311   // new mallocs at the same place as CI, and N globals.
   1312   std::vector<Value*> FieldGlobals;
   1313   std::vector<Value*> FieldMallocs;
   1314 
   1315   for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
   1316     Type *FieldTy = STy->getElementType(FieldNo);
   1317     PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
   1318 
   1319     GlobalVariable *NGV =
   1320       new GlobalVariable(*GV->getParent(),
   1321                          PFieldTy, false, GlobalValue::InternalLinkage,
   1322                          Constant::getNullValue(PFieldTy),
   1323                          GV->getName() + ".f" + Twine(FieldNo), GV,
   1324                          GV->isThreadLocal());
   1325     FieldGlobals.push_back(NGV);
   1326 
   1327     unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
   1328     if (StructType *ST = dyn_cast<StructType>(FieldTy))
   1329       TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
   1330     Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
   1331     Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
   1332                                         ConstantInt::get(IntPtrTy, TypeSize),
   1333                                         NElems, 0,
   1334                                         CI->getName() + ".f" + Twine(FieldNo));
   1335     FieldMallocs.push_back(NMI);
   1336     new StoreInst(NMI, NGV, CI);
   1337   }
   1338 
   1339   // The tricky aspect of this transformation is handling the case when malloc
   1340   // fails.  In the original code, malloc failing would set the result pointer
   1341   // of malloc to null.  In this case, some mallocs could succeed and others
   1342   // could fail.  As such, we emit code that looks like this:
   1343   //    F0 = malloc(field0)
   1344   //    F1 = malloc(field1)
   1345   //    F2 = malloc(field2)
   1346   //    if (F0 == 0 || F1 == 0 || F2 == 0) {
   1347   //      if (F0) { free(F0); F0 = 0; }
   1348   //      if (F1) { free(F1); F1 = 0; }
   1349   //      if (F2) { free(F2); F2 = 0; }
   1350   //    }
   1351   // The malloc can also fail if its argument is too large.
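           // (Editorial note: the "isneg" compare below is how that case is
           // caught: a size argument with its sign bit set is treated as a
           // failed allocation and routed to the null/free path.)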
   1352   Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
   1353   Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
   1354                                   ConstantZero, "isneg");
   1355   for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
   1356     Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
   1357                              Constant::getNullValue(FieldMallocs[i]->getType()),
   1358                                "isnull");
   1359     RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
   1360   }
   1361 
   1362   // Split the basic block at the old malloc.
   1363   BasicBlock *OrigBB = CI->getParent();
   1364   BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
   1365 
   1366   // Create the block to check the first condition.  Put all these blocks at the
   1367   // end of the function as they are unlikely to be executed.
   1368   BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
   1369                                                 "malloc_ret_null",
   1370                                                 OrigBB->getParent());
   1371 
   1372   // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
   1373   // branch on RunningOr.
   1374   OrigBB->getTerminator()->eraseFromParent();
   1375   BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
   1376 
   1377   // Within the NullPtrBlock, we need to emit a comparison and branch for each
   1378   // pointer, because some may be null while others are not.
   1379   for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
   1380     Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
   1381     Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
   1382                               Constant::getNullValue(GVVal->getType()));
   1383     BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
   1384                                                OrigBB->getParent());
   1385     BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
   1386                                                OrigBB->getParent());
   1387     Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
   1388                                          Cmp, NullPtrBlock);
   1389 
   1390     // Fill in FreeBlock.
   1391     CallInst::CreateFree(GVVal, BI);
   1392     new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
   1393                   FreeBlock);
   1394     BranchInst::Create(NextBlock, FreeBlock);
   1395 
   1396     NullPtrBlock = NextBlock;
   1397   }
   1398 
   1399   BranchInst::Create(ContBB, NullPtrBlock);
   1400 
   1401   // CI is no longer needed, remove it.
   1402   CI->eraseFromParent();
   1403 
    1404   /// InsertedScalarizedValues - As we process loads, if we can't immediately
   1405   /// update all uses of the load, keep track of what scalarized loads are
   1406   /// inserted for a given load.
   1407   DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
   1408   InsertedScalarizedValues[GV] = FieldGlobals;
   1409 
   1410   std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
   1411 
   1412   // Okay, the malloc site is completely handled.  All of the uses of GV are now
   1413   // loads, and all uses of those loads are simple.  Rewrite them to use loads
   1414   // of the per-field globals instead.
   1415   for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
   1416     Instruction *User = cast<Instruction>(*UI++);
   1417 
   1418     if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
   1419       RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
   1420       continue;
   1421     }
   1422 
   1423     // Must be a store of null.
   1424     StoreInst *SI = cast<StoreInst>(User);
   1425     assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
   1426            "Unexpected heap-sra user!");
   1427 
   1428     // Insert a store of null into each global.
   1429     for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
   1430       PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
   1431       Constant *Null = Constant::getNullValue(PT->getElementType());
   1432       new StoreInst(Null, FieldGlobals[i], SI);
   1433     }
   1434     // Erase the original store.
   1435     SI->eraseFromParent();
   1436   }
   1437 
   1438   // While we have PHIs that are interesting to rewrite, do it.
   1439   while (!PHIsToRewrite.empty()) {
   1440     PHINode *PN = PHIsToRewrite.back().first;
   1441     unsigned FieldNo = PHIsToRewrite.back().second;
   1442     PHIsToRewrite.pop_back();
   1443     PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
   1444     assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
   1445 
   1446     // Add all the incoming values.  This can materialize more phis.
   1447     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
   1448       Value *InVal = PN->getIncomingValue(i);
   1449       InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
   1450                                PHIsToRewrite);
   1451       FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
   1452     }
   1453   }
   1454 
   1455   // Drop all inter-phi links and any loads that made it this far.
   1456   for (DenseMap<Value*, std::vector<Value*> >::iterator
   1457        I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
   1458        I != E; ++I) {
   1459     if (PHINode *PN = dyn_cast<PHINode>(I->first))
   1460       PN->dropAllReferences();
   1461     else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
   1462       LI->dropAllReferences();
   1463   }
   1464 
   1465   // Delete all the phis and loads now that inter-references are dead.
   1466   for (DenseMap<Value*, std::vector<Value*> >::iterator
   1467        I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
   1468        I != E; ++I) {
   1469     if (PHINode *PN = dyn_cast<PHINode>(I->first))
   1470       PN->eraseFromParent();
   1471     else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
   1472       LI->eraseFromParent();
   1473   }
   1474 
   1475   // The old global is now dead, remove it.
   1476   GV->eraseFromParent();
   1477 
   1478   ++NumHeapSRA;
   1479   return cast<GlobalVariable>(FieldGlobals[0]);
   1480 }
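         // Editorial sketch of the overall result (hypothetical names): for a
         // global @G that holds "malloc N x { i32, i32* }", the code above
         // produces
         //   @G.f0 = internal global i32*  null   ; array of field 0
         //   @G.f1 = internal global i32** null   ; array of field 1
         // each filled from its own malloc at the original allocation site,
         // with every load/GEP/PHI of @G rewritten to use the per-field
         // globals.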
   1481 
   1482 /// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
    1483 /// pointer global variable whose single stored value is a malloc or a cast
    1484 /// of a malloc.
   1485 static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
   1486                                                CallInst *CI,
   1487                                                Type *AllocTy,
   1488                                                AtomicOrdering Ordering,
   1489                                                Module::global_iterator &GVI,
   1490                                                TargetData *TD,
   1491                                                TargetLibraryInfo *TLI) {
   1492   if (!TD)
   1493     return false;
   1494 
   1495   // If this is a malloc of an abstract type, don't touch it.
   1496   if (!AllocTy->isSized())
   1497     return false;
   1498 
   1499   // We can't optimize this global unless all uses of it are *known* to be
   1500   // of the malloc value, not of the null initializer value (consider a use
   1501   // that compares the global's value against zero to see if the malloc has
   1502   // been reached).  To do this, we check to see if all uses of the global
   1503   // would trap if the global were null: this proves that they must all
   1504   // happen after the malloc.
   1505   if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
   1506     return false;
   1507 
   1508   // We can't optimize this if the malloc itself is used in a complex way,
    1509   // for example, being stored into multiple globals.  We do allow the
    1510   // malloc to be stored into the specified global, loaded, icmp'd, and
    1511   // GEP'd: these are all uses we know how to rewrite to go through the
    1512   // global instead.
   1513   SmallPtrSet<const PHINode*, 8> PHIs;
   1514   if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
   1515     return false;
   1516 
   1517   // If we have a global that is only initialized with a fixed size malloc,
   1518   // transform the program to use global memory instead of malloc'd memory.
   1519   // This eliminates dynamic allocation, avoids an indirection accessing the
   1520   // data, and exposes the resultant global to further GlobalOpt.
   1521   // We cannot optimize the malloc if we cannot determine malloc array size.
   1522   Value *NElems = getMallocArraySize(CI, TD, true);
   1523   if (!NElems)
   1524     return false;
   1525 
   1526   if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
   1527     // Restrict this transformation to only working on small allocations
   1528     // (2048 bytes currently), as we don't want to introduce a 16M global or
   1529     // something.
   1530     if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
   1531       GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD, TLI);
   1532       return true;
   1533     }
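           // Editorial example: a malloc of 100 x i32 is 100 * 4 = 400 bytes,
           // under the 2048-byte cap, so it becomes a 400-byte global; a
           // malloc of 1024 x double (8192 bytes) is left alone by this path.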
   1534 
   1535   // If the allocation is an array of structures, consider transforming this
   1536   // into multiple malloc'd arrays, one for each field.  This is basically
   1537   // SRoA for malloc'd memory.
   1538 
   1539   if (Ordering != NotAtomic)
   1540     return false;
   1541 
   1542   // If this is an allocation of a fixed size array of structs, analyze as a
   1543   // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
   1544   if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
   1545     if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
   1546       AllocTy = AT->getElementType();
   1547 
   1548   StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
   1549   if (!AllocSTy)
   1550     return false;
   1551 
    1552   // If the structure has an unreasonable number of fields, leave it
   1553   // alone.
   1554   if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
   1555       AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
   1556 
   1557     // If this is a fixed size array, transform the Malloc to be an alloc of
   1558     // structs.  malloc [100 x struct],1 -> malloc struct, 100
   1559     if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
   1560       Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
   1561       unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
   1562       Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
   1563       Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
   1564       Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
   1565                                                    AllocSize, NumElements,
   1566                                                    0, CI->getName());
   1567       Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
   1568       CI->replaceAllUsesWith(Cast);
   1569       CI->eraseFromParent();
   1570       CI = dyn_cast<BitCastInst>(Malloc) ?
   1571         extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
   1572     }
   1573 
   1574     GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD);
   1575     return true;
   1576   }
   1577 
   1578   return false;
   1579 }
   1580 
   1581 // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
   1582 // that only one value (besides its initializer) is ever stored to the global.
   1583 static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
   1584                                      AtomicOrdering Ordering,
   1585                                      Module::global_iterator &GVI,
   1586                                      TargetData *TD, TargetLibraryInfo *TLI) {
   1587   // Ignore no-op GEPs and bitcasts.
   1588   StoredOnceVal = StoredOnceVal->stripPointerCasts();
   1589 
   1590   // If we are dealing with a pointer global that is initialized to null and
   1591   // only has one (non-null) value stored into it, then we can optimize any
   1592   // users of the loaded value (often calls and loads) that would trap if the
   1593   // value was null.
   1594   if (GV->getInitializer()->getType()->isPointerTy() &&
   1595       GV->getInitializer()->isNullValue()) {
   1596     if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
   1597       if (GV->getInitializer()->getType() != SOVC->getType())
   1598         SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
   1599 
   1600       // Optimize away any trapping uses of the loaded value.
   1601       if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
   1602         return true;
   1603     } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
   1604       Type *MallocType = getMallocAllocatedType(CI);
   1605       if (MallocType &&
   1606           TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
   1607                                              TD, TLI))
   1608         return true;
   1609     }
   1610   }
   1611 
   1612   return false;
   1613 }
   1614 
   1615 /// TryToShrinkGlobalToBoolean - At this point, we have learned that the only
   1616 /// two values ever stored into GV are its initializer and OtherVal.  See if we
   1617 /// can shrink the global into a boolean and select between the two values
   1618 /// whenever it is used.  This exposes the values to other scalar optimizations.
   1619 static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
   1620   Type *GVElType = GV->getType()->getElementType();
   1621 
   1622   // If GVElType is already i1, it is already shrunk.  If the type of the GV is
   1623   // an FP value, pointer or vector, don't do this optimization because a select
   1624   // between them is very expensive and unlikely to lead to later
   1625   // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
   1626   // where v1 and v2 both require constant pool loads, a big loss.
   1627   if (GVElType == Type::getInt1Ty(GV->getContext()) ||
   1628       GVElType->isFloatingPointTy() ||
   1629       GVElType->isPointerTy() || GVElType->isVectorTy())
   1630     return false;
   1631 
   1632   // Walk the use list of the global seeing if all the uses are load or store.
   1633   // If there is anything else, bail out.
   1634   for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
   1635     User *U = *I;
   1636     if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
   1637       return false;
   1638   }
   1639 
   1640   DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV);
   1641 
   1642   // Create the new global, initializing it to false.
   1643   GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
   1644                                              false,
   1645                                              GlobalValue::InternalLinkage,
   1646                                         ConstantInt::getFalse(GV->getContext()),
   1647                                              GV->getName()+".b",
   1648                                              GV->isThreadLocal());
   1649   GV->getParent()->getGlobalList().insert(GV, NewGV);
   1650 
   1651   Constant *InitVal = GV->getInitializer();
   1652   assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
   1653          "No reason to shrink to bool!");
   1654 
   1655   // If initialized to zero and storing one into the global, we can use a cast
   1656   // instead of a select to synthesize the desired value.
   1657   bool IsOneZero = false;
   1658   if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal))
   1659     IsOneZero = InitVal->isNullValue() && CI->isOne();
   1660 
   1661   while (!GV->use_empty()) {
   1662     Instruction *UI = cast<Instruction>(GV->use_back());
   1663     if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
   1664       // Change the store into a boolean store.
   1665       bool StoringOther = SI->getOperand(0) == OtherVal;
   1666       // Only do this if we weren't storing a loaded value.
   1667       Value *StoreVal;
   1668       if (StoringOther || SI->getOperand(0) == InitVal)
   1669         StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
   1670                                     StoringOther);
   1671       else {
   1672         // Otherwise, we are storing a previously loaded copy.  To do this,
   1673         // change the copy from copying the original value to just copying the
   1674         // bool.
   1675         Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
   1676 
   1677         // If we've already replaced the input, StoredVal will be a cast or
   1678         // select instruction.  If not, it will be a load of the original
   1679         // global.
   1680         if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
   1681           assert(LI->getOperand(0) == GV && "Not a copy!");
   1682           // Insert a new load, to preserve the saved value.
   1683           StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
   1684                                   LI->getOrdering(), LI->getSynchScope(), LI);
   1685         } else {
   1686           assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
   1687                  "This is not a form that we understand!");
   1688           StoreVal = StoredVal->getOperand(0);
   1689           assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
   1690         }
   1691       }
   1692       new StoreInst(StoreVal, NewGV, false, 0,
   1693                     SI->getOrdering(), SI->getSynchScope(), SI);
   1694     } else {
   1695       // Change the load into a load of bool then a select.
   1696       LoadInst *LI = cast<LoadInst>(UI);
   1697       LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
   1698                                    LI->getOrdering(), LI->getSynchScope(), LI);
   1699       Value *NSI;
   1700       if (IsOneZero)
   1701         NSI = new ZExtInst(NLI, LI->getType(), "", LI);
   1702       else
   1703         NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
   1704       NSI->takeName(LI);
   1705       LI->replaceAllUsesWith(NSI);
   1706     }
   1707     UI->eraseFromParent();
   1708   }
   1709 
   1710   GV->eraseFromParent();
   1711   return true;
   1712 }
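         // Editorial sketch of the shrink-to-bool rewrite (hypothetical
         // names): for
         //   @G = internal global i32 0    ; only 0 (init) and 42 ever stored
         // we create "@G.b = internal global i1 false", rewrite
         // "store i32 42, i32* @G" to "store i1 true, i1* @G.b", and rewrite
         // each load to
         //   %b = load i1* @G.b
         //   %v = select i1 %b, i32 42, i32 0
         // (or a zext of %b when the two values are literally one and zero).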
   1713 
   1714 
   1715 /// ProcessGlobal - Analyze the specified global variable and optimize it if
   1716 /// possible.  If we make a change, return true.
   1717 bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
   1718                               Module::global_iterator &GVI) {
   1719   if (!GV->hasLocalLinkage())
   1720     return false;
   1721 
   1722   // Do more involved optimizations if the global is internal.
   1723   GV->removeDeadConstantUsers();
   1724 
   1725   if (GV->use_empty()) {
   1726     DEBUG(dbgs() << "GLOBAL DEAD: " << *GV);
   1727     GV->eraseFromParent();
   1728     ++NumDeleted;
   1729     return true;
   1730   }
   1731 
   1732   SmallPtrSet<const PHINode*, 16> PHIUsers;
   1733   GlobalStatus GS;
   1734 
   1735   if (AnalyzeGlobal(GV, GS, PHIUsers))
   1736     return false;
   1737 
   1738   if (!GS.isCompared && !GV->hasUnnamedAddr()) {
   1739     GV->setUnnamedAddr(true);
   1740     NumUnnamed++;
   1741   }
   1742 
   1743   if (GV->isConstant() || !GV->hasInitializer())
   1744     return false;
   1745 
   1746   return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
   1747 }
   1748 
   1749 /// ProcessInternalGlobal - Analyze the specified global variable and optimize
   1750 /// it if possible.  If we make a change, return true.
   1751 bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
   1752                                       Module::global_iterator &GVI,
   1753                                 const SmallPtrSet<const PHINode*, 16> &PHIUsers,
   1754                                       const GlobalStatus &GS) {
    1755   // If this is a first class global and has only one accessing function,
    1756   // and that function is main (which we know is not recursive), we can
    1757   // replace the global with a local alloca in that function.
   1759   //
    1760   // NOTE: It doesn't make sense to promote non-single-value types since we
    1761   // are just replacing static memory with stack memory.
    1762   //
    1763   // If the global is in a different address space, don't bring it to the stack.
   1764   if (!GS.HasMultipleAccessingFunctions &&
   1765       GS.AccessingFunction && !GS.HasNonInstructionUser &&
   1766       GV->getType()->getElementType()->isSingleValueType() &&
   1767       GS.AccessingFunction->getName() == "main" &&
   1768       GS.AccessingFunction->hasExternalLinkage() &&
   1769       GV->getType()->getAddressSpace() == 0) {
   1770     DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
   1771     Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
   1772                                                    ->getEntryBlock().begin());
   1773     Type *ElemTy = GV->getType()->getElementType();
   1774     // FIXME: Pass Global's alignment when globals have alignment
   1775     AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
   1776     if (!isa<UndefValue>(GV->getInitializer()))
   1777       new StoreInst(GV->getInitializer(), Alloca, &FirstI);
   1778 
   1779     GV->replaceAllUsesWith(Alloca);
   1780     GV->eraseFromParent();
   1781     ++NumLocalized;
   1782     return true;
   1783   }
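           // Editorial example: an internal global accessed only from main,
           //   @counter = internal global i32 5
           // becomes, at the top of main,
           //   %counter = alloca i32
           //   store i32 5, i32* %counter
           // after which every use of @counter is replaced with %counter.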
   1784 
   1785   // If the global is never loaded (but may be stored to), it is dead.
   1786   // Delete it now.
   1787   if (!GS.isLoaded) {
   1788     DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
   1789 
   1790     // Delete any stores we can find to the global.  We may not be able to
   1791     // make it completely dead though.
   1792     bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(),
   1793                                               TD, TLI);
   1794 
   1795     // If the global is dead now, delete it.
   1796     if (GV->use_empty()) {
   1797       GV->eraseFromParent();
   1798       ++NumDeleted;
   1799       Changed = true;
   1800     }
   1801     return Changed;
   1802 
   1803   } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
   1804     DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
   1805     GV->setConstant(true);
   1806 
   1807     // Clean up any obviously simplifiable users now.
   1808     CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
   1809 
   1810     // If the global is dead now, just nuke it.
   1811     if (GV->use_empty()) {
   1812       DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
   1813             << "all users and delete global!\n");
   1814       GV->eraseFromParent();
   1815       ++NumDeleted;
   1816     }
   1817 
   1818     ++NumMarked;
   1819     return true;
   1820   } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
   1821     if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
   1822       if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
   1823         GVI = FirstNewGV;  // Don't skip the newly produced globals!
   1824         return true;
   1825       }
   1826   } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
   1827     // If the initial value for the global was an undef value, and if only
   1828     // one other value was stored into it, we can just change the
   1829     // initializer to be the stored value, then delete all stores to the
   1830     // global.  This allows us to mark it constant.
   1831     if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
   1832       if (isa<UndefValue>(GV->getInitializer())) {
   1833         // Change the initial value here.
   1834         GV->setInitializer(SOVConstant);
   1835 
   1836         // Clean up any obviously simplifiable users now.
   1837         CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
   1838 
   1839         if (GV->use_empty()) {
   1840           DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
   1841                 << "simplify all users and delete global!\n");
   1842           GV->eraseFromParent();
   1843           ++NumDeleted;
   1844         } else {
   1845           GVI = GV;
   1846         }
   1847         ++NumSubstitute;
   1848         return true;
   1849       }
   1850 
   1851     // Try to optimize globals based on the knowledge that only one value
   1852     // (besides its initializer) is ever stored to the global.
   1853     if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
   1854                                  TD, TLI))
   1855       return true;
   1856 
   1857     // Otherwise, if the global was not a boolean, we can shrink it to be a
   1858     // boolean.
   1859     if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
   1860       if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
   1861         ++NumShrunkToBool;
   1862         return true;
   1863       }
   1864   }
   1865 
   1866   return false;
   1867 }
   1868 
   1869 /// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
   1870 /// function, changing them to FastCC.
   1871 static void ChangeCalleesToFastCall(Function *F) {
   1872   for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
   1873     CallSite User(cast<Instruction>(*UI));
   1874     User.setCallingConv(CallingConv::Fast);
   1875   }
   1876 }
   1877 
   1878 static AttrListPtr StripNest(const AttrListPtr &Attrs) {
   1879   for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
   1880     if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0)
   1881       continue;
   1882 
   1883     // There can be only one.
   1884     return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest);
   1885   }
   1886 
   1887   return Attrs;
   1888 }
   1889 
   1890 static void RemoveNestAttribute(Function *F) {
   1891   F->setAttributes(StripNest(F->getAttributes()));
   1892   for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
   1893     CallSite User(cast<Instruction>(*UI));
   1894     User.setAttributes(StripNest(User.getAttributes()));
   1895   }
   1896 }
   1897 
   1898 bool GlobalOpt::OptimizeFunctions(Module &M) {
   1899   bool Changed = false;
   1900   // Optimize functions.
   1901   for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
   1902     Function *F = FI++;
   1903     // Functions without names cannot be referenced outside this module.
   1904     if (!F->hasName() && !F->isDeclaration())
   1905       F->setLinkage(GlobalValue::InternalLinkage);
   1906     F->removeDeadConstantUsers();
   1907     if (F->isDefTriviallyDead()) {
   1908       F->eraseFromParent();
   1909       Changed = true;
   1910       ++NumFnDeleted;
   1911     } else if (F->hasLocalLinkage()) {
   1912       if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
   1913           !F->hasAddressTaken()) {
   1914         // If this function has C calling conventions, is not a varargs
   1915         // function, and is only called directly, promote it to use the Fast
   1916         // calling convention.
   1917         F->setCallingConv(CallingConv::Fast);
   1918         ChangeCalleesToFastCall(F);
   1919         ++NumFastCallFns;
   1920         Changed = true;
   1921       }
   1922 
   1923       if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
   1924           !F->hasAddressTaken()) {
   1925         // The function is not used by a trampoline intrinsic, so it is safe
   1926         // to remove the 'nest' attribute.
   1927         RemoveNestAttribute(F);
   1928         ++NumNestRemoved;
   1929         Changed = true;
   1930       }
   1931     }
   1932   }
   1933   return Changed;
   1934 }
   1935 
   1936 bool GlobalOpt::OptimizeGlobalVars(Module &M) {
   1937   bool Changed = false;
   1938   for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
   1939        GVI != E; ) {
   1940     GlobalVariable *GV = GVI++;
   1941     // Global variables without names cannot be referenced outside this module.
   1942     if (!GV->hasName() && !GV->isDeclaration())
   1943       GV->setLinkage(GlobalValue::InternalLinkage);
   1944     // Simplify the initializer.
   1945     if (GV->hasInitializer())
   1946       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
   1947         Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
   1948         if (New && New != CE)
   1949           GV->setInitializer(New);
   1950       }
   1951 
   1952     Changed |= ProcessGlobal(GV, GVI);
   1953   }
   1954   return Changed;
   1955 }
   1956 
   1957 /// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
   1958 /// initializers have an init priority of 65535.
   1959 GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
   1960   GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
   1961   if (GV == 0) return 0;
   1962 
   1963   // Verify that the initializer is simple enough for us to handle. We are
   1964   // only allowed to optimize the initializer if it is unique.
   1965   if (!GV->hasUniqueInitializer()) return 0;
   1966 
   1967   if (isa<ConstantAggregateZero>(GV->getInitializer()))
   1968     return GV;
   1969   ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
   1970 
   1971   for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
   1972     if (isa<ConstantAggregateZero>(*i))
   1973       continue;
   1974     ConstantStruct *CS = cast<ConstantStruct>(*i);
   1975     if (isa<ConstantPointerNull>(CS->getOperand(1)))
   1976       continue;
   1977 
   1978     // Must have a function or null ptr.
   1979     if (!isa<Function>(CS->getOperand(1)))
   1980       return 0;
   1981 
   1982     // Init priority must be standard.
   1983     ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
   1984     if (CI->getZExtValue() != 65535)
   1985       return 0;
   1986   }
   1987 
   1988   return GV;
   1989 }
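         // Editorial note: the list this expects looks like (hypothetical
         // ctor name @init)
         //   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
         //     [{ i32, void ()* } { i32 65535, void ()* @init }]
         // i.e. each entry pairs the default priority 65535 with a function
         // pointer, where a null pointer is allowed as a placeholder.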
   1990 
   1991 /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
   1992 /// return a list of the functions and null terminator as a vector.
   1993 static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
   1994   if (GV->getInitializer()->isNullValue())
   1995     return std::vector<Function*>();
   1996   ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
   1997   std::vector<Function*> Result;
   1998   Result.reserve(CA->getNumOperands());
   1999   for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
   2000     ConstantStruct *CS = cast<ConstantStruct>(*i);
   2001     Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
   2002   }
   2003   return Result;
   2004 }
   2005 
   2006 /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
   2007 /// specified array, returning the new global to use.
   2008 static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
   2009                                           const std::vector<Function*> &Ctors) {
   2010   // If we made a change, reassemble the initializer list.
   2011   Constant *CSVals[2];
   2012   CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
   2013   CSVals[1] = 0;
   2014 
   2015   StructType *StructTy =
    2016     cast<StructType>(
   2017     cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
   2018 
   2019   // Create the new init list.
   2020   std::vector<Constant*> CAList;
   2021   for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
   2022     if (Ctors[i]) {
   2023       CSVals[1] = Ctors[i];
   2024     } else {
   2025       Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
   2026                                           false);
   2027       PointerType *PFTy = PointerType::getUnqual(FTy);
   2028       CSVals[1] = Constant::getNullValue(PFTy);
   2029       CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
   2030                                    0x7fffffff);
   2031     }
   2032     CAList.push_back(ConstantStruct::get(StructTy, CSVals));
   2033   }
   2034 
   2035   // Create the array initializer.
   2036   Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
   2037                                                    CAList.size()), CAList);
   2038 
   2039   // If we didn't change the number of elements, don't create a new GV.
   2040   if (CA->getType() == GCL->getInitializer()->getType()) {
   2041     GCL->setInitializer(CA);
   2042     return GCL;
   2043   }
   2044 
   2045   // Create the new global and insert it next to the existing list.
   2046   GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
   2047                                            GCL->getLinkage(), CA, "",
   2048                                            GCL->isThreadLocal());
   2049   GCL->getParent()->getGlobalList().insert(GCL, NGV);
   2050   NGV->takeName(GCL);
   2051 
   2052   // Nuke the old list, replacing any uses with the new one.
   2053   if (!GCL->use_empty()) {
   2054     Constant *V = NGV;
   2055     if (V->getType() != GCL->getType())
   2056       V = ConstantExpr::getBitCast(V, GCL->getType());
   2057     GCL->replaceAllUsesWith(V);
   2058   }
   2059   GCL->eraseFromParent();
   2060 
   2061   if (Ctors.size())
   2062     return NGV;
   2063   else
   2064     return 0;
   2065 }
   2066 
   2067 
   2068 static inline bool
   2069 isSimpleEnoughValueToCommit(Constant *C,
   2070                             SmallPtrSet<Constant*, 8> &SimpleConstants,
   2071                             const TargetData *TD);
   2072 
   2073 
   2074 /// isSimpleEnoughValueToCommit - Return true if the specified constant can be
   2075 /// handled by the code generator.  We don't want to generate something like:
   2076 ///   void *X = &X/42;
   2077 /// because the code generator doesn't have a relocation that can handle that.
   2078 ///
   2079 /// This function should be called if C was not found (but just got inserted)
   2080 /// in SimpleConstants to avoid having to rescan the same constants all the
   2081 /// time.
   2082 static bool isSimpleEnoughValueToCommitHelper(Constant *C,
   2083                                    SmallPtrSet<Constant*, 8> &SimpleConstants,
   2084                                    const TargetData *TD) {
   2085   // Simple integer, undef, constant aggregate zero, global addresses, etc are
   2086   // all supported.
   2087   if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
   2088       isa<GlobalValue>(C))
   2089     return true;
   2090 
   2091   // Aggregate values are safe if all their elements are.
   2092   if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
   2093       isa<ConstantVector>(C)) {
   2094     for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
   2095       Constant *Op = cast<Constant>(C->getOperand(i));
   2096       if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD))
   2097         return false;
   2098     }
   2099     return true;
   2100   }
   2101 
   2102   // We don't know exactly what relocations are allowed in constant expressions,
   2103   // so we allow &global+constantoffset, which is safe and uniformly supported
   2104   // across targets.
   2105   ConstantExpr *CE = cast<ConstantExpr>(C);
   2106   switch (CE->getOpcode()) {
   2107   case Instruction::BitCast:
   2108     // Bitcast is fine if the casted value is fine.
   2109     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2110 
   2111   case Instruction::IntToPtr:
   2112   case Instruction::PtrToInt:
   2113     // int <=> ptr is fine if the int type is the same size as the
   2114     // pointer type.
   2115     if (!TD || TD->getTypeSizeInBits(CE->getType()) !=
   2116                TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
   2117       return false;
   2118     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2119 
   2120   // GEP is fine if it is simple + constant offset.
   2121   case Instruction::GetElementPtr:
   2122     for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
   2123       if (!isa<ConstantInt>(CE->getOperand(i)))
   2124         return false;
   2125     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2126 
   2127   case Instruction::Add:
   2128     // We allow simple+cst.
   2129     if (!isa<ConstantInt>(CE->getOperand(1)))
   2130       return false;
   2131     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
   2132   }
   2133   return false;
   2134 }
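         // Editorial examples of the policy above: "bitcast (i32* @G to i8*)",
         // a GEP of @G with all-constant-int indices, and
         // "add (ptrtoint @G to i64), 8" (given 64-bit pointers and TD) are
         // all committable, while "udiv (ptrtoint @X to i64), 42" is not,
         // since no relocation can express it.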
   2135 
   2136 static inline bool
   2137 isSimpleEnoughValueToCommit(Constant *C,
   2138                             SmallPtrSet<Constant*, 8> &SimpleConstants,
   2139                             const TargetData *TD) {
   2140   // If we already checked this constant, we win.
   2141   if (!SimpleConstants.insert(C)) return true;
   2142   // Check the constant.
   2143   return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD);
   2144 }
   2145 
   2146 
   2147 /// isSimpleEnoughPointerToCommit - Return true if this constant is simple
   2148 /// enough for us to understand.  In particular, if it is a cast to anything
   2149 /// other than from one pointer type to another pointer type, we punt.
   2150 /// We basically just support direct accesses to globals and GEP's of
   2151 /// globals.  This should be kept up to date with CommitValueTo.
   2152 static bool isSimpleEnoughPointerToCommit(Constant *C) {
   2153   // Conservatively, avoid aggregate types. This is because we don't
   2154   // want to worry about them partially overlapping other stores.
   2155   if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
   2156     return false;
   2157 
   2158   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
   2159     // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
   2160     // external globals.
   2161     return GV->hasUniqueInitializer();
   2162 
   2163   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
   2164     // Handle a constantexpr gep.
   2165     if (CE->getOpcode() == Instruction::GetElementPtr &&
   2166         isa<GlobalVariable>(CE->getOperand(0)) &&
   2167         cast<GEPOperator>(CE)->isInBounds()) {
   2168       GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
   2169       // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
   2170       // external globals.
   2171       if (!GV->hasUniqueInitializer())
   2172         return false;
   2173 
   2174       // The first index must be zero.
   2175       ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
   2176       if (!CI || !CI->isZero()) return false;
   2177 
   2178       // The remaining indices must be compile-time known integers within the
   2179       // notional bounds of the corresponding static array types.
   2180       if (!CE->isGEPWithNoNotionalOverIndexing())
   2181         return false;
   2182 
   2183       return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
   2184 
   2185     // A constantexpr bitcast from a pointer to another pointer is a no-op,
   2186     // and we know how to evaluate it by moving the bitcast from the pointer
   2187     // operand to the value operand.
   2188     } else if (CE->getOpcode() == Instruction::BitCast &&
   2189                isa<GlobalVariable>(CE->getOperand(0))) {
   2190       // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
   2191       // external globals.
   2192       return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
   2193     }
   2194   }
   2195 
   2196   return false;
   2197 }
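         // Editorial examples: a store directly to @G, or to
         //   getelementptr inbounds ([4 x i32]* @G, i32 0, i32 2)
         // can be committed (unique initializer, first index zero, in-bounds
         // constant indices), while a GEP with a non-zero first index or a
         // non-constant index cannot.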
   2198 
   2199 /// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global
   2200 /// initializer.  This returns 'Init' modified to reflect 'Val' stored into it.
   2201 /// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
   2202 static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
   2203                                    ConstantExpr *Addr, unsigned OpNo) {
   2204   // Base case of the recursion.
   2205   if (OpNo == Addr->getNumOperands()) {
   2206     assert(Val->getType() == Init->getType() && "Type mismatch!");
   2207     return Val;
   2208   }
   2209 
   2210   SmallVector<Constant*, 32> Elts;
   2211   if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
   2212     // Break up the constant into its elements.
   2213     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
   2214       Elts.push_back(Init->getAggregateElement(i));
   2215 
   2216     // Replace the element that we are supposed to.
   2217     ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
   2218     unsigned Idx = CU->getZExtValue();
   2219     assert(Idx < STy->getNumElements() && "Struct index out of range!");
   2220     Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
   2221 
   2222     // Return the modified struct.
   2223     return ConstantStruct::get(STy, Elts);
   2224   }
   2225 
   2226   ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
   2227   SequentialType *InitTy = cast<SequentialType>(Init->getType());
   2228 
   2229   uint64_t NumElts;
   2230   if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
   2231     NumElts = ATy->getNumElements();
   2232   else
   2233     NumElts = InitTy->getVectorNumElements();
   2234 
   2235   // Break up the array into elements.
   2236   for (uint64_t i = 0, e = NumElts; i != e; ++i)
   2237     Elts.push_back(Init->getAggregateElement(i));
   2238 
   2239   assert(CI->getZExtValue() < NumElts);
   2240   Elts[CI->getZExtValue()] =
   2241     EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
   2242 
   2243   if (Init->getType()->isArrayTy())
   2244     return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
   2245   return ConstantVector::get(Elts);
   2246 }
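         // Worked example (editorial): with "@G = global { i32, i32 }
         // { i32 1, i32 2 }" and Addr = "getelementptr ({ i32, i32 }* @G,
         // i32 0, i32 1)", CommitValueTo below calls
         // EvaluateStoreInto(Init, i32 7, Addr, 2).  OpNo 2 selects field 1,
         // the recursion hits its base case, and the rebuilt initializer is
         // "{ i32 1, i32 7 }".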
   2247 
   2248 /// CommitValueTo - We have decided that Addr (which satisfies the predicate
   2249 /// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
   2250 static void CommitValueTo(Constant *Val, Constant *Addr) {
   2251   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
   2252     assert(GV->hasInitializer());
   2253     GV->setInitializer(Val);
   2254     return;
   2255   }
   2256 
   2257   ConstantExpr *CE = cast<ConstantExpr>(Addr);
   2258   GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
   2259   GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
   2260 }
   2261 
   2262 namespace {
   2263 
   2264 /// Evaluator - This class evaluates LLVM IR, producing the Constant
   2265 /// representing each SSA instruction.  Changes to global variables are stored
   2266 /// in a mapping that can be iterated over after the evaluation is complete.
   2267 /// Once an evaluation call fails, the evaluation object should not be reused.
   2268 class Evaluator {
   2269 public:
   2270   Evaluator(const TargetData *TD, const TargetLibraryInfo *TLI)
   2271     : TD(TD), TLI(TLI) {
   2272     ValueStack.push_back(new DenseMap<Value*, Constant*>);
   2273   }
   2274 
   2275   ~Evaluator() {
   2276     DeleteContainerPointers(ValueStack);
   2277     while (!AllocaTmps.empty()) {
   2278       GlobalVariable *Tmp = AllocaTmps.back();
   2279       AllocaTmps.pop_back();
   2280 
   2281       // If there are still users of the alloca, the program is doing something
   2282       // silly, e.g. storing the address of the alloca somewhere and using it
   2283       // later.  Since this is undefined, we'll just make it be null.
   2284       if (!Tmp->use_empty())
   2285         Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
   2286       delete Tmp;
   2287     }
   2288   }
   2289 
   2290   /// EvaluateFunction - Evaluate a call to function F, returning true if
    2291   /// successful, false if we can't evaluate it.  ActualArgs contains the
    2292   /// actual argument values for the call.
   2293   bool EvaluateFunction(Function *F, Constant *&RetVal,
   2294                         const SmallVectorImpl<Constant*> &ActualArgs);
   2295 
   2296   /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
    2297   /// successful, false if we can't evaluate it.  NextBB returns the next BB that
   2298   /// control flows into, or null upon return.
   2299   bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
   2300 
   2301   Constant *getVal(Value *V) {
   2302     if (Constant *CV = dyn_cast<Constant>(V)) return CV;
   2303     Constant *R = ValueStack.back()->lookup(V);
   2304     assert(R && "Reference to an uncomputed value!");
   2305     return R;
   2306   }
   2307 
   2308   void setVal(Value *V, Constant *C) {
   2309     ValueStack.back()->operator[](V) = C;
   2310   }
   2311 
   2312   const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
   2313     return MutatedMemory;
   2314   }
   2315 
   2316   const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
   2317     return Invariants;
   2318   }
   2319 
private:
  Constant *ComputeLoadResult(Constant *P);

  /// ValueStack - As we compute SSA register values, we store their contents
  /// here.  The back of the vector contains the map for the current function;
  /// earlier entries hold the values of the calling frames.
  SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;

  /// CallStack - This is used to detect recursion.  In pathological situations
  /// we could hit exponential behavior, but at least there is nothing
  /// unbounded.
  SmallVector<Function*, 4> CallStack;

  /// MutatedMemory - For each store we execute, we update this map.  Loads
  /// check this to get the most up-to-date value.  If evaluation is
  /// successful, this state is committed to the module's globals.
  DenseMap<Constant*, Constant*> MutatedMemory;

  /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
  /// to represent its body.  This vector is needed so we can delete the
  /// temporary globals when we are done.
  SmallVector<GlobalVariable*, 32> AllocaTmps;

  /// Invariants - These global variables have been marked invariant by the
  /// static constructor.
  SmallPtrSet<GlobalVariable*, 8> Invariants;

  /// SimpleConstants - These are constants we have checked and know to be
  /// simple enough to live in a static initializer of a global.
  SmallPtrSet<Constant*, 8> SimpleConstants;

  const TargetData *TD;
  const TargetLibraryInfo *TLI;
};

}  // anonymous namespace

/// ComputeLoadResult - Return the value that would be computed by a load from
/// P after the stores reflected by MutatedMemory have been performed.  If we
/// can't decide, return null.
Constant *Evaluator::ComputeLoadResult(Constant *P) {
  // If this memory location has been recently stored, use the stored value: it
  // is the most up-to-date.
  DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
  if (I != MutatedMemory.end()) return I->second;

  // If this is a global variable, use its definitive initializer if it has one.
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
    if (GV->hasDefinitiveInitializer())
      return GV->getInitializer();
    return 0;
  }

  // Handle a constantexpr getelementptr.
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
    if (CE->getOpcode() == Instruction::GetElementPtr &&
        isa<GlobalVariable>(CE->getOperand(0))) {
      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
      if (GV->hasDefinitiveInitializer())
        return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
    }

  return 0;  // Don't know how to evaluate this load.
}
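
// For illustration, a load the evaluator can fold (hypothetical IR; assumes
// @g has a definitive initializer):
//   @g = internal global [2 x i32] [i32 1, i32 2]
//   %v = load i32* getelementptr inbounds ([2 x i32]* @g, i64 0, i64 1)
// ComputeLoadResult would return 'i32 2' here, unless an earlier simulated
// store to the same address recorded a newer value in MutatedMemory.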

/// EvaluateBlock - Evaluate all instructions in the block starting at CurInst,
/// returning true if successful, false if we can't evaluate it.  NextBB
/// returns the next basic block that control flows into, or null upon reaching
/// a return instruction.
bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
                              BasicBlock *&NextBB) {
  // This is the main evaluation loop.
  while (1) {
    Constant *InstResult = 0;

    if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
      if (!SI->isSimple()) return false;  // no volatile/atomic accesses.
      Constant *Ptr = getVal(SI->getOperand(1));
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
        Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
      if (!isSimpleEnoughPointerToCommit(Ptr))
        // If this is too complex for us to commit, reject it.
        return false;

      Constant *Val = getVal(SI->getOperand(0));

      // If this might be too difficult for the backend to handle (e.g. the addr
      // of one global variable divided by another) then we can't commit it.
      if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD))
        return false;

      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
        if (CE->getOpcode() == Instruction::BitCast) {
          // If we're evaluating a store through a bitcast, then we need
          // to pull the bitcast off the pointer type and push it onto the
          // stored value.
          Ptr = CE->getOperand(0);

          Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();

          // In order to push the bitcast onto the stored value, a bitcast
          // from NewTy to Val's type must be legal.  If it's not, we can try
          // introspecting NewTy to find a legal conversion.
          while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
            // If NewTy is a struct, we can convert the pointer to the struct
            // into a pointer to its first member.
            // FIXME: This could be extended to support arrays as well.
            if (StructType *STy = dyn_cast<StructType>(NewTy)) {
              NewTy = STy->getTypeAtIndex(0U);

              IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
              Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
              Constant * const IdxList[] = {IdxZero, IdxZero};

              Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
              if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
                Ptr = ConstantFoldConstantExpression(CE, TD, TLI);

            // If we can't improve the situation by introspecting NewTy,
            // we have to give up.
            } else {
              return false;
            }
          }

          // If we found compatible types, go ahead and push the bitcast
          // onto the stored value.
          Val = ConstantExpr::getBitCast(Val, NewTy);
        }
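      // For illustration (hypothetical IR): given
      //   %struct.S = type { i32 }
      //   @s = internal global %struct.S zeroinitializer
      //   store i32 5, i32* bitcast (%struct.S* @s to i32*)
      // the loop above strips the bitcast off the pointer and GEPs to the
      // first member (an i32); the final bitcast of the value then folds
      // away since the types already match.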

      MutatedMemory[Ptr] = Val;
    } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
      InstResult = ConstantExpr::get(BO->getOpcode(),
                                     getVal(BO->getOperand(0)),
                                     getVal(BO->getOperand(1)));
    } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
      InstResult = ConstantExpr::getCompare(CI->getPredicate(),
                                            getVal(CI->getOperand(0)),
                                            getVal(CI->getOperand(1)));
    } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
      InstResult = ConstantExpr::getCast(CI->getOpcode(),
                                         getVal(CI->getOperand(0)),
                                         CI->getType());
    } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
      InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
                                           getVal(SI->getOperand(1)),
                                           getVal(SI->getOperand(2)));
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
      Constant *P = getVal(GEP->getOperand(0));
      SmallVector<Constant*, 8> GEPOps;
      for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
           i != e; ++i)
        GEPOps.push_back(getVal(*i));
      InstResult =
        ConstantExpr::getGetElementPtr(P, GEPOps,
                                       cast<GEPOperator>(GEP)->isInBounds());
    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
      if (!LI->isSimple()) return false;  // no volatile/atomic accesses.
      Constant *Ptr = getVal(LI->getOperand(0));
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
        Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
      InstResult = ComputeLoadResult(Ptr);
      if (InstResult == 0) return false; // Could not evaluate load.
    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
      if (AI->isArrayAllocation()) return false;  // Cannot handle array allocs.
      Type *Ty = AI->getType()->getElementType();
      AllocaTmps.push_back(new GlobalVariable(Ty, false,
                                              GlobalValue::InternalLinkage,
                                              UndefValue::get(Ty),
                                              AI->getName()));
      InstResult = AllocaTmps.back();
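      // E.g. (hypothetical), '%x = alloca i32' is modeled as a fresh internal
      // global with an undef initializer; subsequent simulated loads and
      // stores to %x then go through MutatedMemory like any other global.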
    } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
      CallSite CS(CurInst);

      // Debug info can safely be ignored here.
      if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
        ++CurInst;
        continue;
      }

      // Cannot handle inline asm.
      if (isa<InlineAsm>(CS.getCalledValue())) return false;

      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
        if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
          if (MSI->isVolatile()) return false;
          Constant *Ptr = getVal(MSI->getDest());
          Constant *Val = getVal(MSI->getValue());
          Constant *DestVal = ComputeLoadResult(Ptr);
          if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
            // This memset is a no-op.
            ++CurInst;
            continue;
          }
        }

        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
            II->getIntrinsicID() == Intrinsic::lifetime_end) {
          ++CurInst;
          continue;
        }

        if (II->getIntrinsicID() == Intrinsic::invariant_start) {
          // We don't insert an entry into the value table, as invariant_start
          // doesn't have a meaningful return value.
          if (!II->use_empty())
            return false;
          ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
          Value *PtrArg = getVal(II->getArgOperand(1));
          Value *Ptr = PtrArg->stripPointerCasts();
          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
            Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
            if (!Size->isAllOnesValue() &&
                Size->getValue().getLimitedValue() >=
                TD->getTypeStoreSize(ElemTy))
              Invariants.insert(GV);
          }
          // Continue even if we do nothing.
          ++CurInst;
          continue;
        }
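        // For illustration (hypothetical IR), a ctor might emit:
        //   call {}* @llvm.invariant.start(i64 4, i8* bitcast (i32* @g to i8*))
        // If the size argument is known (not -1) and covers all of @g, then
        // @g is added to Invariants and can be marked 'constant' when
        // evaluation commits.  Any intrinsic not handled above falls through
        // and aborts the evaluation.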
        return false;
      }

      // Resolve function pointers.
      Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
      if (!Callee || Callee->mayBeOverridden())
        return false;  // Cannot resolve.

      SmallVector<Constant*, 8> Formals;
      for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
        Formals.push_back(getVal(*i));

      if (Callee->isDeclaration()) {
        // If this is a function we can constant fold, do it.
        if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
          InstResult = C;
        } else {
          return false;
        }
      } else {
        if (Callee->getFunctionType()->isVarArg())
          return false;

        Constant *RetVal;
        // Execute the call; if successful, use the return value.
        ValueStack.push_back(new DenseMap<Value*, Constant*>);
        if (!EvaluateFunction(Callee, RetVal, Formals))
          return false;
        delete ValueStack.pop_back_val();
        InstResult = RetVal;
      }
    } else if (isa<TerminatorInst>(CurInst)) {
      if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
        if (BI->isUnconditional()) {
          NextBB = BI->getSuccessor(0);
        } else {
          ConstantInt *Cond =
            dyn_cast<ConstantInt>(getVal(BI->getCondition()));
          if (!Cond) return false;  // Cannot determine.

          NextBB = BI->getSuccessor(!Cond->getZExtValue());
        }
      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
        ConstantInt *Val =
          dyn_cast<ConstantInt>(getVal(SI->getCondition()));
        if (!Val) return false;  // Cannot determine.
        NextBB = SI->findCaseValue(Val).getCaseSuccessor();
      } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
        Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
        if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
          NextBB = BA->getBasicBlock();
        else
          return false;  // Cannot determine.
      } else if (isa<ReturnInst>(CurInst)) {
        NextBB = 0;
      } else {
        // invoke, unwind, resume, unreachable.
        return false;  // Cannot handle this terminator.
      }

      // We succeeded at evaluating this block!
      return true;
    } else {
      // Did not know how to evaluate this!
      return false;
    }

    if (!CurInst->use_empty()) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
        InstResult = ConstantFoldConstantExpression(CE, TD, TLI);

      setVal(CurInst, InstResult);
    }

    // If we just processed an invoke, we finished evaluating the block.
    if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
      NextBB = II->getNormalDest();
      return true;
    }

    // Advance program counter.
    ++CurInst;
  }
}

/// EvaluateFunction - Evaluate a call to function F, returning true if
/// successful, false if we can't evaluate it.  ActualArgs contains the
/// constant values of the actual arguments passed to the function.
bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
                                 const SmallVectorImpl<Constant*> &ActualArgs) {
  // Check to see if this function is already executing (recursion).  If so,
  // bail out.  TODO: we might want to accept limited recursion.
  if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
    return false;

  CallStack.push_back(F);

  // Initialize arguments to the incoming values specified.
  unsigned ArgNo = 0;
  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
       ++AI, ++ArgNo)
    setVal(AI, ActualArgs[ArgNo]);

  // ExecutedBlocks - We only handle non-looping, non-recursive code.  As such,
  // we can only evaluate any one basic block at most once.  This set keeps
  // track of what we have executed so we can detect loops.
  SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;

  // CurBB - The current basic block we're evaluating.
  BasicBlock *CurBB = F->begin();

  BasicBlock::iterator CurInst = CurBB->begin();

  while (1) {
    BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
    if (!EvaluateBlock(CurInst, NextBB))
      return false;

    if (NextBB == 0) {
      // Successfully running until there's no next block means that we found
      // a return instruction.  Fill in the return value and pop the call
      // stack.
      ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
      if (RI->getNumOperands())
        RetVal = getVal(RI->getOperand(0));
      CallStack.pop_back();
      return true;
    }

    // Okay, we succeeded in evaluating this control flow.  See if we have
    // executed the new block before.  If so, we have a looping function,
    // which we cannot evaluate in reasonable time.
    if (!ExecutedBlocks.insert(NextBB))
      return false;  // looped!

    // Okay, we have never been in this block before.  Check to see if there
    // are any PHI nodes.  If so, evaluate them with information about where
    // we came from.
    PHINode *PN = 0;
    for (CurInst = NextBB->begin();
         (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
      setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));

    // Advance to the next block.
    CurBB = NextBB;
  }
}
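
// For illustration, a function the evaluator can run to completion
// (hypothetical IR):
//   define internal i32 @pick() {
//   entry:
//     br i1 true, label %t, label %f
//   t:
//     ret i32 1
//   f:
//     ret i32 0
//   }
// EvaluateBlock folds the conditional branch to the %t successor, evaluation
// continues in %t, NextBB comes back null at its return, and RetVal is set
// to 'i32 1'.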

/// EvaluateStaticConstructor - Evaluate the given static constructor function
/// if we can.  Return true if we successfully evaluated it, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const TargetData *TD,
                                      const TargetLibraryInfo *TLI) {
  // Call the function.
  Evaluator Eval(TD, TLI);
  Constant *RetValDummy;
  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
                                           SmallVector<Constant*, 0>());

  if (EvalSuccess) {
    // We succeeded at evaluation: commit the result.
    DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
          << F->getName() << "' to " << Eval.getMutatedMemory().size()
          << " stores.\n");
    for (DenseMap<Constant*, Constant*>::const_iterator I =
           Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
         I != E; ++I)
      CommitValueTo(I->second, I->first);
    for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
           Eval.getInvariants().begin(), E = Eval.getInvariants().end();
         I != E; ++I)
      (*I)->setConstant(true);
  }

  return EvalSuccess;
}
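
// For illustration (hypothetical C++):
//   int bar() { return 5; }
//   int x = bar();
// front ends emit a ctor that calls bar() and stores the result to @x.  The
// evaluator executes that call, records the store of 'i32 5' to @x in
// MutatedMemory, and on success CommitValueTo makes 5 the initializer of @x,
// so the ctor can be dropped from the list by the caller below.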

/// OptimizeGlobalCtorsList - Simplify and evaluate global ctors if possible.
/// Return true if anything changed.
bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
  std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
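  // llvm.global_ctors is typically an appending global of
  // { i32 priority, void ()* ctor } pairs, e.g. (hypothetical):
  //   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
  //     [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
  // ParseGlobalCtors flattens this into the list of ctor functions.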
  bool MadeChange = false;
  if (Ctors.empty()) return false;

  // Loop over global ctors, optimizing them when we can.
  for (unsigned i = 0; i != Ctors.size(); ++i) {
    Function *F = Ctors[i];
    // If we found a null terminator in the middle of the list, prune off the
    // rest of the list.
    if (F == 0) {
      if (i != Ctors.size()-1) {
        Ctors.resize(i+1);
        MadeChange = true;
      }
      break;
    }

    // We cannot simplify external ctor functions.
    if (F->empty()) continue;

    // If we can evaluate the ctor at compile time, do.
    if (EvaluateStaticConstructor(F, TD, TLI)) {
      Ctors.erase(Ctors.begin()+i);
      MadeChange = true;
      --i;
      ++NumCtorsEvaluated;
      continue;
    }
  }

  if (!MadeChange) return false;

  GCL = InstallGlobalCtors(GCL, Ctors);
  return true;
}

bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
  bool Changed = false;

  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
       I != E;) {
    Module::alias_iterator J = I++;
    // Aliases without names cannot be referenced outside this module.
    if (!J->hasName() && !J->isDeclaration())
      J->setLinkage(GlobalValue::InternalLinkage);
    // If the aliasee may change at link time, nothing can be done - bail out.
    if (J->mayBeOverridden())
      continue;

    Constant *Aliasee = J->getAliasee();
    GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
    Target->removeDeadConstantUsers();
    bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();

    // Make all users of the alias use the aliasee instead.
    if (!J->use_empty()) {
      J->replaceAllUsesWith(Aliasee);
      ++NumAliasesResolved;
      Changed = true;
    }

    // If the alias is externally visible, we may still be able to simplify it.
    if (!J->hasLocalLinkage()) {
      // If the aliasee has internal linkage, give it the name and linkage
      // of the alias, and delete the alias.  This turns:
      //   define internal ... @f(...)
      //   @a = alias ... @f
      // into:
      //   define ... @a(...)
      if (!Target->hasLocalLinkage())
        continue;

      // Do not perform the transform if multiple aliases potentially target
      // the aliasee.  This check also ensures that it is safe to replace the
      // section and other attributes of the aliasee with those of the alias.
      if (!hasOneUse)
        continue;

      // Give the aliasee the name, linkage and other attributes of the alias.
      Target->takeName(J);
      Target->setLinkage(J->getLinkage());
      Target->GlobalValue::copyAttributesFrom(J);
    }

    // Delete the alias.
    M.getAliasList().erase(J);
    ++NumAliasesRemoved;
    Changed = true;
  }

  return Changed;
}

static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
  if (!TLI->has(LibFunc::cxa_atexit))
    return 0;

  Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));

  if (!Fn)
    return 0;

  FunctionType *FTy = Fn->getFunctionType();

  // Checking that the function has the right return type, the right number of
  // parameters and that they all have pointer types should be enough.
  if (!FTy->getReturnType()->isIntegerTy() ||
      FTy->getNumParams() != 3 ||
      !FTy->getParamType(0)->isPointerTy() ||
      !FTy->getParamType(1)->isPointerTy() ||
      !FTy->getParamType(2)->isPointerTy())
    return 0;

  return Fn;
}
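
// The declaration we expect to match, as commonly emitted for the Itanium
// ABI (a sketch; the exact pointer types vary by front end):
//   declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)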

/// cxxDtorIsEmpty - Returns whether the given function is an empty C++
/// destructor and can therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code so we only look for a function with a single basic block, where
/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
/// other side-effect free instructions.
static bool cxxDtorIsEmpty(const Function &Fn,
                           SmallPtrSet<const Function *, 8> &CalledFunctions) {
  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
  // nounwind, but that doesn't seem worth doing.
  if (Fn.isDeclaration())
    return false;

  if (++Fn.begin() != Fn.end())
    return false;

  const BasicBlock &EntryBlock = Fn.getEntryBlock();
  for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
       I != E; ++I) {
    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
      // Ignore debug intrinsics.
      if (isa<DbgInfoIntrinsic>(CI))
        continue;

      const Function *CalledFn = CI->getCalledFunction();

      if (!CalledFn)
        return false;

      SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);

      // Don't treat recursive functions as empty.
      if (!NewCalledFunctions.insert(CalledFn))
        return false;

      if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
        return false;
    } else if (isa<ReturnInst>(*I))
      return true; // We're done.
    else if (I->mayHaveSideEffects())
      return false; // Destructor with side effects, bail.
  }

  return false;
}
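
// For illustration (hypothetical C++), a destructor like
//   struct S { ~S() {} };
// compiles to a single-block function containing only 'ret void', which this
// predicate accepts; a dtor that merely calls other such empty dtors (e.g.
// for empty bases or members) is accepted recursively.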

bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
  /// Itanium C++ ABI p3.3.5:
  ///
  ///   After constructing a global (or local static) object, that will require
  ///   destruction on exit, a termination function is registered as follows:
  ///
  ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
  ///
  ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
  ///   call f(p) when DSO d is unloaded, before all such termination calls
  ///   registered before this one. It returns zero if registration is
  ///   successful, nonzero on failure.
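
  // For illustration (hypothetical IR), the registration we try to remove
  // looks like:
  //   call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S*)*
  //     @_ZN1SD1Ev to void (i8*)*), i8* bitcast (%struct.S* @s to i8*),
  //     i8* @__dso_handle)
  // If @_ZN1SD1Ev is empty by the predicate above, the call is deleted.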

  // This pass will look for calls to __cxa_atexit where the registered
  // destructor is trivial and remove them.
  bool Changed = false;

  for (Function::use_iterator I = CXAAtExitFn->use_begin(),
       E = CXAAtExitFn->use_end(); I != E;) {
    // We're only interested in calls. Theoretically, we could handle invoke
    // instructions as well, but neither llvm-gcc nor clang generates invokes
    // to __cxa_atexit.
    CallInst *CI = dyn_cast<CallInst>(*I++);
    if (!CI)
      continue;

    Function *DtorFn =
      dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
    if (!DtorFn)
      continue;

    SmallPtrSet<const Function *, 8> CalledFunctions;
    if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
      continue;

    // Just remove the call.
    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
    CI->eraseFromParent();

    ++NumCXXDtorsRemoved;

    Changed = true;
  }

  return Changed;
}

bool GlobalOpt::runOnModule(Module &M) {
  bool Changed = false;

  TD = getAnalysisIfAvailable<TargetData>();
  TLI = &getAnalysis<TargetLibraryInfo>();

  // Try to find the llvm.global_ctors list.
  GlobalVariable *GlobalCtors = FindGlobalCtors(M);

  Function *CXAAtExitFn = FindCXAAtExit(M, TLI);

  bool LocalChange = true;
  while (LocalChange) {
    LocalChange = false;

    // Delete trivially dead functions and convert calling conventions
    // (ccc -> fastcc) where it is safe to do so.
    LocalChange |= OptimizeFunctions(M);

    // Optimize global_ctors list.
    if (GlobalCtors)
      LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);

    // Optimize non-address-taken globals.
    LocalChange |= OptimizeGlobalVars(M);

    // Resolve aliases, when possible.
    LocalChange |= OptimizeGlobalAliases(M);

    // Try to remove trivial global destructors.
    if (CXAAtExitFn)
      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);

    Changed |= LocalChange;
  }

  // TODO: Move all global ctors functions to the end of the module for code
  // layout.

  return Changed;
}