Home | History | Annotate | Download | only in Utils
      1 //===- SplitModule.cpp - Split a module into partitions -------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the function llvm::SplitModule, which splits a module
     11 // into multiple linkable partitions. It can be used to implement parallel code
     12 // generation for link-time optimization.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #define DEBUG_TYPE "split-module"
     17 
     18 #include "llvm/Transforms/Utils/SplitModule.h"
     19 #include "llvm/ADT/EquivalenceClasses.h"
     20 #include "llvm/ADT/Hashing.h"
     21 #include "llvm/ADT/MapVector.h"
     22 #include "llvm/ADT/SetVector.h"
     23 #include "llvm/IR/Constants.h"
     24 #include "llvm/IR/Function.h"
     25 #include "llvm/IR/GlobalAlias.h"
     26 #include "llvm/IR/GlobalObject.h"
     27 #include "llvm/IR/GlobalValue.h"
     28 #include "llvm/IR/Module.h"
     29 #include "llvm/Support/Debug.h"
     30 #include "llvm/Support/MD5.h"
     31 #include "llvm/Support/raw_ostream.h"
     32 #include "llvm/Transforms/Utils/Cloning.h"
     33 #include <queue>
     34 
     35 using namespace llvm;
     36 
     37 namespace {
     38 typedef EquivalenceClasses<const GlobalValue *> ClusterMapType;
     39 typedef DenseMap<const Comdat *, const GlobalValue *> ComdatMembersType;
     40 typedef DenseMap<const GlobalValue *, unsigned> ClusterIDMapType;
     41 }
     42 
     43 static void addNonConstUser(ClusterMapType &GVtoClusterMap,
     44                             const GlobalValue *GV, const User *U) {
     45   assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user");
     46 
     47   if (const Instruction *I = dyn_cast<Instruction>(U)) {
     48     const GlobalValue *F = I->getParent()->getParent();
     49     GVtoClusterMap.unionSets(GV, F);
     50   } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) ||
     51              isa<GlobalVariable>(U)) {
     52     GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U));
     53   } else {
     54     llvm_unreachable("Underimplemented use case");
     55   }
     56 }
     57 
     58 // Adds all GlobalValue users of V to the same cluster as GV.
     59 static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
     60                                    const GlobalValue *GV, const Value *V) {
     61   for (auto *U : V->users()) {
     62     SmallVector<const User *, 4> Worklist;
     63     Worklist.push_back(U);
     64     while (!Worklist.empty()) {
     65       const User *UU = Worklist.pop_back_val();
     66       // For each constant that is not a GV (a pure const) recurse.
     67       if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) {
     68         Worklist.append(UU->user_begin(), UU->user_end());
     69         continue;
     70       }
     71       addNonConstUser(GVtoClusterMap, GV, UU);
     72     }
     73   }
     74 }
     75 
     76 // Find partitions for module in the way that no locals need to be
     77 // globalized.
     78 // Try to balance pack those partitions into N files since this roughly equals
     79 // thread balancing for the backend codegen step.
     80 static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
     81                            unsigned N) {
     82   // At this point module should have the proper mix of globals and locals.
     83   // As we attempt to partition this module, we must not change any
     84   // locals to globals.
     85   DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n");
     86   ClusterMapType GVtoClusterMap;
     87   ComdatMembersType ComdatMembers;
     88 
     89   auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) {
     90     if (GV.isDeclaration())
     91       return;
     92 
     93     if (!GV.hasName())
     94       GV.setName("__llvmsplit_unnamed");
     95 
     96     // Comdat groups must not be partitioned. For comdat groups that contain
     97     // locals, record all their members here so we can keep them together.
     98     // Comdat groups that only contain external globals are already handled by
     99     // the MD5-based partitioning.
    100     if (const Comdat *C = GV.getComdat()) {
    101       auto &Member = ComdatMembers[C];
    102       if (Member)
    103         GVtoClusterMap.unionSets(Member, &GV);
    104       else
    105         Member = &GV;
    106     }
    107 
    108     // For aliases we should not separate them from their aliasees regardless
    109     // of linkage.
    110     if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
    111       if (const GlobalObject *Base = GIS->getBaseObject())
    112         GVtoClusterMap.unionSets(&GV, Base);
    113     }
    114 
    115     if (const Function *F = dyn_cast<Function>(&GV)) {
    116       for (const BasicBlock &BB : *F) {
    117         BlockAddress *BA = BlockAddress::lookup(&BB);
    118         if (!BA || !BA->isConstantUsed())
    119           continue;
    120         addAllGlobalValueUsers(GVtoClusterMap, F, BA);
    121       }
    122     }
    123 
    124     if (GV.hasLocalLinkage())
    125       addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
    126   };
    127 
    128   std::for_each(M->begin(), M->end(), recordGVSet);
    129   std::for_each(M->global_begin(), M->global_end(), recordGVSet);
    130   std::for_each(M->alias_begin(), M->alias_end(), recordGVSet);
    131 
    132   // Assigned all GVs to merged clusters while balancing number of objects in
    133   // each.
    134   auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
    135                             const std::pair<unsigned, unsigned> &b) {
    136     if (a.second || b.second)
    137       return a.second > b.second;
    138     else
    139       return a.first > b.first;
    140   };
    141 
    142   std::priority_queue<std::pair<unsigned, unsigned>,
    143                       std::vector<std::pair<unsigned, unsigned>>,
    144                       decltype(CompareClusters)>
    145       BalancinQueue(CompareClusters);
    146   // Pre-populate priority queue with N slot blanks.
    147   for (unsigned i = 0; i < N; ++i)
    148     BalancinQueue.push(std::make_pair(i, 0));
    149 
    150   typedef std::pair<unsigned, ClusterMapType::iterator> SortType;
    151   SmallVector<SortType, 64> Sets;
    152   SmallPtrSet<const GlobalValue *, 32> Visited;
    153 
    154   // To guarantee determinism, we have to sort SCC according to size.
    155   // When size is the same, use leader's name.
    156   for (ClusterMapType::iterator I = GVtoClusterMap.begin(),
    157                                 E = GVtoClusterMap.end(); I != E; ++I)
    158     if (I->isLeader())
    159       Sets.push_back(
    160           std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
    161                                        GVtoClusterMap.member_end()), I));
    162 
    163   std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) {
    164     if (a.first == b.first)
    165       return a.second->getData()->getName() > b.second->getData()->getName();
    166     else
    167       return a.first > b.first;
    168   });
    169 
    170   for (auto &I : Sets) {
    171     unsigned CurrentClusterID = BalancinQueue.top().first;
    172     unsigned CurrentClusterSize = BalancinQueue.top().second;
    173     BalancinQueue.pop();
    174 
    175     DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first
    176                  << ") ----> " << I.second->getData()->getName() << "\n");
    177 
    178     for (ClusterMapType::member_iterator MI =
    179              GVtoClusterMap.findLeader(I.second);
    180          MI != GVtoClusterMap.member_end(); ++MI) {
    181       if (!Visited.insert(*MI).second)
    182         continue;
    183       DEBUG(dbgs() << "----> " << (*MI)->getName()
    184                    << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
    185       Visited.insert(*MI);
    186       ClusterIDMap[*MI] = CurrentClusterID;
    187       CurrentClusterSize++;
    188     }
    189     // Add this set size to the number of entries in this cluster.
    190     BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize));
    191   }
    192 }
    193 
    194 static void externalize(GlobalValue *GV) {
    195   if (GV->hasLocalLinkage()) {
    196     GV->setLinkage(GlobalValue::ExternalLinkage);
    197     GV->setVisibility(GlobalValue::HiddenVisibility);
    198   }
    199 
    200   // Unnamed entities must be named consistently between modules. setName will
    201   // give a distinct name to each such entity.
    202   if (!GV->hasName())
    203     GV->setName("__llvmsplit_unnamed");
    204 }
    205 
    206 // Returns whether GV should be in partition (0-based) I of N.
    207 static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
    208   if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
    209     if (const GlobalObject *Base = GIS->getBaseObject())
    210       GV = Base;
    211 
    212   StringRef Name;
    213   if (const Comdat *C = GV->getComdat())
    214     Name = C->getName();
    215   else
    216     Name = GV->getName();
    217 
    218   // Partition by MD5 hash. We only need a few bits for evenness as the number
    219   // of partitions will generally be in the 1-2 figure range; the low 16 bits
    220   // are enough.
    221   MD5 H;
    222   MD5::MD5Result R;
    223   H.update(Name);
    224   H.final(R);
    225   return (R[0] | (R[1] << 8)) % N == I;
    226 }
    227 
    228 void llvm::SplitModule(
    229     std::unique_ptr<Module> M, unsigned N,
    230     function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
    231     bool PreserveLocals) {
    232   if (!PreserveLocals) {
    233     for (Function &F : *M)
    234       externalize(&F);
    235     for (GlobalVariable &GV : M->globals())
    236       externalize(&GV);
    237     for (GlobalAlias &GA : M->aliases())
    238       externalize(&GA);
    239     for (GlobalIFunc &GIF : M->ifuncs())
    240       externalize(&GIF);
    241   }
    242 
    243   // This performs splitting without a need for externalization, which might not
    244   // always be possible.
    245   ClusterIDMapType ClusterIDMap;
    246   findPartitions(M.get(), ClusterIDMap, N);
    247 
    248   // FIXME: We should be able to reuse M as the last partition instead of
    249   // cloning it.
    250   for (unsigned I = 0; I < N; ++I) {
    251     ValueToValueMapTy VMap;
    252     std::unique_ptr<Module> MPart(
    253         CloneModule(M.get(), VMap, [&](const GlobalValue *GV) {
    254           if (ClusterIDMap.count(GV))
    255             return (ClusterIDMap[GV] == I);
    256           else
    257             return isInPartition(GV, I, N);
    258         }));
    259     if (I != 0)
    260       MPart->setModuleInlineAsm("");
    261     ModuleCallback(std::move(MPart));
    262   }
    263 }
    264