1 //===- CallGraph.h - Build a Module's call graph ----------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// 11 /// This file provides interfaces used to build and manipulate a call graph, 12 /// which is a very useful tool for interprocedural optimization. 13 /// 14 /// Every function in a module is represented as a node in the call graph. The 15 /// callgraph node keeps track of which functions are called by the function 16 /// corresponding to the node. 17 /// 18 /// A call graph may contain nodes where the function that they correspond to 19 /// is null. These 'external' nodes are used to represent control flow that is 20 /// not represented (or analyzable) in the module. In particular, this 21 /// analysis builds one external node such that: 22 /// 1. All functions in the module without internal linkage will have edges 23 /// from this external node, indicating that they could be called by 24 /// functions outside of the module. 25 /// 2. All functions whose address is used for something more than a direct 26 /// call, for example being stored into a memory location will also have 27 /// an edge from this external node. Since they may be called by an 28 /// unknown caller later, they must be tracked as such. 29 /// 30 /// There is a second external node added for calls that leave this module. 31 /// Functions have a call edge to the external node iff: 32 /// 1. The function is external, reflecting the fact that they could call 33 /// anything without internal linkage or that has its address taken. 34 /// 2. The function contains an indirect function call. 35 /// 36 /// As an extension in the future, there may be multiple nodes with a null 37 /// function. These will be used when we can prove (through pointer analysis) 38 /// that an indirect call site can call only a specific set of functions. 39 /// 40 /// Because of these properties, the CallGraph captures a conservative superset 41 /// of all of the caller-callee relationships, which is useful for 42 /// transformations. 43 /// 44 /// The CallGraph class also attempts to figure out what the root of the 45 /// CallGraph is, which it currently does by looking for a function named 46 /// 'main'. If no function named 'main' is found, the external node is used as 47 /// the entry node, reflecting the fact that any function without internal 48 /// linkage could be called into (which is common for libraries). 49 /// 50 //===----------------------------------------------------------------------===// 51 52 #ifndef LLVM_ANALYSIS_CALLGRAPH_H 53 #define LLVM_ANALYSIS_CALLGRAPH_H 54 55 #include "llvm/ADT/GraphTraits.h" 56 #include "llvm/ADT/STLExtras.h" 57 #include "llvm/IR/CallSite.h" 58 #include "llvm/IR/Function.h" 59 #include "llvm/IR/Intrinsics.h" 60 #include "llvm/IR/PassManager.h" 61 #include "llvm/IR/ValueHandle.h" 62 #include "llvm/Pass.h" 63 #include <map> 64 65 namespace llvm { 66 67 class Function; 68 class Module; 69 class CallGraphNode; 70 71 /// \brief The basic data container for the call graph of a \c Module of IR. 72 /// 73 /// This class exposes both the interface to the call graph for a module of IR. 74 /// 75 /// The core call graph itself can also be updated to reflect changes to the IR. 76 class CallGraph { 77 Module &M; 78 79 typedef std::map<const Function *, std::unique_ptr<CallGraphNode>> 80 FunctionMapTy; 81 82 /// \brief A map from \c Function* to \c CallGraphNode*. 83 FunctionMapTy FunctionMap; 84 85 /// \brief Root is root of the call graph, or the external node if a 'main' 86 /// function couldn't be found. 87 CallGraphNode *Root; 88 89 /// \brief This node has edges to all external functions and those internal 90 /// functions that have their address taken. 91 CallGraphNode *ExternalCallingNode; 92 93 /// \brief This node has edges to it from all functions making indirect calls 94 /// or calling an external function. 95 std::unique_ptr<CallGraphNode> CallsExternalNode; 96 97 /// \brief Replace the function represented by this node by another. 98 /// 99 /// This does not rescan the body of the function, so it is suitable when 100 /// splicing the body of one function to another while also updating all 101 /// callers from the old function to the new. 102 void spliceFunction(const Function *From, const Function *To); 103 104 /// \brief Add a function to the call graph, and link the node to all of the 105 /// functions that it calls. 106 void addToCallGraph(Function *F); 107 108 public: 109 explicit CallGraph(Module &M); 110 CallGraph(CallGraph &&Arg); 111 ~CallGraph(); 112 113 void print(raw_ostream &OS) const; 114 void dump() const; 115 116 typedef FunctionMapTy::iterator iterator; 117 typedef FunctionMapTy::const_iterator const_iterator; 118 119 /// \brief Returns the module the call graph corresponds to. 120 Module &getModule() const { return M; } 121 122 inline iterator begin() { return FunctionMap.begin(); } 123 inline iterator end() { return FunctionMap.end(); } 124 inline const_iterator begin() const { return FunctionMap.begin(); } 125 inline const_iterator end() const { return FunctionMap.end(); } 126 127 /// \brief Returns the call graph node for the provided function. 128 inline const CallGraphNode *operator[](const Function *F) const { 129 const_iterator I = FunctionMap.find(F); 130 assert(I != FunctionMap.end() && "Function not in callgraph!"); 131 return I->second.get(); 132 } 133 134 /// \brief Returns the call graph node for the provided function. 135 inline CallGraphNode *operator[](const Function *F) { 136 const_iterator I = FunctionMap.find(F); 137 assert(I != FunctionMap.end() && "Function not in callgraph!"); 138 return I->second.get(); 139 } 140 141 /// \brief Returns the \c CallGraphNode which is used to represent 142 /// undetermined calls into the callgraph. 143 CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; } 144 145 CallGraphNode *getCallsExternalNode() const { 146 return CallsExternalNode.get(); 147 } 148 149 //===--------------------------------------------------------------------- 150 // Functions to keep a call graph up to date with a function that has been 151 // modified. 152 // 153 154 /// \brief Unlink the function from this module, returning it. 155 /// 156 /// Because this removes the function from the module, the call graph node is 157 /// destroyed. This is only valid if the function does not call any other 158 /// functions (ie, there are no edges in it's CGN). The easiest way to do 159 /// this is to dropAllReferences before calling this. 160 Function *removeFunctionFromModule(CallGraphNode *CGN); 161 162 /// \brief Similar to operator[], but this will insert a new CallGraphNode for 163 /// \c F if one does not already exist. 164 CallGraphNode *getOrInsertFunction(const Function *F); 165 }; 166 167 /// \brief A node in the call graph for a module. 168 /// 169 /// Typically represents a function in the call graph. There are also special 170 /// "null" nodes used to represent theoretical entries in the call graph. 171 class CallGraphNode { 172 public: 173 /// \brief A pair of the calling instruction (a call or invoke) 174 /// and the call graph node being called. 175 typedef std::pair<WeakVH, CallGraphNode *> CallRecord; 176 177 public: 178 typedef std::vector<CallRecord> CalledFunctionsVector; 179 180 /// \brief Creates a node for the specified function. 181 inline CallGraphNode(Function *F) : F(F), NumReferences(0) {} 182 183 ~CallGraphNode() { 184 assert(NumReferences == 0 && "Node deleted while references remain"); 185 } 186 187 typedef std::vector<CallRecord>::iterator iterator; 188 typedef std::vector<CallRecord>::const_iterator const_iterator; 189 190 /// \brief Returns the function that this call graph node represents. 191 Function *getFunction() const { return F; } 192 193 inline iterator begin() { return CalledFunctions.begin(); } 194 inline iterator end() { return CalledFunctions.end(); } 195 inline const_iterator begin() const { return CalledFunctions.begin(); } 196 inline const_iterator end() const { return CalledFunctions.end(); } 197 inline bool empty() const { return CalledFunctions.empty(); } 198 inline unsigned size() const { return (unsigned)CalledFunctions.size(); } 199 200 /// \brief Returns the number of other CallGraphNodes in this CallGraph that 201 /// reference this node in their callee list. 202 unsigned getNumReferences() const { return NumReferences; } 203 204 /// \brief Returns the i'th called function. 205 CallGraphNode *operator[](unsigned i) const { 206 assert(i < CalledFunctions.size() && "Invalid index"); 207 return CalledFunctions[i].second; 208 } 209 210 /// \brief Print out this call graph node. 211 void dump() const; 212 void print(raw_ostream &OS) const; 213 214 //===--------------------------------------------------------------------- 215 // Methods to keep a call graph up to date with a function that has been 216 // modified 217 // 218 219 /// \brief Removes all edges from this CallGraphNode to any functions it 220 /// calls. 221 void removeAllCalledFunctions() { 222 while (!CalledFunctions.empty()) { 223 CalledFunctions.back().second->DropRef(); 224 CalledFunctions.pop_back(); 225 } 226 } 227 228 /// \brief Moves all the callee information from N to this node. 229 void stealCalledFunctionsFrom(CallGraphNode *N) { 230 assert(CalledFunctions.empty() && 231 "Cannot steal callsite information if I already have some"); 232 std::swap(CalledFunctions, N->CalledFunctions); 233 } 234 235 /// \brief Adds a function to the list of functions called by this one. 236 void addCalledFunction(CallSite CS, CallGraphNode *M) { 237 assert(!CS.getInstruction() || !CS.getCalledFunction() || 238 !CS.getCalledFunction()->isIntrinsic() || 239 !Intrinsic::isLeaf(CS.getCalledFunction()->getIntrinsicID())); 240 CalledFunctions.emplace_back(CS.getInstruction(), M); 241 M->AddRef(); 242 } 243 244 void removeCallEdge(iterator I) { 245 I->second->DropRef(); 246 *I = CalledFunctions.back(); 247 CalledFunctions.pop_back(); 248 } 249 250 /// \brief Removes the edge in the node for the specified call site. 251 /// 252 /// Note that this method takes linear time, so it should be used sparingly. 253 void removeCallEdgeFor(CallSite CS); 254 255 /// \brief Removes all call edges from this node to the specified callee 256 /// function. 257 /// 258 /// This takes more time to execute than removeCallEdgeTo, so it should not 259 /// be used unless necessary. 260 void removeAnyCallEdgeTo(CallGraphNode *Callee); 261 262 /// \brief Removes one edge associated with a null callsite from this node to 263 /// the specified callee function. 264 void removeOneAbstractEdgeTo(CallGraphNode *Callee); 265 266 /// \brief Replaces the edge in the node for the specified call site with a 267 /// new one. 268 /// 269 /// Note that this method takes linear time, so it should be used sparingly. 270 void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode); 271 272 private: 273 friend class CallGraph; 274 275 AssertingVH<Function> F; 276 277 std::vector<CallRecord> CalledFunctions; 278 279 /// \brief The number of times that this CallGraphNode occurs in the 280 /// CalledFunctions array of this or other CallGraphNodes. 281 unsigned NumReferences; 282 283 CallGraphNode(const CallGraphNode &) = delete; 284 void operator=(const CallGraphNode &) = delete; 285 286 void DropRef() { --NumReferences; } 287 void AddRef() { ++NumReferences; } 288 289 /// \brief A special function that should only be used by the CallGraph class. 290 void allReferencesDropped() { NumReferences = 0; } 291 }; 292 293 /// \brief An analysis pass to compute the \c CallGraph for a \c Module. 294 /// 295 /// This class implements the concept of an analysis pass used by the \c 296 /// ModuleAnalysisManager to run an analysis over a module and cache the 297 /// resulting data. 298 class CallGraphAnalysis : public AnalysisInfoMixin<CallGraphAnalysis> { 299 friend AnalysisInfoMixin<CallGraphAnalysis>; 300 static char PassID; 301 302 public: 303 /// \brief A formulaic typedef to inform clients of the result type. 304 typedef CallGraph Result; 305 306 /// \brief Compute the \c CallGraph for the module \c M. 307 /// 308 /// The real work here is done in the \c CallGraph constructor. 309 CallGraph run(Module &M, ModuleAnalysisManager &) { return CallGraph(M); } 310 }; 311 312 /// \brief Printer pass for the \c CallGraphAnalysis results. 313 class CallGraphPrinterPass : public PassInfoMixin<CallGraphPrinterPass> { 314 raw_ostream &OS; 315 316 public: 317 explicit CallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} 318 PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM); 319 }; 320 321 /// \brief The \c ModulePass which wraps up a \c CallGraph and the logic to 322 /// build it. 323 /// 324 /// This class exposes both the interface to the call graph container and the 325 /// module pass which runs over a module of IR and produces the call graph. The 326 /// call graph interface is entirelly a wrapper around a \c CallGraph object 327 /// which is stored internally for each module. 328 class CallGraphWrapperPass : public ModulePass { 329 std::unique_ptr<CallGraph> G; 330 331 public: 332 static char ID; // Class identification, replacement for typeinfo 333 334 CallGraphWrapperPass(); 335 ~CallGraphWrapperPass() override; 336 337 /// \brief The internal \c CallGraph around which the rest of this interface 338 /// is wrapped. 339 const CallGraph &getCallGraph() const { return *G; } 340 CallGraph &getCallGraph() { return *G; } 341 342 typedef CallGraph::iterator iterator; 343 typedef CallGraph::const_iterator const_iterator; 344 345 /// \brief Returns the module the call graph corresponds to. 346 Module &getModule() const { return G->getModule(); } 347 348 inline iterator begin() { return G->begin(); } 349 inline iterator end() { return G->end(); } 350 inline const_iterator begin() const { return G->begin(); } 351 inline const_iterator end() const { return G->end(); } 352 353 /// \brief Returns the call graph node for the provided function. 354 inline const CallGraphNode *operator[](const Function *F) const { 355 return (*G)[F]; 356 } 357 358 /// \brief Returns the call graph node for the provided function. 359 inline CallGraphNode *operator[](const Function *F) { return (*G)[F]; } 360 361 /// \brief Returns the \c CallGraphNode which is used to represent 362 /// undetermined calls into the callgraph. 363 CallGraphNode *getExternalCallingNode() const { 364 return G->getExternalCallingNode(); 365 } 366 367 CallGraphNode *getCallsExternalNode() const { 368 return G->getCallsExternalNode(); 369 } 370 371 //===--------------------------------------------------------------------- 372 // Functions to keep a call graph up to date with a function that has been 373 // modified. 374 // 375 376 /// \brief Unlink the function from this module, returning it. 377 /// 378 /// Because this removes the function from the module, the call graph node is 379 /// destroyed. This is only valid if the function does not call any other 380 /// functions (ie, there are no edges in it's CGN). The easiest way to do 381 /// this is to dropAllReferences before calling this. 382 Function *removeFunctionFromModule(CallGraphNode *CGN) { 383 return G->removeFunctionFromModule(CGN); 384 } 385 386 /// \brief Similar to operator[], but this will insert a new CallGraphNode for 387 /// \c F if one does not already exist. 388 CallGraphNode *getOrInsertFunction(const Function *F) { 389 return G->getOrInsertFunction(F); 390 } 391 392 //===--------------------------------------------------------------------- 393 // Implementation of the ModulePass interface needed here. 394 // 395 396 void getAnalysisUsage(AnalysisUsage &AU) const override; 397 bool runOnModule(Module &M) override; 398 void releaseMemory() override; 399 400 void print(raw_ostream &o, const Module *) const override; 401 void dump() const; 402 }; 403 404 //===----------------------------------------------------------------------===// 405 // GraphTraits specializations for call graphs so that they can be treated as 406 // graphs by the generic graph algorithms. 407 // 408 409 // Provide graph traits for tranversing call graphs using standard graph 410 // traversals. 411 template <> struct GraphTraits<CallGraphNode *> { 412 typedef CallGraphNode NodeType; 413 414 typedef CallGraphNode::CallRecord CGNPairTy; 415 typedef std::pointer_to_unary_function<CGNPairTy, CallGraphNode *> 416 CGNDerefFun; 417 418 static NodeType *getEntryNode(CallGraphNode *CGN) { return CGN; } 419 420 typedef mapped_iterator<NodeType::iterator, CGNDerefFun> ChildIteratorType; 421 422 static inline ChildIteratorType child_begin(NodeType *N) { 423 return map_iterator(N->begin(), CGNDerefFun(CGNDeref)); 424 } 425 static inline ChildIteratorType child_end(NodeType *N) { 426 return map_iterator(N->end(), CGNDerefFun(CGNDeref)); 427 } 428 429 static CallGraphNode *CGNDeref(CGNPairTy P) { return P.second; } 430 }; 431 432 template <> struct GraphTraits<const CallGraphNode *> { 433 typedef const CallGraphNode NodeType; 434 435 typedef CallGraphNode::CallRecord CGNPairTy; 436 typedef std::pointer_to_unary_function<CGNPairTy, const CallGraphNode *> 437 CGNDerefFun; 438 439 static NodeType *getEntryNode(const CallGraphNode *CGN) { return CGN; } 440 441 typedef mapped_iterator<NodeType::const_iterator, CGNDerefFun> 442 ChildIteratorType; 443 444 static inline ChildIteratorType child_begin(NodeType *N) { 445 return map_iterator(N->begin(), CGNDerefFun(CGNDeref)); 446 } 447 static inline ChildIteratorType child_end(NodeType *N) { 448 return map_iterator(N->end(), CGNDerefFun(CGNDeref)); 449 } 450 451 static const CallGraphNode *CGNDeref(CGNPairTy P) { return P.second; } 452 }; 453 454 template <> 455 struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { 456 static NodeType *getEntryNode(CallGraph *CGN) { 457 return CGN->getExternalCallingNode(); // Start at the external node! 458 } 459 typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>> 460 PairTy; 461 typedef std::pointer_to_unary_function<const PairTy &, CallGraphNode &> 462 DerefFun; 463 464 // nodes_iterator/begin/end - Allow iteration over all nodes in the graph 465 typedef mapped_iterator<CallGraph::iterator, DerefFun> nodes_iterator; 466 static nodes_iterator nodes_begin(CallGraph *CG) { 467 return map_iterator(CG->begin(), DerefFun(CGdereference)); 468 } 469 static nodes_iterator nodes_end(CallGraph *CG) { 470 return map_iterator(CG->end(), DerefFun(CGdereference)); 471 } 472 473 static CallGraphNode &CGdereference(const PairTy &P) { return *P.second; } 474 }; 475 476 template <> 477 struct GraphTraits<const CallGraph *> : public GraphTraits< 478 const CallGraphNode *> { 479 static NodeType *getEntryNode(const CallGraph *CGN) { 480 return CGN->getExternalCallingNode(); // Start at the external node! 481 } 482 typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>> 483 PairTy; 484 typedef std::pointer_to_unary_function<const PairTy &, const CallGraphNode &> 485 DerefFun; 486 487 // nodes_iterator/begin/end - Allow iteration over all nodes in the graph 488 typedef mapped_iterator<CallGraph::const_iterator, DerefFun> nodes_iterator; 489 static nodes_iterator nodes_begin(const CallGraph *CG) { 490 return map_iterator(CG->begin(), DerefFun(CGdereference)); 491 } 492 static nodes_iterator nodes_end(const CallGraph *CG) { 493 return map_iterator(CG->end(), DerefFun(CGdereference)); 494 } 495 496 static const CallGraphNode &CGdereference(const PairTy &P) { 497 return *P.second; 498 } 499 }; 500 501 } // End llvm namespace 502 503 #endif 504