Home | History | Annotate | Download | only in Analysis
      1 //======- CFLGraph.h - Abstract stratified sets implementation. --------======//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 /// \file
     10 /// This file defines CFLGraph, an auxiliary data structure used by CFL-based
     11 /// alias analysis.
     12 ///
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_ANALYSIS_CFLGRAPH_H
     16 #define LLVM_ANALYSIS_CFLGRAPH_H
     17 
     18 #include "AliasAnalysisSummary.h"
     19 #include "llvm/ADT/SmallPtrSet.h"
     20 #include "llvm/Analysis/MemoryBuiltins.h"
     21 #include "llvm/IR/InstVisitor.h"
     22 #include "llvm/IR/Instructions.h"
     23 
     24 namespace llvm {
     25 namespace cflaa {
     26 
     27 /// \brief The Program Expression Graph (PEG) of CFL analysis
     28 /// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
     29 /// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
     30 /// the main purpose of this graph is to abstract away unrelated facts and
     31 /// translate the rest into a form that can be easily digested by CFL analyses.
     32 /// Each Node in the graph is an InstantiatedValue, and each edge represent a
     33 /// pointer assignment between InstantiatedValue. Pointer
     34 /// references/dereferences are not explicitly stored in the graph: we
     35 /// implicitly assume that for each node (X, I) it has a dereference edge to (X,
     36 /// I+1) and a reference edge to (X, I-1).
     37 class CFLGraph {
     38 public:
     39   typedef InstantiatedValue Node;
     40 
     41   struct Edge {
     42     Node Other;
     43   };
     44 
     45   typedef std::vector<Edge> EdgeList;
     46 
     47   struct NodeInfo {
     48     EdgeList Edges;
     49     AliasAttrs Attr;
     50   };
     51 
     52   class ValueInfo {
     53     std::vector<NodeInfo> Levels;
     54 
     55   public:
     56     bool addNodeToLevel(unsigned Level) {
     57       auto NumLevels = Levels.size();
     58       if (NumLevels > Level)
     59         return false;
     60       Levels.resize(Level + 1);
     61       return true;
     62     }
     63 
     64     NodeInfo &getNodeInfoAtLevel(unsigned Level) {
     65       assert(Level < Levels.size());
     66       return Levels[Level];
     67     }
     68     const NodeInfo &getNodeInfoAtLevel(unsigned Level) const {
     69       assert(Level < Levels.size());
     70       return Levels[Level];
     71     }
     72 
     73     unsigned getNumLevels() const { return Levels.size(); }
     74   };
     75 
     76 private:
     77   typedef DenseMap<Value *, ValueInfo> ValueMap;
     78   ValueMap ValueImpls;
     79 
     80   const NodeInfo *getNode(Node N) const {
     81     auto Itr = ValueImpls.find(N.Val);
     82     if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
     83       return nullptr;
     84     return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
     85   }
     86   NodeInfo *getNode(Node N) {
     87     auto Itr = ValueImpls.find(N.Val);
     88     if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
     89       return nullptr;
     90     return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
     91   }
     92 
     93 public:
     94   typedef ValueMap::const_iterator const_value_iterator;
     95 
     96   bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) {
     97     assert(N.Val != nullptr);
     98     auto &ValInfo = ValueImpls[N.Val];
     99     auto Changed = ValInfo.addNodeToLevel(N.DerefLevel);
    100     ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr;
    101     return Changed;
    102   }
    103 
    104   void addAttr(Node N, AliasAttrs Attr) {
    105     auto *Info = getNode(N);
    106     assert(Info != nullptr);
    107     Info->Attr |= Attr;
    108   }
    109 
    110   void addEdge(Node From, Node To, int64_t Offset = 0) {
    111     assert(getNode(To) != nullptr);
    112 
    113     auto *FromInfo = getNode(From);
    114     assert(FromInfo != nullptr);
    115     FromInfo->Edges.push_back(Edge{To});
    116   }
    117 
    118   AliasAttrs attrFor(Node N) const {
    119     auto *Info = getNode(N);
    120     assert(Info != nullptr);
    121     return Info->Attr;
    122   }
    123 
    124   iterator_range<const_value_iterator> value_mappings() const {
    125     return make_range<const_value_iterator>(ValueImpls.begin(),
    126                                             ValueImpls.end());
    127   }
    128 };
    129 
    130 ///\brief A builder class used to create CFLGraph instance from a given function
    131 /// The CFL-AA that uses this builder must provide its own type as a template
    132 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
    133 /// needs a way of obtaining the summary of other functions when callinsts are
    134 /// encountered.
    135 /// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
    136 /// member function that takes a Function& and returns the corresponding summary
    137 /// as a const AliasSummary*.
    138 template <typename CFLAA> class CFLGraphBuilder {
    139   // Input of the builder
    140   CFLAA &Analysis;
    141   const TargetLibraryInfo &TLI;
    142 
    143   // Output of the builder
    144   CFLGraph Graph;
    145   SmallVector<Value *, 4> ReturnedValues;
    146 
    147   // Helper class
    148   /// Gets the edges our graph should have, based on an Instruction*
    149   class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
    150     CFLAA &AA;
    151     const TargetLibraryInfo &TLI;
    152 
    153     CFLGraph &Graph;
    154     SmallVectorImpl<Value *> &ReturnValues;
    155 
    156     static bool hasUsefulEdges(ConstantExpr *CE) {
    157       // ConstantExpr doesn't have terminators, invokes, or fences, so only
    158       // needs
    159       // to check for compares.
    160       return CE->getOpcode() != Instruction::ICmp &&
    161              CE->getOpcode() != Instruction::FCmp;
    162     }
    163 
    164     // Returns possible functions called by CS into the given SmallVectorImpl.
    165     // Returns true if targets found, false otherwise.
    166     static bool getPossibleTargets(CallSite CS,
    167                                    SmallVectorImpl<Function *> &Output) {
    168       if (auto *Fn = CS.getCalledFunction()) {
    169         Output.push_back(Fn);
    170         return true;
    171       }
    172 
    173       // TODO: If the call is indirect, we might be able to enumerate all
    174       // potential
    175       // targets of the call and return them, rather than just failing.
    176       return false;
    177     }
    178 
    179     void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) {
    180       assert(Val != nullptr && Val->getType()->isPointerTy());
    181       if (auto GVal = dyn_cast<GlobalValue>(Val)) {
    182         if (Graph.addNode(InstantiatedValue{GVal, 0},
    183                           getGlobalOrArgAttrFromValue(*GVal)))
    184           Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown());
    185       } else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) {
    186         if (hasUsefulEdges(CExpr)) {
    187           if (Graph.addNode(InstantiatedValue{CExpr, 0}))
    188             visitConstantExpr(CExpr);
    189         }
    190       } else
    191         Graph.addNode(InstantiatedValue{Val, 0}, Attr);
    192     }
    193 
    194     void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) {
    195       assert(From != nullptr && To != nullptr);
    196       if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
    197         return;
    198       addNode(From);
    199       if (To != From) {
    200         addNode(To);
    201         Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0},
    202                       Offset);
    203       }
    204     }
    205 
    206     void addDerefEdge(Value *From, Value *To) {
    207       assert(From != nullptr && To != nullptr);
    208       if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
    209         return;
    210       addNode(From);
    211       addNode(To);
    212       Graph.addNode(InstantiatedValue{From, 1});
    213       Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0});
    214     }
    215 
    216   public:
    217     GetEdgesVisitor(CFLGraphBuilder &Builder)
    218         : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph),
    219           ReturnValues(Builder.ReturnedValues) {}
    220 
    221     void visitInstruction(Instruction &) {
    222       llvm_unreachable("Unsupported instruction encountered");
    223     }
    224 
    225     void visitReturnInst(ReturnInst &Inst) {
    226       if (auto RetVal = Inst.getReturnValue()) {
    227         if (RetVal->getType()->isPointerTy()) {
    228           addNode(RetVal);
    229           ReturnValues.push_back(RetVal);
    230         }
    231       }
    232     }
    233 
    234     void visitPtrToIntInst(PtrToIntInst &Inst) {
    235       auto *Ptr = Inst.getOperand(0);
    236       addNode(Ptr, getAttrEscaped());
    237     }
    238 
    239     void visitIntToPtrInst(IntToPtrInst &Inst) {
    240       auto *Ptr = &Inst;
    241       addNode(Ptr, getAttrUnknown());
    242     }
    243 
    244     void visitCastInst(CastInst &Inst) {
    245       auto *Src = Inst.getOperand(0);
    246       addAssignEdge(Src, &Inst);
    247     }
    248 
    249     void visitBinaryOperator(BinaryOperator &Inst) {
    250       auto *Op1 = Inst.getOperand(0);
    251       auto *Op2 = Inst.getOperand(1);
    252       addAssignEdge(Op1, &Inst);
    253       addAssignEdge(Op2, &Inst);
    254     }
    255 
    256     void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
    257       auto *Ptr = Inst.getPointerOperand();
    258       auto *Val = Inst.getNewValOperand();
    259       addDerefEdge(Ptr, Val);
    260     }
    261 
    262     void visitAtomicRMWInst(AtomicRMWInst &Inst) {
    263       auto *Ptr = Inst.getPointerOperand();
    264       auto *Val = Inst.getValOperand();
    265       addDerefEdge(Ptr, Val);
    266     }
    267 
    268     void visitPHINode(PHINode &Inst) {
    269       for (Value *Val : Inst.incoming_values())
    270         addAssignEdge(Val, &Inst);
    271     }
    272 
    273     void visitGetElementPtrInst(GetElementPtrInst &Inst) {
    274       auto *Op = Inst.getPointerOperand();
    275       addAssignEdge(Op, &Inst);
    276     }
    277 
    278     void visitSelectInst(SelectInst &Inst) {
    279       // Condition is not processed here (The actual statement producing
    280       // the condition result is processed elsewhere). For select, the
    281       // condition is evaluated, but not loaded, stored, or assigned
    282       // simply as a result of being the condition of a select.
    283 
    284       auto *TrueVal = Inst.getTrueValue();
    285       auto *FalseVal = Inst.getFalseValue();
    286       addAssignEdge(TrueVal, &Inst);
    287       addAssignEdge(FalseVal, &Inst);
    288     }
    289 
    290     void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); }
    291 
    292     void visitLoadInst(LoadInst &Inst) {
    293       auto *Ptr = Inst.getPointerOperand();
    294       auto *Val = &Inst;
    295       addDerefEdge(Ptr, Val);
    296     }
    297 
    298     void visitStoreInst(StoreInst &Inst) {
    299       auto *Ptr = Inst.getPointerOperand();
    300       auto *Val = Inst.getValueOperand();
    301       addDerefEdge(Ptr, Val);
    302     }
    303 
    304     void visitVAArgInst(VAArgInst &Inst) {
    305       // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
    306       // does
    307       // two things:
    308       //  1. Loads a value from *((T*)*Ptr).
    309       //  2. Increments (stores to) *Ptr by some target-specific amount.
    310       // For now, we'll handle this like a landingpad instruction (by placing
    311       // the
    312       // result in its own group, and having that group alias externals).
    313       addNode(&Inst, getAttrUnknown());
    314     }
    315 
    316     static bool isFunctionExternal(Function *Fn) {
    317       return !Fn->hasExactDefinition();
    318     }
    319 
    320     bool tryInterproceduralAnalysis(CallSite CS,
    321                                     const SmallVectorImpl<Function *> &Fns) {
    322       assert(Fns.size() > 0);
    323 
    324       if (CS.arg_size() > MaxSupportedArgsInSummary)
    325         return false;
    326 
    327       // Exit early if we'll fail anyway
    328       for (auto *Fn : Fns) {
    329         if (isFunctionExternal(Fn) || Fn->isVarArg())
    330           return false;
    331         // Fail if the caller does not provide enough arguments
    332         assert(Fn->arg_size() <= CS.arg_size());
    333         if (!AA.getAliasSummary(*Fn))
    334           return false;
    335       }
    336 
    337       for (auto *Fn : Fns) {
    338         auto Summary = AA.getAliasSummary(*Fn);
    339         assert(Summary != nullptr);
    340 
    341         auto &RetParamRelations = Summary->RetParamRelations;
    342         for (auto &Relation : RetParamRelations) {
    343           auto IRelation = instantiateExternalRelation(Relation, CS);
    344           if (IRelation.hasValue()) {
    345             Graph.addNode(IRelation->From);
    346             Graph.addNode(IRelation->To);
    347             Graph.addEdge(IRelation->From, IRelation->To);
    348           }
    349         }
    350 
    351         auto &RetParamAttributes = Summary->RetParamAttributes;
    352         for (auto &Attribute : RetParamAttributes) {
    353           auto IAttr = instantiateExternalAttribute(Attribute, CS);
    354           if (IAttr.hasValue())
    355             Graph.addNode(IAttr->IValue, IAttr->Attr);
    356         }
    357       }
    358 
    359       return true;
    360     }
    361 
    362     void visitCallSite(CallSite CS) {
    363       auto Inst = CS.getInstruction();
    364 
    365       // Make sure all arguments and return value are added to the graph first
    366       for (Value *V : CS.args())
    367         if (V->getType()->isPointerTy())
    368           addNode(V);
    369       if (Inst->getType()->isPointerTy())
    370         addNode(Inst);
    371 
    372       // Check if Inst is a call to a library function that
    373       // allocates/deallocates
    374       // on the heap. Those kinds of functions do not introduce any aliases.
    375       // TODO: address other common library functions such as realloc(),
    376       // strdup(),
    377       // etc.
    378       if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
    379           isFreeCall(Inst, &TLI))
    380         return;
    381 
    382       // TODO: Add support for noalias args/all the other fun function
    383       // attributes
    384       // that we can tack on.
    385       SmallVector<Function *, 4> Targets;
    386       if (getPossibleTargets(CS, Targets))
    387         if (tryInterproceduralAnalysis(CS, Targets))
    388           return;
    389 
    390       // Because the function is opaque, we need to note that anything
    391       // could have happened to the arguments (unless the function is marked
    392       // readonly or readnone), and that the result could alias just about
    393       // anything, too (unless the result is marked noalias).
    394       if (!CS.onlyReadsMemory())
    395         for (Value *V : CS.args()) {
    396           if (V->getType()->isPointerTy()) {
    397             // The argument itself escapes.
    398             Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped());
    399             // The fate of argument memory is unknown. Note that since
    400             // AliasAttrs is transitive with respect to dereference, we only
    401             // need to specify it for the first-level memory.
    402             Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown());
    403           }
    404         }
    405 
    406       if (Inst->getType()->isPointerTy()) {
    407         auto *Fn = CS.getCalledFunction();
    408         if (Fn == nullptr || !Fn->doesNotAlias(0))
    409           // No need to call addNode() since we've added Inst at the
    410           // beginning of this function and we know it is not a global.
    411           Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
    412       }
    413     }
    414 
    415     /// Because vectors/aggregates are immutable and unaddressable, there's
    416     /// nothing we can do to coax a value out of them, other than calling
    417     /// Extract{Element,Value}. We can effectively treat them as pointers to
    418     /// arbitrary memory locations we can store in and load from.
    419     void visitExtractElementInst(ExtractElementInst &Inst) {
    420       auto *Ptr = Inst.getVectorOperand();
    421       auto *Val = &Inst;
    422       addDerefEdge(Ptr, Val);
    423     }
    424 
    425     void visitInsertElementInst(InsertElementInst &Inst) {
    426       auto *Vec = Inst.getOperand(0);
    427       auto *Val = Inst.getOperand(1);
    428       addAssignEdge(Vec, &Inst);
    429       addDerefEdge(&Inst, Val);
    430     }
    431 
    432     void visitLandingPadInst(LandingPadInst &Inst) {
    433       // Exceptions come from "nowhere", from our analysis' perspective.
    434       // So we place the instruction its own group, noting that said group may
    435       // alias externals
    436       addNode(&Inst, getAttrUnknown());
    437     }
    438 
    439     void visitInsertValueInst(InsertValueInst &Inst) {
    440       auto *Agg = Inst.getOperand(0);
    441       auto *Val = Inst.getOperand(1);
    442       addAssignEdge(Agg, &Inst);
    443       addDerefEdge(&Inst, Val);
    444     }
    445 
    446     void visitExtractValueInst(ExtractValueInst &Inst) {
    447       auto *Ptr = Inst.getAggregateOperand();
    448       addDerefEdge(Ptr, &Inst);
    449     }
    450 
    451     void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
    452       auto *From1 = Inst.getOperand(0);
    453       auto *From2 = Inst.getOperand(1);
    454       addAssignEdge(From1, &Inst);
    455       addAssignEdge(From2, &Inst);
    456     }
    457 
    458     void visitConstantExpr(ConstantExpr *CE) {
    459       switch (CE->getOpcode()) {
    460       default:
    461         llvm_unreachable("Unknown instruction type encountered!");
    462 // Build the switch statement using the Instruction.def file.
    463 #define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
    464   case Instruction::OPCODE:                                                    \
    465     this->visit##OPCODE(*(CLASS *)CE);                                         \
    466     break;
    467 #include "llvm/IR/Instruction.def"
    468       }
    469     }
    470   };
    471 
    472   // Helper functions
    473 
    474   // Determines whether or not we an instruction is useless to us (e.g.
    475   // FenceInst)
    476   static bool hasUsefulEdges(Instruction *Inst) {
    477     bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
    478                                     !isa<InvokeInst>(Inst) &&
    479                                     !isa<ReturnInst>(Inst);
    480     return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
    481            !IsNonInvokeRetTerminator;
    482   }
    483 
    484   void addArgumentToGraph(Argument &Arg) {
    485     if (Arg.getType()->isPointerTy()) {
    486       Graph.addNode(InstantiatedValue{&Arg, 0},
    487                     getGlobalOrArgAttrFromValue(Arg));
    488       // Pointees of a formal parameter is known to the caller
    489       Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller());
    490     }
    491   }
    492 
    493   // Given an Instruction, this will add it to the graph, along with any
    494   // Instructions that are potentially only available from said Instruction
    495   // For example, given the following line:
    496   //   %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
    497   // addInstructionToGraph would add both the `load` and `getelementptr`
    498   // instructions to the graph appropriately.
    499   void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) {
    500     if (!hasUsefulEdges(&Inst))
    501       return;
    502 
    503     Visitor.visit(Inst);
    504   }
    505 
    506   // Builds the graph needed for constructing the StratifiedSets for the given
    507   // function
    508   void buildGraphFrom(Function &Fn) {
    509     GetEdgesVisitor Visitor(*this);
    510 
    511     for (auto &Bb : Fn.getBasicBlockList())
    512       for (auto &Inst : Bb.getInstList())
    513         addInstructionToGraph(Visitor, Inst);
    514 
    515     for (auto &Arg : Fn.args())
    516       addArgumentToGraph(Arg);
    517   }
    518 
    519 public:
    520   CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn)
    521       : Analysis(Analysis), TLI(TLI) {
    522     buildGraphFrom(Fn);
    523   }
    524 
    525   const CFLGraph &getCFLGraph() const { return Graph; }
    526   const SmallVector<Value *, 4> &getReturnValues() const {
    527     return ReturnedValues;
    528   }
    529 };
    530 }
    531 }
    532 
    533 #endif
    534