1 //===- NaryReassociate.h - Reassociate n-ary expressions --------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass reassociates n-ary add expressions and eliminates the redundancy 11 // exposed by the reassociation. 12 // 13 // A motivating example: 14 // 15 // void foo(int a, int b) { 16 // bar(a + b); 17 // bar((a + 2) + b); 18 // } 19 // 20 // An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify 21 // the above code to 22 // 23 // int t = a + b; 24 // bar(t); 25 // bar(t + 2); 26 // 27 // However, the Reassociate pass is unable to do that because it processes each 28 // instruction individually and believes (a + 2) + b is the best form according 29 // to its rank system. 30 // 31 // To address this limitation, NaryReassociate reassociates an expression in a 32 // form that reuses existing instructions. As a result, NaryReassociate can 33 // reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that 34 // (a + b) is computed before. 35 // 36 // NaryReassociate works as follows. For every instruction in the form of (a + 37 // b) + c, it checks whether a + c or b + c is already computed by a dominating 38 // instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b + 39 // c) + a and removes the redundancy accordingly. To efficiently look up whether 40 // an expression is computed before, we store each instruction seen and its SCEV 41 // into an SCEV-to-instruction map. 42 // 43 // Although the algorithm pattern-matches only ternary additions, it 44 // automatically handles many >3-ary expressions by walking through the function 45 // in the depth-first order. For example, given 46 // 47 // (a + c) + d 48 // ((a + b) + c) + d 49 // 50 // NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites 51 // ((a + c) + b) + d into ((a + c) + d) + b. 52 // 53 // Finally, the above dominator-based algorithm may need to be run multiple 54 // iterations before emitting optimal code. One source of this need is that we 55 // only split an operand when it is used only once. The above algorithm can 56 // eliminate an instruction and decrease the usage count of its operands. As a 57 // result, an instruction that previously had multiple uses may become a 58 // single-use instruction and thus eligible for split consideration. For 59 // example, 60 // 61 // ac = a + c 62 // ab = a + b 63 // abc = ab + c 64 // ab2 = ab + b 65 // ab2c = ab2 + c 66 // 67 // In the first iteration, we cannot reassociate abc to ac+b because ab is used 68 // twice. However, we can reassociate ab2c to abc+b in the first iteration. As a 69 // result, ab2 becomes dead and ab will be used only once in the second 70 // iteration. 71 // 72 // Limitations and TODO items: 73 // 74 // 1) We only considers n-ary adds and muls for now. This should be extended 75 // and generalized. 76 // 77 //===----------------------------------------------------------------------===// 78 79 #ifndef LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H 80 #define LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H 81 82 #include "llvm/ADT/DenseMap.h" 83 #include "llvm/ADT/SmallVector.h" 84 #include "llvm/IR/PassManager.h" 85 #include "llvm/IR/ValueHandle.h" 86 87 namespace llvm { 88 89 class AssumptionCache; 90 class BinaryOperator; 91 class DataLayout; 92 class DominatorTree; 93 class Function; 94 class GetElementPtrInst; 95 class Instruction; 96 class ScalarEvolution; 97 class SCEV; 98 class TargetLibraryInfo; 99 class TargetTransformInfo; 100 class Type; 101 class Value; 102 103 class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> { 104 public: 105 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 106 107 // Glue for old PM. 108 bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, 109 ScalarEvolution *SE_, TargetLibraryInfo *TLI_, 110 TargetTransformInfo *TTI_); 111 112 private: 113 // Runs only one iteration of the dominator-based algorithm. See the header 114 // comments for why we need multiple iterations. 115 bool doOneIteration(Function &F); 116 117 // Reassociates I for better CSE. 118 Instruction *tryReassociate(Instruction *I); 119 120 // Reassociate GEP for better CSE. 121 Instruction *tryReassociateGEP(GetElementPtrInst *GEP); 122 123 // Try splitting GEP at the I-th index and see whether either part can be 124 // CSE'ed. This is a helper function for tryReassociateGEP. 125 // 126 // \p IndexedType The element type indexed by GEP's I-th index. This is 127 // equivalent to 128 // GEP->getIndexedType(GEP->getPointerOperand(), 0-th index, 129 // ..., i-th index). 130 GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, 131 unsigned I, Type *IndexedType); 132 133 // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or 134 // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly. 135 GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, 136 unsigned I, Value *LHS, 137 Value *RHS, Type *IndexedType); 138 139 // Reassociate binary operators for better CSE. 140 Instruction *tryReassociateBinaryOp(BinaryOperator *I); 141 142 // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly 143 // passed. 144 Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS, 145 BinaryOperator *I); 146 // Rewrites I to (LHS op RHS) if LHS is computed already. 147 Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS, 148 BinaryOperator *I); 149 150 // Tries to match Op1 and Op2 by using V. 151 bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2); 152 153 // Gets SCEV for (LHS op RHS). 154 const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS, 155 const SCEV *RHS); 156 157 // Returns the closest dominator of \c Dominatee that computes 158 // \c CandidateExpr. Returns null if not found. 159 Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr, 160 Instruction *Dominatee); 161 162 // GetElementPtrInst implicitly sign-extends an index if the index is shorter 163 // than the pointer size. This function returns whether Index is shorter than 164 // GEP's pointer size, i.e., whether Index needs to be sign-extended in order 165 // to be an index of GEP. 166 bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP); 167 168 AssumptionCache *AC; 169 const DataLayout *DL; 170 DominatorTree *DT; 171 ScalarEvolution *SE; 172 TargetLibraryInfo *TLI; 173 TargetTransformInfo *TTI; 174 175 // A lookup table quickly telling which instructions compute the given SCEV. 176 // Note that there can be multiple instructions at different locations 177 // computing to the same SCEV, so we map a SCEV to an instruction list. For 178 // example, 179 // 180 // if (p1) 181 // foo(a + b); 182 // if (p2) 183 // bar(a + b); 184 DenseMap<const SCEV *, SmallVector<WeakTrackingVH, 2>> SeenExprs; 185 }; 186 187 } // end namespace llvm 188 189 #endif // LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H 190