Home | History | Annotate | Download | only in Scalar
      1 //===-- StraightLineStrengthReduce.cpp - ------------------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements straight-line strength reduction (SLSR). Unlike loop
     11 // strength reduction, this algorithm is designed to reduce arithmetic
     12 // redundancy in straight-line code instead of loops. It has proven to be
     13 // effective in simplifying arithmetic statements derived from an unrolled loop.
     14 // It can also simplify the logic of SeparateConstOffsetFromGEP.
     15 //
     16 // There are many optimizations we can perform in the domain of SLSR. This file
     17 // for now contains only an initial step. Specifically, we look for strength
     18 // reduction candidates in the following forms:
     19 //
     20 // Form 1: B + i * S
     21 // Form 2: (B + i) * S
     22 // Form 3: &B[i * S]
     23 //
     24 // where S is an integer variable, and i is a constant integer. If we found two
     25 // candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2
     26 // in a simpler way with respect to S1. For example,
     27 //
     28 // S1: X = B + i * S
     29 // S2: Y = B + i' * S   => X + (i' - i) * S
     30 //
     31 // S1: X = (B + i) * S
     32 // S2: Y = (B + i') * S => X + (i' - i) * S
     33 //
     34 // S1: X = &B[i * S]
     35 // S2: Y = &B[i' * S]   => &X[(i' - i) * S]
     36 //
     37 // Note: (i' - i) * S is folded to the extent possible.
     38 //
     39 // This rewriting is in general a good idea. The code patterns we focus on
     40 // usually come from loop unrolling, so (i' - i) * S is likely the same
     41 // across iterations and can be reused. When that happens, the optimized form
     42 // takes only one add starting from the second iteration.
     43 //
     44 // When such rewriting is possible, we call S1 a "basis" of S2. When S2 has
     45 // multiple bases, we choose to rewrite S2 with respect to its "immediate"
     46 // basis, the basis that is the closest ancestor in the dominator tree.
     47 //
     48 // TODO:
     49 //
     50 // - Floating point arithmetics when fast math is enabled.
     51 //
     52 // - SLSR may decrease ILP at the architecture level. Targets that are very
     53 //   sensitive to ILP may want to disable it. Having SLSR to consider ILP is
     54 //   left as future work.
     55 //
     56 // - When (i' - i) is constant but i and i' are not, we could still perform
     57 //   SLSR.
     58 #include <vector>
     59 
     60 #include "llvm/ADT/DenseSet.h"
     61 #include "llvm/ADT/FoldingSet.h"
     62 #include "llvm/Analysis/ScalarEvolution.h"
     63 #include "llvm/Analysis/TargetTransformInfo.h"
     64 #include "llvm/Analysis/ValueTracking.h"
     65 #include "llvm/IR/DataLayout.h"
     66 #include "llvm/IR/Dominators.h"
     67 #include "llvm/IR/IRBuilder.h"
     68 #include "llvm/IR/Module.h"
     69 #include "llvm/IR/PatternMatch.h"
     70 #include "llvm/Support/raw_ostream.h"
     71 #include "llvm/Transforms/Scalar.h"
     72 #include "llvm/Transforms/Utils/Local.h"
     73 
     74 using namespace llvm;
     75 using namespace PatternMatch;
     76 
     77 namespace {
     78 
     79 class StraightLineStrengthReduce : public FunctionPass {
     80 public:
     81   // SLSR candidate. Such a candidate must be in one of the forms described in
     82   // the header comments.
     83   struct Candidate : public ilist_node<Candidate> {
     84     enum Kind {
     85       Invalid, // reserved for the default constructor
     86       Add,     // B + i * S
     87       Mul,     // (B + i) * S
     88       GEP,     // &B[..][i * S][..]
     89     };
     90 
     91     Candidate()
     92         : CandidateKind(Invalid), Base(nullptr), Index(nullptr),
     93           Stride(nullptr), Ins(nullptr), Basis(nullptr) {}
     94     Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
     95               Instruction *I)
     96         : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I),
     97           Basis(nullptr) {}
     98     Kind CandidateKind;
     99     const SCEV *Base;
    100     // Note that Index and Stride of a GEP candidate do not necessarily have the
    101     // same integer type. In that case, during rewriting, Stride will be
    102     // sign-extended or truncated to Index's type.
    103     ConstantInt *Index;
    104     Value *Stride;
    105     // The instruction this candidate corresponds to. It helps us to rewrite a
    106     // candidate with respect to its immediate basis. Note that one instruction
    107     // can correspond to multiple candidates depending on how you associate the
    108     // expression. For instance,
    109     //
    110     // (a + 1) * (b + 2)
    111     //
    112     // can be treated as
    113     //
    114     // <Base: a, Index: 1, Stride: b + 2>
    115     //
    116     // or
    117     //
    118     // <Base: b, Index: 2, Stride: a + 1>
    119     Instruction *Ins;
    120     // Points to the immediate basis of this candidate, or nullptr if we cannot
    121     // find any basis for this candidate.
    122     Candidate *Basis;
    123   };
    124 
    125   static char ID;
    126 
    127   StraightLineStrengthReduce()
    128       : FunctionPass(ID), DL(nullptr), DT(nullptr), TTI(nullptr) {
    129     initializeStraightLineStrengthReducePass(*PassRegistry::getPassRegistry());
    130   }
    131 
    132   void getAnalysisUsage(AnalysisUsage &AU) const override {
    133     AU.addRequired<DominatorTreeWrapperPass>();
    134     AU.addRequired<ScalarEvolutionWrapperPass>();
    135     AU.addRequired<TargetTransformInfoWrapperPass>();
    136     // We do not modify the shape of the CFG.
    137     AU.setPreservesCFG();
    138   }
    139 
    140   bool doInitialization(Module &M) override {
    141     DL = &M.getDataLayout();
    142     return false;
    143   }
    144 
    145   bool runOnFunction(Function &F) override;
    146 
    147 private:
    148   // Returns true if Basis is a basis for C, i.e., Basis dominates C and they
    149   // share the same base and stride.
    150   bool isBasisFor(const Candidate &Basis, const Candidate &C);
    151   // Returns whether the candidate can be folded into an addressing mode.
    152   bool isFoldable(const Candidate &C, TargetTransformInfo *TTI,
    153                   const DataLayout *DL);
    154   // Returns true if C is already in a simplest form and not worth being
    155   // rewritten.
    156   bool isSimplestForm(const Candidate &C);
    157   // Checks whether I is in a candidate form. If so, adds all the matching forms
    158   // to Candidates, and tries to find the immediate basis for each of them.
    159   void allocateCandidatesAndFindBasis(Instruction *I);
    160   // Allocate candidates and find bases for Add instructions.
    161   void allocateCandidatesAndFindBasisForAdd(Instruction *I);
    162   // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a
    163   // candidate.
    164   void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS,
    165                                             Instruction *I);
    166   // Allocate candidates and find bases for Mul instructions.
    167   void allocateCandidatesAndFindBasisForMul(Instruction *I);
    168   // Splits LHS into Base + Index and, if succeeds, calls
    169   // allocateCandidatesAndFindBasis.
    170   void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS,
    171                                             Instruction *I);
    172   // Allocate candidates and find bases for GetElementPtr instructions.
    173   void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);
    174   // A helper function that scales Idx with ElementSize before invoking
    175   // allocateCandidatesAndFindBasis.
    176   void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
    177                                             Value *S, uint64_t ElementSize,
    178                                             Instruction *I);
    179   // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
    180   // basis.
    181   void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B,
    182                                       ConstantInt *Idx, Value *S,
    183                                       Instruction *I);
    184   // Rewrites candidate C with respect to Basis.
    185   void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);
    186   // A helper function that factors ArrayIdx to a product of a stride and a
    187   // constant index, and invokes allocateCandidatesAndFindBasis with the
    188   // factorings.
    189   void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
    190                         GetElementPtrInst *GEP);
    191   // Emit code that computes the "bump" from Basis to C. If the candidate is a
    192   // GEP and the bump is not divisible by the element size of the GEP, this
    193   // function sets the BumpWithUglyGEP flag to notify its caller to bump the
    194   // basis using an ugly GEP.
    195   static Value *emitBump(const Candidate &Basis, const Candidate &C,
    196                          IRBuilder<> &Builder, const DataLayout *DL,
    197                          bool &BumpWithUglyGEP);
    198 
    199   const DataLayout *DL;
    200   DominatorTree *DT;
    201   ScalarEvolution *SE;
    202   TargetTransformInfo *TTI;
    203   ilist<Candidate> Candidates;
    204   // Temporarily holds all instructions that are unlinked (but not deleted) by
    205   // rewriteCandidateWithBasis. These instructions will be actually removed
    206   // after all rewriting finishes.
    207   std::vector<Instruction *> UnlinkedInstructions;
    208 };
    209 }  // anonymous namespace
    210 
    211 char StraightLineStrengthReduce::ID = 0;
    212 INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
    213                       "Straight line strength reduction", false, false)
    214 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
    215 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
    216 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
    217 INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
    218                     "Straight line strength reduction", false, false)
    219 
    220 FunctionPass *llvm::createStraightLineStrengthReducePass() {
    221   return new StraightLineStrengthReduce();
    222 }
    223 
    224 bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
    225                                             const Candidate &C) {
    226   return (Basis.Ins != C.Ins && // skip the same instruction
    227           // They must have the same type too. Basis.Base == C.Base doesn't
    228           // guarantee their types are the same (PR23975).
    229           Basis.Ins->getType() == C.Ins->getType() &&
    230           // Basis must dominate C in order to rewrite C with respect to Basis.
    231           DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
    232           // They share the same base, stride, and candidate kind.
    233           Basis.Base == C.Base && Basis.Stride == C.Stride &&
    234           Basis.CandidateKind == C.CandidateKind);
    235 }
    236 
    237 // TODO: use TTI->getGEPCost.
    238 static bool isGEPFoldable(GetElementPtrInst *GEP,
    239                           const TargetTransformInfo *TTI,
    240                           const DataLayout *DL) {
    241   GlobalVariable *BaseGV = nullptr;
    242   int64_t BaseOffset = 0;
    243   bool HasBaseReg = false;
    244   int64_t Scale = 0;
    245 
    246   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand()))
    247     BaseGV = GV;
    248   else
    249     HasBaseReg = true;
    250 
    251   gep_type_iterator GTI = gep_type_begin(GEP);
    252   for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) {
    253     if (isa<SequentialType>(*GTI)) {
    254       int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
    255       if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
    256         BaseOffset += ConstIdx->getSExtValue() * ElementSize;
    257       } else {
    258         // Needs scale register.
    259         if (Scale != 0) {
    260           // No addressing mode takes two scale registers.
    261           return false;
    262         }
    263         Scale = ElementSize;
    264       }
    265     } else {
    266       StructType *STy = cast<StructType>(*GTI);
    267       uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
    268       BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
    269     }
    270   }
    271 
    272   unsigned AddrSpace = GEP->getPointerAddressSpace();
    273   return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
    274                                     BaseOffset, HasBaseReg, Scale, AddrSpace);
    275 }
    276 
    277 // Returns whether (Base + Index * Stride) can be folded to an addressing mode.
    278 static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride,
    279                           TargetTransformInfo *TTI) {
    280   return TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true,
    281                                     Index->getSExtValue());
    282 }
    283 
    284 bool StraightLineStrengthReduce::isFoldable(const Candidate &C,
    285                                             TargetTransformInfo *TTI,
    286                                             const DataLayout *DL) {
    287   if (C.CandidateKind == Candidate::Add)
    288     return isAddFoldable(C.Base, C.Index, C.Stride, TTI);
    289   if (C.CandidateKind == Candidate::GEP)
    290     return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI, DL);
    291   return false;
    292 }
    293 
    294 // Returns true if GEP has zero or one non-zero index.
    295 static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP) {
    296   unsigned NumNonZeroIndices = 0;
    297   for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) {
    298     ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
    299     if (ConstIdx == nullptr || !ConstIdx->isZero())
    300       ++NumNonZeroIndices;
    301   }
    302   return NumNonZeroIndices <= 1;
    303 }
    304 
    305 bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) {
    306   if (C.CandidateKind == Candidate::Add) {
    307     // B + 1 * S or B + (-1) * S
    308     return C.Index->isOne() || C.Index->isMinusOne();
    309   }
    310   if (C.CandidateKind == Candidate::Mul) {
    311     // (B + 0) * S
    312     return C.Index->isZero();
    313   }
    314   if (C.CandidateKind == Candidate::GEP) {
    315     // (char*)B + S or (char*)B - S
    316     return ((C.Index->isOne() || C.Index->isMinusOne()) &&
    317             hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins)));
    318   }
    319   return false;
    320 }
    321 
    322 // TODO: We currently implement an algorithm whose time complexity is linear in
    323 // the number of existing candidates. However, we could do better by using
    324 // ScopedHashTable. Specifically, while traversing the dominator tree, we could
    325 // maintain all the candidates that dominate the basic block being traversed in
    326 // a ScopedHashTable. This hash table is indexed by the base and the stride of
    327 // a candidate. Therefore, finding the immediate basis of a candidate boils down
    328 // to one hash-table look up.
    329 void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
    330     Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
    331     Instruction *I) {
    332   Candidate C(CT, B, Idx, S, I);
    333   // SLSR can complicate an instruction in two cases:
    334   //
    335   // 1. If we can fold I into an addressing mode, computing I is likely free or
    336   // takes only one instruction.
    337   //
    338   // 2. I is already in a simplest form. For example, when
    339   //      X = B + 8 * S
    340   //      Y = B + S,
    341   //    rewriting Y to X - 7 * S is probably a bad idea.
    342   //
    343   // In the above cases, we still add I to the candidate list so that I can be
    344   // the basis of other candidates, but we leave I's basis blank so that I
    345   // won't be rewritten.
    346   if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) {
    347     // Try to compute the immediate basis of C.
    348     unsigned NumIterations = 0;
    349     // Limit the scan radius to avoid running in quadratice time.
    350     static const unsigned MaxNumIterations = 50;
    351     for (auto Basis = Candidates.rbegin();
    352          Basis != Candidates.rend() && NumIterations < MaxNumIterations;
    353          ++Basis, ++NumIterations) {
    354       if (isBasisFor(*Basis, C)) {
    355         C.Basis = &(*Basis);
    356         break;
    357       }
    358     }
    359   }
    360   // Regardless of whether we find a basis for C, we need to push C to the
    361   // candidate list so that it can be the basis of other candidates.
    362   Candidates.push_back(C);
    363 }
    364 
    365 void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
    366     Instruction *I) {
    367   switch (I->getOpcode()) {
    368   case Instruction::Add:
    369     allocateCandidatesAndFindBasisForAdd(I);
    370     break;
    371   case Instruction::Mul:
    372     allocateCandidatesAndFindBasisForMul(I);
    373     break;
    374   case Instruction::GetElementPtr:
    375     allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));
    376     break;
    377   }
    378 }
    379 
    380 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
    381     Instruction *I) {
    382   // Try matching B + i * S.
    383   if (!isa<IntegerType>(I->getType()))
    384     return;
    385 
    386   assert(I->getNumOperands() == 2 && "isn't I an add?");
    387   Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
    388   allocateCandidatesAndFindBasisForAdd(LHS, RHS, I);
    389   if (LHS != RHS)
    390     allocateCandidatesAndFindBasisForAdd(RHS, LHS, I);
    391 }
    392 
    393 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
    394     Value *LHS, Value *RHS, Instruction *I) {
    395   Value *S = nullptr;
    396   ConstantInt *Idx = nullptr;
    397   if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) {
    398     // I = LHS + RHS = LHS + Idx * S
    399     allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
    400   } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) {
    401     // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx)
    402     APInt One(Idx->getBitWidth(), 1);
    403     Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue());
    404     allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I);
    405   } else {
    406     // At least, I = LHS + 1 * RHS
    407     ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1);
    408     allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS,
    409                                    I);
    410   }
    411 }
    412 
    413 // Returns true if A matches B + C where C is constant.
    414 static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C) {
    415   return (match(A, m_Add(m_Value(B), m_ConstantInt(C))) ||
    416           match(A, m_Add(m_ConstantInt(C), m_Value(B))));
    417 }
    418 
    419 // Returns true if A matches B | C where C is constant.
    420 static bool matchesOr(Value *A, Value *&B, ConstantInt *&C) {
    421   return (match(A, m_Or(m_Value(B), m_ConstantInt(C))) ||
    422           match(A, m_Or(m_ConstantInt(C), m_Value(B))));
    423 }
    424 
    425 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
    426     Value *LHS, Value *RHS, Instruction *I) {
    427   Value *B = nullptr;
    428   ConstantInt *Idx = nullptr;
    429   if (matchesAdd(LHS, B, Idx)) {
    430     // If LHS is in the form of "Base + Index", then I is in the form of
    431     // "(Base + Index) * RHS".
    432     allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
    433   } else if (matchesOr(LHS, B, Idx) && haveNoCommonBitsSet(B, Idx, *DL)) {
    434     // If LHS is in the form of "Base | Index" and Base and Index have no common
    435     // bits set, then
    436     //   Base | Index = Base + Index
    437     // and I is thus in the form of "(Base + Index) * RHS".
    438     allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
    439   } else {
    440     // Otherwise, at least try the form (LHS + 0) * RHS.
    441     ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0);
    442     allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
    443                                    I);
    444   }
    445 }
    446 
    447 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
    448     Instruction *I) {
    449   // Try matching (B + i) * S.
    450   // TODO: we could extend SLSR to float and vector types.
    451   if (!isa<IntegerType>(I->getType()))
    452     return;
    453 
    454   assert(I->getNumOperands() == 2 && "isn't I a mul?");
    455   Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
    456   allocateCandidatesAndFindBasisForMul(LHS, RHS, I);
    457   if (LHS != RHS) {
    458     // Symmetrically, try to split RHS to Base + Index.
    459     allocateCandidatesAndFindBasisForMul(RHS, LHS, I);
    460   }
    461 }
    462 
    463 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
    464     const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize,
    465     Instruction *I) {
    466   // I = B + sext(Idx *nsw S) * ElementSize
    467   //   = B + (sext(Idx) * sext(S)) * ElementSize
    468   //   = B + (sext(Idx) * ElementSize) * sext(S)
    469   // Casting to IntegerType is safe because we skipped vector GEPs.
    470   IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
    471   ConstantInt *ScaledIdx = ConstantInt::get(
    472       IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
    473   allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
    474 }
    475 
    476 void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
    477                                                   const SCEV *Base,
    478                                                   uint64_t ElementSize,
    479                                                   GetElementPtrInst *GEP) {
    480   // At least, ArrayIdx = ArrayIdx *nsw 1.
    481   allocateCandidatesAndFindBasisForGEP(
    482       Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
    483       ArrayIdx, ElementSize, GEP);
    484   Value *LHS = nullptr;
    485   ConstantInt *RHS = nullptr;
    486   // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
    487   // itself. This would allow us to handle the shl case for free. However,
    488   // matching SCEVs has two issues:
    489   //
    490   // 1. this would complicate rewriting because the rewriting procedure
    491   // would have to translate SCEVs back to IR instructions. This translation
    492   // is difficult when LHS is further evaluated to a composite SCEV.
    493   //
    494   // 2. ScalarEvolution is designed to be control-flow oblivious. It tends
    495   // to strip nsw/nuw flags which are critical for SLSR to trace into
    496   // sext'ed multiplication.
    497   if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
    498     // SLSR is currently unsafe if i * S may overflow.
    499     // GEP = Base + sext(LHS *nsw RHS) * ElementSize
    500     allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
    501   } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) {
    502     // GEP = Base + sext(LHS <<nsw RHS) * ElementSize
    503     //     = Base + sext(LHS *nsw (1 << RHS)) * ElementSize
    504     APInt One(RHS->getBitWidth(), 1);
    505     ConstantInt *PowerOf2 =
    506         ConstantInt::get(RHS->getContext(), One << RHS->getValue());
    507     allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);
    508   }
    509 }
    510 
    511 void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
    512     GetElementPtrInst *GEP) {
    513   // TODO: handle vector GEPs
    514   if (GEP->getType()->isVectorTy())
    515     return;
    516 
    517   SmallVector<const SCEV *, 4> IndexExprs;
    518   for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
    519     IndexExprs.push_back(SE->getSCEV(*I));
    520 
    521   gep_type_iterator GTI = gep_type_begin(GEP);
    522   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
    523     if (!isa<SequentialType>(*GTI++))
    524       continue;
    525 
    526     const SCEV *OrigIndexExpr = IndexExprs[I - 1];
    527     IndexExprs[I - 1] = SE->getZero(OrigIndexExpr->getType());
    528 
    529     // The base of this candidate is GEP's base plus the offsets of all
    530     // indices except this current one.
    531     const SCEV *BaseExpr = SE->getGEPExpr(GEP->getSourceElementType(),
    532                                           SE->getSCEV(GEP->getPointerOperand()),
    533                                           IndexExprs, GEP->isInBounds());
    534     Value *ArrayIdx = GEP->getOperand(I);
    535     uint64_t ElementSize = DL->getTypeAllocSize(*GTI);
    536     factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP);
    537     // When ArrayIdx is the sext of a value, we try to factor that value as
    538     // well.  Handling this case is important because array indices are
    539     // typically sign-extended to the pointer size.
    540     Value *TruncatedArrayIdx = nullptr;
    541     if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))))
    542       factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP);
    543 
    544     IndexExprs[I - 1] = OrigIndexExpr;
    545   }
    546 }
    547 
    548 // A helper function that unifies the bitwidth of A and B.
    549 static void unifyBitWidth(APInt &A, APInt &B) {
    550   if (A.getBitWidth() < B.getBitWidth())
    551     A = A.sext(B.getBitWidth());
    552   else if (A.getBitWidth() > B.getBitWidth())
    553     B = B.sext(A.getBitWidth());
    554 }
    555 
    556 Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
    557                                             const Candidate &C,
    558                                             IRBuilder<> &Builder,
    559                                             const DataLayout *DL,
    560                                             bool &BumpWithUglyGEP) {
    561   APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue();
    562   unifyBitWidth(Idx, BasisIdx);
    563   APInt IndexOffset = Idx - BasisIdx;
    564 
    565   BumpWithUglyGEP = false;
    566   if (Basis.CandidateKind == Candidate::GEP) {
    567     APInt ElementSize(
    568         IndexOffset.getBitWidth(),
    569         DL->getTypeAllocSize(
    570             cast<GetElementPtrInst>(Basis.Ins)->getType()->getElementType()));
    571     APInt Q, R;
    572     APInt::sdivrem(IndexOffset, ElementSize, Q, R);
    573     if (R.getSExtValue() == 0)
    574       IndexOffset = Q;
    575     else
    576       BumpWithUglyGEP = true;
    577   }
    578 
    579   // Compute Bump = C - Basis = (i' - i) * S.
    580   // Common case 1: if (i' - i) is 1, Bump = S.
    581   if (IndexOffset.getSExtValue() == 1)
    582     return C.Stride;
    583   // Common case 2: if (i' - i) is -1, Bump = -S.
    584   if (IndexOffset.getSExtValue() == -1)
    585     return Builder.CreateNeg(C.Stride);
    586 
    587   // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
    588   // have different bit widths.
    589   IntegerType *DeltaType =
    590       IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth());
    591   Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType);
    592   if (IndexOffset.isPowerOf2()) {
    593     // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i).
    594     ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
    595     return Builder.CreateShl(ExtendedStride, Exponent);
    596   }
    597   if ((-IndexOffset).isPowerOf2()) {
    598     // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i' - i).
    599     ConstantInt *Exponent =
    600         ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
    601     return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent));
    602   }
    603   Constant *Delta = ConstantInt::get(DeltaType, IndexOffset);
    604   return Builder.CreateMul(ExtendedStride, Delta);
    605 }
    606 
    607 void StraightLineStrengthReduce::rewriteCandidateWithBasis(
    608     const Candidate &C, const Candidate &Basis) {
    609   assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base &&
    610          C.Stride == Basis.Stride);
    611   // We run rewriteCandidateWithBasis on all candidates in a post-order, so the
    612   // basis of a candidate cannot be unlinked before the candidate.
    613   assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked");
    614 
    615   // An instruction can correspond to multiple candidates. Therefore, instead of
    616   // simply deleting an instruction when we rewrite it, we mark its parent as
    617   // nullptr (i.e. unlink it) so that we can skip the candidates whose
    618   // instruction is already rewritten.
    619   if (!C.Ins->getParent())
    620     return;
    621 
    622   IRBuilder<> Builder(C.Ins);
    623   bool BumpWithUglyGEP;
    624   Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
    625   Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
    626   switch (C.CandidateKind) {
    627   case Candidate::Add:
    628   case Candidate::Mul:
    629     // C = Basis + Bump
    630     if (BinaryOperator::isNeg(Bump)) {
    631       // If Bump is a neg instruction, emit C = Basis - (-Bump).
    632       Reduced =
    633           Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump));
    634       // We only use the negative argument of Bump, and Bump itself may be
    635       // trivially dead.
    636       RecursivelyDeleteTriviallyDeadInstructions(Bump);
    637     } else {
    638       // It's tempting to preserve nsw on Bump and/or Reduced. However, it's
    639       // usually unsound, e.g.,
    640       //
    641       // X = (-2 +nsw 1) *nsw INT_MAX
    642       // Y = (-2 +nsw 3) *nsw INT_MAX
    643       //   =>
    644       // Y = X + 2 * INT_MAX
    645       //
    646       // Neither + and * in the resultant expression are nsw.
    647       Reduced = Builder.CreateAdd(Basis.Ins, Bump);
    648     }
    649     break;
    650   case Candidate::GEP:
    651     {
    652       Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
    653       bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
    654       if (BumpWithUglyGEP) {
    655         // C = (char *)Basis + Bump
    656         unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
    657         Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
    658         Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
    659         if (InBounds)
    660           Reduced =
    661               Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump);
    662         else
    663           Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump);
    664         Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
    665       } else {
    666         // C = gep Basis, Bump
    667         // Canonicalize bump to pointer size.
    668         Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
    669         if (InBounds)
    670           Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump);
    671         else
    672           Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump);
    673       }
    674     }
    675     break;
    676   default:
    677     llvm_unreachable("C.CandidateKind is invalid");
    678   };
    679   Reduced->takeName(C.Ins);
    680   C.Ins->replaceAllUsesWith(Reduced);
    681   // Unlink C.Ins so that we can skip other candidates also corresponding to
    682   // C.Ins. The actual deletion is postponed to the end of runOnFunction.
    683   C.Ins->removeFromParent();
    684   UnlinkedInstructions.push_back(C.Ins);
    685 }
    686 
    687 bool StraightLineStrengthReduce::runOnFunction(Function &F) {
    688   if (skipOptnoneFunction(F))
    689     return false;
    690 
    691   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    692   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    693   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    694   // Traverse the dominator tree in the depth-first order. This order makes sure
    695   // all bases of a candidate are in Candidates when we process it.
    696   for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);
    697        node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) {
    698     for (auto &I : *node->getBlock())
    699       allocateCandidatesAndFindBasis(&I);
    700   }
    701 
    702   // Rewrite candidates in the reverse depth-first order. This order makes sure
    703   // a candidate being rewritten is not a basis for any other candidate.
    704   while (!Candidates.empty()) {
    705     const Candidate &C = Candidates.back();
    706     if (C.Basis != nullptr) {
    707       rewriteCandidateWithBasis(C, *C.Basis);
    708     }
    709     Candidates.pop_back();
    710   }
    711 
    712   // Delete all unlink instructions.
    713   for (auto *UnlinkedInst : UnlinkedInstructions) {
    714     for (unsigned I = 0, E = UnlinkedInst->getNumOperands(); I != E; ++I) {
    715       Value *Op = UnlinkedInst->getOperand(I);
    716       UnlinkedInst->setOperand(I, nullptr);
    717       RecursivelyDeleteTriviallyDeadInstructions(Op);
    718     }
    719     delete UnlinkedInst;
    720   }
    721   bool Ret = !UnlinkedInstructions.empty();
    722   UnlinkedInstructions.clear();
    723   return Ret;
    724 }
    725