Home | History | Annotate | Download | only in Utils
      1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains an optimization for div and rem on architectures that
     11 // execute short instructions significantly faster than longer instructions.
     12 // For example, on Intel Atom 32-bit divides are slow enough that during
     13 // runtime it is profitable to check the value of the operands, and if they are
     14 // positive and less than 256 use an unsigned 8-bit divide.
     15 //
     16 //===----------------------------------------------------------------------===//
     17 
     18 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
     19 #include "llvm/ADT/DenseMap.h"
     20 #include "llvm/IR/Function.h"
     21 #include "llvm/IR/IRBuilder.h"
     22 #include "llvm/IR/Instructions.h"
     23 
     24 using namespace llvm;
     25 
     26 #define DEBUG_TYPE "bypass-slow-division"
     27 
     28 namespace {
     29   struct DivOpInfo {
     30     bool SignedOp;
     31     Value *Dividend;
     32     Value *Divisor;
     33 
     34     DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
     35       : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
     36   };
     37 
     38   struct DivPhiNodes {
     39     PHINode *Quotient;
     40     PHINode *Remainder;
     41 
     42     DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
     43       : Quotient(InQuotient), Remainder(InRemainder) {}
     44   };
     45 }
     46 
     47 namespace llvm {
     48   template<>
     49   struct DenseMapInfo<DivOpInfo> {
     50     static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
     51       return Val1.SignedOp == Val2.SignedOp &&
     52              Val1.Dividend == Val2.Dividend &&
     53              Val1.Divisor == Val2.Divisor;
     54     }
     55 
     56     static DivOpInfo getEmptyKey() {
     57       return DivOpInfo(false, nullptr, nullptr);
     58     }
     59 
     60     static DivOpInfo getTombstoneKey() {
     61       return DivOpInfo(true, nullptr, nullptr);
     62     }
     63 
     64     static unsigned getHashValue(const DivOpInfo &Val) {
     65       return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
     66                         reinterpret_cast<uintptr_t>(Val.Divisor)) ^
     67                         (unsigned)Val.SignedOp;
     68     }
     69   };
     70 
     71   typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
     72 }
     73 
     74 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
     75 // value of the operands and uses a shorter-faster div/rem instruction when
     76 // possible and the longer-slower div/rem instruction otherwise.
     77 static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
     78                           bool UseDivOp, bool UseSignedOp,
     79                           DivCacheTy &PerBBDivCache) {
     80   Function *F = I->getParent()->getParent();
     81   // Get instruction operands
     82   Value *Dividend = I->getOperand(0);
     83   Value *Divisor = I->getOperand(1);
     84 
     85   if (isa<ConstantInt>(Divisor) ||
     86       (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
     87     // Operations with immediate values should have
     88     // been solved and replaced during compile time.
     89     return false;
     90   }
     91 
     92   // Basic Block is split before divide
     93   BasicBlock *MainBB = &*I->getParent();
     94   BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
     95 
     96   // Add new basic block for slow divide operation
     97   BasicBlock *SlowBB =
     98       BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
     99   SlowBB->moveBefore(SuccessorBB);
    100   IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
    101   Value *SlowQuotientV;
    102   Value *SlowRemainderV;
    103   if (UseSignedOp) {
    104     SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
    105     SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
    106   } else {
    107     SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
    108     SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
    109   }
    110   SlowBuilder.CreateBr(SuccessorBB);
    111 
    112   // Add new basic block for fast divide operation
    113   BasicBlock *FastBB =
    114       BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
    115   FastBB->moveBefore(SlowBB);
    116   IRBuilder<> FastBuilder(FastBB, FastBB->begin());
    117   Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
    118                                                 BypassType);
    119   Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
    120                                                  BypassType);
    121 
    122   // udiv/urem because optimization only handles positive numbers
    123   Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
    124                                                       ShortDivisorV);
    125   Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
    126                                                   ShortDivisorV);
    127   Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
    128                                                 ShortQuotientV,
    129                                                 Dividend->getType());
    130   Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
    131                                                  ShortRemainderV,
    132                                                  Dividend->getType());
    133   FastBuilder.CreateBr(SuccessorBB);
    134 
    135   // Phi nodes for result of div and rem
    136   IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
    137   PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
    138   QuoPhi->addIncoming(SlowQuotientV, SlowBB);
    139   QuoPhi->addIncoming(FastQuotientV, FastBB);
    140   PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
    141   RemPhi->addIncoming(SlowRemainderV, SlowBB);
    142   RemPhi->addIncoming(FastRemainderV, FastBB);
    143 
    144   // Replace I with appropriate phi node
    145   if (UseDivOp)
    146     I->replaceAllUsesWith(QuoPhi);
    147   else
    148     I->replaceAllUsesWith(RemPhi);
    149   I->eraseFromParent();
    150 
    151   // Combine operands into a single value with OR for value testing below
    152   MainBB->getInstList().back().eraseFromParent();
    153   IRBuilder<> MainBuilder(MainBB, MainBB->end());
    154   Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
    155 
    156   // BitMask is inverted to check if the operands are
    157   // larger than the bypass type
    158   uint64_t BitMask = ~BypassType->getBitMask();
    159   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
    160 
    161   // Compare operand values and branch
    162   Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
    163   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
    164   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
    165 
    166   // Cache phi nodes to be used later in place of other instances
    167   // of div or rem with the same sign, dividend, and divisor
    168   DivOpInfo Key(UseSignedOp, Dividend, Divisor);
    169   DivPhiNodes Value(QuoPhi, RemPhi);
    170   PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
    171   return true;
    172 }
    173 
    174 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
    175 // the current BB if operands and operation are identical. Otherwise calls
    176 // insertFastDiv to perform the optimization and caches the resulting dividend
    177 // and remainder.
    178 static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
    179                                  bool UseDivOp, bool UseSignedOp,
    180                                  DivCacheTy &PerBBDivCache) {
    181   // Get instruction operands
    182   DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
    183   DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
    184 
    185   if (CacheI == PerBBDivCache.end()) {
    186     // If previous instance does not exist, insert fast div
    187     return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
    188   }
    189 
    190   // Replace operation value with previously generated phi node
    191   DivPhiNodes &Value = CacheI->second;
    192   if (UseDivOp) {
    193     // Replace all uses of div instruction with quotient phi node
    194     I->replaceAllUsesWith(Value.Quotient);
    195   } else {
    196     // Replace all uses of rem instruction with remainder phi node
    197     I->replaceAllUsesWith(Value.Remainder);
    198   }
    199 
    200   // Remove redundant operation
    201   I->eraseFromParent();
    202   return true;
    203 }
    204 
    205 // bypassSlowDivision - This optimization identifies DIV instructions in a BB
    206 // that can be profitably bypassed and carried out with a shorter, faster
    207 // divide.
    208 bool llvm::bypassSlowDivision(
    209     BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
    210   DivCacheTy DivCache;
    211 
    212   bool MadeChange = false;
    213   Instruction* Next = &*BB->begin();
    214   while (Next != nullptr) {
    215     // We may add instructions immediately after I, but we want to skip over
    216     // them.
    217     Instruction* I = Next;
    218     Next = Next->getNextNode();
    219 
    220     // Get instruction details
    221     unsigned Opcode = I->getOpcode();
    222     bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
    223     bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
    224     bool UseSignedOp = Opcode == Instruction::SDiv ||
    225                        Opcode == Instruction::SRem;
    226 
    227     // Only optimize div or rem ops
    228     if (!UseDivOp && !UseRemOp)
    229       continue;
    230 
    231     // Skip division on vector types, only optimize integer instructions
    232     if (!I->getType()->isIntegerTy())
    233       continue;
    234 
    235     // Get bitwidth of div/rem instruction
    236     IntegerType *T = cast<IntegerType>(I->getType());
    237     unsigned int bitwidth = T->getBitWidth();
    238 
    239     // Continue if bitwidth is not bypassed
    240     DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
    241     if (BI == BypassWidths.end())
    242       continue;
    243 
    244     // Get type for div/rem instruction with bypass bitwidth
    245     IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
    246 
    247     MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
    248   }
    249 
    250   return MadeChange;
    251 }
    252