Home | History | Annotate | Download | only in Utils
      1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains an optimization for div and rem on architectures that
     11 // execute short instructions significantly faster than longer instructions.
     12 // For example, on Intel Atom 32-bit divides are slow enough that during
     13 // runtime it is profitable to check the value of the operands, and if they are
     14 // positive and less than 256 use an unsigned 8-bit divide.
     15 //
     16 //===----------------------------------------------------------------------===//
     17 
     18 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
     19 #include "llvm/ADT/DenseMap.h"
     20 #include "llvm/IR/Function.h"
     21 #include "llvm/IR/IRBuilder.h"
     22 #include "llvm/IR/Instructions.h"
     23 
     24 using namespace llvm;
     25 
     26 #define DEBUG_TYPE "bypass-slow-division"
     27 
     28 namespace {
     29   struct DivOpInfo {
     30     bool SignedOp;
     31     Value *Dividend;
     32     Value *Divisor;
     33 
     34     DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
     35       : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
     36   };
     37 
     38   struct DivPhiNodes {
     39     PHINode *Quotient;
     40     PHINode *Remainder;
     41 
     42     DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
     43       : Quotient(InQuotient), Remainder(InRemainder) {}
     44   };
     45 }
     46 
     47 namespace llvm {
     48   template<>
     49   struct DenseMapInfo<DivOpInfo> {
     50     static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
     51       return Val1.SignedOp == Val2.SignedOp &&
     52              Val1.Dividend == Val2.Dividend &&
     53              Val1.Divisor == Val2.Divisor;
     54     }
     55 
     56     static DivOpInfo getEmptyKey() {
     57       return DivOpInfo(false, nullptr, nullptr);
     58     }
     59 
     60     static DivOpInfo getTombstoneKey() {
     61       return DivOpInfo(true, nullptr, nullptr);
     62     }
     63 
     64     static unsigned getHashValue(const DivOpInfo &Val) {
     65       return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
     66                         reinterpret_cast<uintptr_t>(Val.Divisor)) ^
     67                         (unsigned)Val.SignedOp;
     68     }
     69   };
     70 
     71   typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
     72 }
     73 
     74 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
     75 // value of the operands and uses a shorter-faster div/rem instruction when
     76 // possible and the longer-slower div/rem instruction otherwise.
     77 static bool insertFastDiv(Function &F,
     78                           Function::iterator &I,
     79                           BasicBlock::iterator &J,
     80                           IntegerType *BypassType,
     81                           bool UseDivOp,
     82                           bool UseSignedOp,
     83                           DivCacheTy &PerBBDivCache) {
     84   // Get instruction operands
     85   Instruction *Instr = &*J;
     86   Value *Dividend = Instr->getOperand(0);
     87   Value *Divisor = Instr->getOperand(1);
     88 
     89   if (isa<ConstantInt>(Divisor) ||
     90       (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
     91     // Operations with immediate values should have
     92     // been solved and replaced during compile time.
     93     return false;
     94   }
     95 
     96   // Basic Block is split before divide
     97   BasicBlock *MainBB = &*I;
     98   BasicBlock *SuccessorBB = I->splitBasicBlock(J);
     99   ++I; //advance iterator I to successorBB
    100 
    101   // Add new basic block for slow divide operation
    102   BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
    103                                           MainBB->getParent(), SuccessorBB);
    104   SlowBB->moveBefore(SuccessorBB);
    105   IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
    106   Value *SlowQuotientV;
    107   Value *SlowRemainderV;
    108   if (UseSignedOp) {
    109     SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
    110     SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
    111   } else {
    112     SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
    113     SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
    114   }
    115   SlowBuilder.CreateBr(SuccessorBB);
    116 
    117   // Add new basic block for fast divide operation
    118   BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
    119                                           MainBB->getParent(), SuccessorBB);
    120   FastBB->moveBefore(SlowBB);
    121   IRBuilder<> FastBuilder(FastBB, FastBB->begin());
    122   Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
    123                                                 BypassType);
    124   Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
    125                                                  BypassType);
    126 
    127   // udiv/urem because optimization only handles positive numbers
    128   Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
    129                                                       ShortDivisorV);
    130   Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
    131                                                   ShortDivisorV);
    132   Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
    133                                                 ShortQuotientV,
    134                                                 Dividend->getType());
    135   Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
    136                                                  ShortRemainderV,
    137                                                  Dividend->getType());
    138   FastBuilder.CreateBr(SuccessorBB);
    139 
    140   // Phi nodes for result of div and rem
    141   IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
    142   PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
    143   QuoPhi->addIncoming(SlowQuotientV, SlowBB);
    144   QuoPhi->addIncoming(FastQuotientV, FastBB);
    145   PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
    146   RemPhi->addIncoming(SlowRemainderV, SlowBB);
    147   RemPhi->addIncoming(FastRemainderV, FastBB);
    148 
    149   // Replace Instr with appropriate phi node
    150   if (UseDivOp)
    151     Instr->replaceAllUsesWith(QuoPhi);
    152   else
    153     Instr->replaceAllUsesWith(RemPhi);
    154   Instr->eraseFromParent();
    155 
    156   // Combine operands into a single value with OR for value testing below
    157   MainBB->getInstList().back().eraseFromParent();
    158   IRBuilder<> MainBuilder(MainBB, MainBB->end());
    159   Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
    160 
    161   // BitMask is inverted to check if the operands are
    162   // larger than the bypass type
    163   uint64_t BitMask = ~BypassType->getBitMask();
    164   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
    165 
    166   // Compare operand values and branch
    167   Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
    168   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
    169   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
    170 
    171   // point iterator J at first instruction of successorBB
    172   J = I->begin();
    173 
    174   // Cache phi nodes to be used later in place of other instances
    175   // of div or rem with the same sign, dividend, and divisor
    176   DivOpInfo Key(UseSignedOp, Dividend, Divisor);
    177   DivPhiNodes Value(QuoPhi, RemPhi);
    178   PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
    179   return true;
    180 }
    181 
    182 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
    183 // operands and operation are identical. Otherwise call insertFastDiv to perform
    184 // the optimization and cache the resulting dividend and remainder.
    185 static bool reuseOrInsertFastDiv(Function &F,
    186                                  Function::iterator &I,
    187                                  BasicBlock::iterator &J,
    188                                  IntegerType *BypassType,
    189                                  bool UseDivOp,
    190                                  bool UseSignedOp,
    191                                  DivCacheTy &PerBBDivCache) {
    192   // Get instruction operands
    193   Instruction *Instr = &*J;
    194   DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
    195   DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
    196 
    197   if (CacheI == PerBBDivCache.end()) {
    198     // If previous instance does not exist, insert fast div
    199     return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
    200                          PerBBDivCache);
    201   }
    202 
    203   // Replace operation value with previously generated phi node
    204   DivPhiNodes &Value = CacheI->second;
    205   if (UseDivOp) {
    206     // Replace all uses of div instruction with quotient phi node
    207     J->replaceAllUsesWith(Value.Quotient);
    208   } else {
    209     // Replace all uses of rem instruction with remainder phi node
    210     J->replaceAllUsesWith(Value.Remainder);
    211   }
    212 
    213   // Advance to next operation
    214   ++J;
    215 
    216   // Remove redundant operation
    217   Instr->eraseFromParent();
    218   return true;
    219 }
    220 
    221 // bypassSlowDivision - This optimization identifies DIV instructions that can
    222 // be profitably bypassed and carried out with a shorter, faster divide.
    223 bool llvm::bypassSlowDivision(Function &F,
    224                               Function::iterator &I,
    225                               const DenseMap<unsigned int, unsigned int> &BypassWidths) {
    226   DivCacheTy DivCache;
    227 
    228   bool MadeChange = false;
    229   for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
    230 
    231     // Get instruction details
    232     unsigned Opcode = J->getOpcode();
    233     bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
    234     bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
    235     bool UseSignedOp = Opcode == Instruction::SDiv ||
    236                        Opcode == Instruction::SRem;
    237 
    238     // Only optimize div or rem ops
    239     if (!UseDivOp && !UseRemOp)
    240       continue;
    241 
    242     // Skip division on vector types, only optimize integer instructions
    243     if (!J->getType()->isIntegerTy())
    244       continue;
    245 
    246     // Get bitwidth of div/rem instruction
    247     IntegerType *T = cast<IntegerType>(J->getType());
    248     unsigned int bitwidth = T->getBitWidth();
    249 
    250     // Continue if bitwidth is not bypassed
    251     DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
    252     if (BI == BypassWidths.end())
    253       continue;
    254 
    255     // Get type for div/rem instruction with bypass bitwidth
    256     IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
    257 
    258     MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
    259                                        UseSignedOp, DivCache);
    260   }
    261 
    262   return MadeChange;
    263 }
    264