1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains an optimization for div and rem on architectures that 11 // execute short instructions significantly faster than longer instructions. 12 // For example, on Intel Atom 32-bit divides are slow enough that during 13 // runtime it is profitable to check the value of the operands, and if they are 14 // positive and less than 256 use an unsigned 8-bit divide. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #define DEBUG_TYPE "bypass-slow-division" 19 #include "llvm/Transforms/Utils/BypassSlowDivision.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/IR/Function.h" 22 #include "llvm/IR/IRBuilder.h" 23 #include "llvm/IR/Instructions.h" 24 25 using namespace llvm; 26 27 namespace { 28 struct DivOpInfo { 29 bool SignedOp; 30 Value *Dividend; 31 Value *Divisor; 32 33 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) 34 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} 35 }; 36 37 struct DivPhiNodes { 38 PHINode *Quotient; 39 PHINode *Remainder; 40 41 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) 42 : Quotient(InQuotient), Remainder(InRemainder) {} 43 }; 44 } 45 46 namespace llvm { 47 template<> 48 struct DenseMapInfo<DivOpInfo> { 49 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { 50 return Val1.SignedOp == Val2.SignedOp && 51 Val1.Dividend == Val2.Dividend && 52 Val1.Divisor == Val2.Divisor; 53 } 54 55 static DivOpInfo getEmptyKey() { 56 return DivOpInfo(false, 0, 0); 57 } 58 59 static DivOpInfo getTombstoneKey() { 60 return DivOpInfo(true, 0, 0); 61 } 62 63 static unsigned getHashValue(const DivOpInfo &Val) { 64 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ 65 reinterpret_cast<uintptr_t>(Val.Divisor)) ^ 66 (unsigned)Val.SignedOp; 67 } 68 }; 69 70 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; 71 } 72 73 // insertFastDiv - Substitutes the div/rem instruction with code that checks the 74 // value of the operands and uses a shorter-faster div/rem instruction when 75 // possible and the longer-slower div/rem instruction otherwise. 76 static bool insertFastDiv(Function &F, 77 Function::iterator &I, 78 BasicBlock::iterator &J, 79 IntegerType *BypassType, 80 bool UseDivOp, 81 bool UseSignedOp, 82 DivCacheTy &PerBBDivCache) { 83 // Get instruction operands 84 Instruction *Instr = J; 85 Value *Dividend = Instr->getOperand(0); 86 Value *Divisor = Instr->getOperand(1); 87 88 if (isa<ConstantInt>(Divisor) || 89 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { 90 // Operations with immediate values should have 91 // been solved and replaced during compile time. 92 return false; 93 } 94 95 // Basic Block is split before divide 96 BasicBlock *MainBB = I; 97 BasicBlock *SuccessorBB = I->splitBasicBlock(J); 98 ++I; //advance iterator I to successorBB 99 100 // Add new basic block for slow divide operation 101 BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", 102 MainBB->getParent(), SuccessorBB); 103 SlowBB->moveBefore(SuccessorBB); 104 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); 105 Value *SlowQuotientV; 106 Value *SlowRemainderV; 107 if (UseSignedOp) { 108 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); 109 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); 110 } else { 111 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); 112 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); 113 } 114 SlowBuilder.CreateBr(SuccessorBB); 115 116 // Add new basic block for fast divide operation 117 BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", 118 MainBB->getParent(), SuccessorBB); 119 FastBB->moveBefore(SlowBB); 120 IRBuilder<> FastBuilder(FastBB, FastBB->begin()); 121 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, 122 BypassType); 123 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, 124 BypassType); 125 126 // udiv/urem because optimization only handles positive numbers 127 Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, 128 ShortDivisorV); 129 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, 130 ShortDivisorV); 131 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, 132 ShortQuotientV, 133 Dividend->getType()); 134 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, 135 ShortRemainderV, 136 Dividend->getType()); 137 FastBuilder.CreateBr(SuccessorBB); 138 139 // Phi nodes for result of div and rem 140 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); 141 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); 142 QuoPhi->addIncoming(SlowQuotientV, SlowBB); 143 QuoPhi->addIncoming(FastQuotientV, FastBB); 144 PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); 145 RemPhi->addIncoming(SlowRemainderV, SlowBB); 146 RemPhi->addIncoming(FastRemainderV, FastBB); 147 148 // Replace Instr with appropriate phi node 149 if (UseDivOp) 150 Instr->replaceAllUsesWith(QuoPhi); 151 else 152 Instr->replaceAllUsesWith(RemPhi); 153 Instr->eraseFromParent(); 154 155 // Combine operands into a single value with OR for value testing below 156 MainBB->getInstList().back().eraseFromParent(); 157 IRBuilder<> MainBuilder(MainBB, MainBB->end()); 158 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); 159 160 // BitMask is inverted to check if the operands are 161 // larger than the bypass type 162 uint64_t BitMask = ~BypassType->getBitMask(); 163 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); 164 165 // Compare operand values and branch 166 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); 167 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); 168 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); 169 170 // point iterator J at first instruction of successorBB 171 J = I->begin(); 172 173 // Cache phi nodes to be used later in place of other instances 174 // of div or rem with the same sign, dividend, and divisor 175 DivOpInfo Key(UseSignedOp, Dividend, Divisor); 176 DivPhiNodes Value(QuoPhi, RemPhi); 177 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); 178 return true; 179 } 180 181 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if 182 // operands and operation are identical. Otherwise call insertFastDiv to perform 183 // the optimization and cache the resulting dividend and remainder. 184 static bool reuseOrInsertFastDiv(Function &F, 185 Function::iterator &I, 186 BasicBlock::iterator &J, 187 IntegerType *BypassType, 188 bool UseDivOp, 189 bool UseSignedOp, 190 DivCacheTy &PerBBDivCache) { 191 // Get instruction operands 192 Instruction *Instr = J; 193 DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); 194 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); 195 196 if (CacheI == PerBBDivCache.end()) { 197 // If previous instance does not exist, insert fast div 198 return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, 199 PerBBDivCache); 200 } 201 202 // Replace operation value with previously generated phi node 203 DivPhiNodes &Value = CacheI->second; 204 if (UseDivOp) { 205 // Replace all uses of div instruction with quotient phi node 206 J->replaceAllUsesWith(Value.Quotient); 207 } else { 208 // Replace all uses of rem instruction with remainder phi node 209 J->replaceAllUsesWith(Value.Remainder); 210 } 211 212 // Advance to next operation 213 ++J; 214 215 // Remove redundant operation 216 Instr->eraseFromParent(); 217 return true; 218 } 219 220 // bypassSlowDivision - This optimization identifies DIV instructions that can 221 // be profitably bypassed and carried out with a shorter, faster divide. 222 bool llvm::bypassSlowDivision(Function &F, 223 Function::iterator &I, 224 const DenseMap<unsigned int, unsigned int> &BypassWidths) { 225 DivCacheTy DivCache; 226 227 bool MadeChange = false; 228 for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { 229 230 // Get instruction details 231 unsigned Opcode = J->getOpcode(); 232 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; 233 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; 234 bool UseSignedOp = Opcode == Instruction::SDiv || 235 Opcode == Instruction::SRem; 236 237 // Only optimize div or rem ops 238 if (!UseDivOp && !UseRemOp) 239 continue; 240 241 // Skip division on vector types, only optimize integer instructions 242 if (!J->getType()->isIntegerTy()) 243 continue; 244 245 // Get bitwidth of div/rem instruction 246 IntegerType *T = cast<IntegerType>(J->getType()); 247 unsigned int bitwidth = T->getBitWidth(); 248 249 // Continue if bitwidth is not bypassed 250 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); 251 if (BI == BypassWidths.end()) 252 continue; 253 254 // Get type for div/rem instruction with bypass bitwidth 255 IntegerType *BT = IntegerType::get(J->getContext(), BI->second); 256 257 MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, 258 UseSignedOp, DivCache); 259 } 260 261 return MadeChange; 262 } 263