1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements inline cost analysis. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Analysis/InlineCost.h" 15 #include "llvm/Support/CallSite.h" 16 #include "llvm/CallingConv.h" 17 #include "llvm/IntrinsicInst.h" 18 #include "llvm/Target/TargetData.h" 19 #include "llvm/ADT/SmallPtrSet.h" 20 21 using namespace llvm; 22 23 /// callIsSmall - If a call is likely to lower to a single target instruction, 24 /// or is otherwise deemed small return true. 25 /// TODO: Perhaps calls like memcpy, strcpy, etc? 26 bool llvm::callIsSmall(const Function *F) { 27 if (!F) return false; 28 29 if (F->hasLocalLinkage()) return false; 30 31 if (!F->hasName()) return false; 32 33 StringRef Name = F->getName(); 34 35 // These will all likely lower to a single selection DAG node. 36 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || 37 Name == "fabs" || Name == "fabsf" || Name == "fabsl" || 38 Name == "sin" || Name == "sinf" || Name == "sinl" || 39 Name == "cos" || Name == "cosf" || Name == "cosl" || 40 Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) 41 return true; 42 43 // These are all likely to be optimized into something smaller. 
44 if (Name == "pow" || Name == "powf" || Name == "powl" || 45 Name == "exp2" || Name == "exp2l" || Name == "exp2f" || 46 Name == "floor" || Name == "floorf" || Name == "ceil" || 47 Name == "round" || Name == "ffs" || Name == "ffsl" || 48 Name == "abs" || Name == "labs" || Name == "llabs") 49 return true; 50 51 return false; 52 } 53 54 /// analyzeBasicBlock - Fill in the current structure with information gleaned 55 /// from the specified block. 56 void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, 57 const TargetData *TD) { 58 ++NumBlocks; 59 unsigned NumInstsBeforeThisBB = NumInsts; 60 for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); 61 II != E; ++II) { 62 if (isa<PHINode>(II)) continue; // PHI nodes don't count. 63 64 // Special handling for calls. 65 if (isa<CallInst>(II) || isa<InvokeInst>(II)) { 66 if (isa<DbgInfoIntrinsic>(II)) 67 continue; // Debug intrinsics don't count as size. 68 69 ImmutableCallSite CS(cast<Instruction>(II)); 70 71 if (const Function *F = CS.getCalledFunction()) { 72 // If a function is both internal and has a single use, then it is 73 // extremely likely to get inlined in the future (it was probably 74 // exposed by an interleaved devirtualization pass). 75 if (F->hasInternalLinkage() && F->hasOneUse()) 76 ++NumInlineCandidates; 77 78 // If this call is to function itself, then the function is recursive. 79 // Inlining it into other functions is a bad idea, because this is 80 // basically just a form of loop peeling, and our metrics aren't useful 81 // for that case. 82 if (F == BB->getParent()) 83 isRecursive = true; 84 } 85 86 if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { 87 // Each argument to a call takes on average one instruction to set up. 88 NumInsts += CS.arg_size(); 89 90 // We don't want inline asm to count as a call - that would prevent loop 91 // unrolling. The argument setup cost is still real, though. 
92 if (!isa<InlineAsm>(CS.getCalledValue())) 93 ++NumCalls; 94 } 95 } 96 97 if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { 98 if (!AI->isStaticAlloca()) 99 this->usesDynamicAlloca = true; 100 } 101 102 if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) 103 ++NumVectorInsts; 104 105 if (const CastInst *CI = dyn_cast<CastInst>(II)) { 106 // Noop casts, including ptr <-> int, don't count. 107 if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || 108 isa<PtrToIntInst>(CI)) 109 continue; 110 // trunc to a native type is free (assuming the target has compare and 111 // shift-right of the same width). 112 if (isa<TruncInst>(CI) && TD && 113 TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) 114 continue; 115 // Result of a cmp instruction is often extended (to be used by other 116 // cmp instructions, logical or return instructions). These are usually 117 // nop on most sane targets. 118 if (isa<CmpInst>(CI->getOperand(0))) 119 continue; 120 } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){ 121 // If a GEP has all constant indices, it will probably be folded with 122 // a load/store. 123 if (GEPI->hasAllConstantIndices()) 124 continue; 125 } 126 127 ++NumInsts; 128 } 129 130 if (isa<ReturnInst>(BB->getTerminator())) 131 ++NumRets; 132 133 // We never want to inline functions that contain an indirectbr. This is 134 // incorrect because all the blockaddress's (in static global initializers 135 // for example) would be referring to the original function, and this indirect 136 // jump would jump from the inlined copy of the function into the original 137 // function which is extremely undefined behavior. 138 if (isa<IndirectBrInst>(BB->getTerminator())) 139 containsIndirectBr = true; 140 141 // Remember NumInsts for this BB. 
142 NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; 143 } 144 145 // CountCodeReductionForConstant - Figure out an approximation for how many 146 // instructions will be constant folded if the specified value is constant. 147 // 148 unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) { 149 unsigned Reduction = 0; 150 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 151 User *U = *UI; 152 if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { 153 // We will be able to eliminate all but one of the successors. 154 const TerminatorInst &TI = cast<TerminatorInst>(*U); 155 const unsigned NumSucc = TI.getNumSuccessors(); 156 unsigned Instrs = 0; 157 for (unsigned I = 0; I != NumSucc; ++I) 158 Instrs += NumBBInsts[TI.getSuccessor(I)]; 159 // We don't know which blocks will be eliminated, so use the average size. 160 Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; 161 } else { 162 // Figure out if this instruction will be removed due to simple constant 163 // propagation. 164 Instruction &Inst = cast<Instruction>(*U); 165 166 // We can't constant propagate instructions which have effects or 167 // read memory. 168 // 169 // FIXME: It would be nice to capture the fact that a load from a 170 // pointer-to-constant-global is actually a *really* good thing to zap. 171 // Unfortunately, we don't know the pointer that may get propagated here, 172 // so we can't make this decision. 173 if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || 174 isa<AllocaInst>(Inst)) 175 continue; 176 177 bool AllOperandsConstant = true; 178 for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) 179 if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { 180 AllOperandsConstant = false; 181 break; 182 } 183 184 if (AllOperandsConstant) { 185 // We will get to remove this instruction... 186 Reduction += InlineConstants::InstrCost; 187 188 // And any other instructions that use it which become constants 189 // themselves. 
190 Reduction += CountCodeReductionForConstant(&Inst); 191 } 192 } 193 } 194 return Reduction; 195 } 196 197 // CountCodeReductionForAlloca - Figure out an approximation of how much smaller 198 // the function will be if it is inlined into a context where an argument 199 // becomes an alloca. 200 // 201 unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { 202 if (!V->getType()->isPointerTy()) return 0; // Not a pointer 203 unsigned Reduction = 0; 204 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ 205 Instruction *I = cast<Instruction>(*UI); 206 if (isa<LoadInst>(I) || isa<StoreInst>(I)) 207 Reduction += InlineConstants::InstrCost; 208 else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { 209 // If the GEP has variable indices, we won't be able to do much with it. 210 if (GEP->hasAllConstantIndices()) 211 Reduction += CountCodeReductionForAlloca(GEP); 212 } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { 213 // Track pointer through bitcasts. 214 Reduction += CountCodeReductionForAlloca(BCI); 215 } else { 216 // If there is some other strange instruction, we're not going to be able 217 // to do much if we inline this. 218 return 0; 219 } 220 } 221 222 return Reduction; 223 } 224 225 /// analyzeFunction - Fill in the current structure with information gleaned 226 /// from the specified function. 227 void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { 228 // If this function contains a call that "returns twice" (e.g., setjmp or 229 // _setjmp), never inline it. This is a hack because we depend on the user 230 // marking their local variables as volatile if they are live across a setjmp 231 // call, and they probably won't do this in callers. 232 callsSetJmp = F->callsFunctionThatReturnsTwice(); 233 234 // Look at the size of the callee. 
235 for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) 236 analyzeBasicBlock(&*BB, TD); 237 } 238 239 /// analyzeFunction - Fill in the current structure with information gleaned 240 /// from the specified function. 241 void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F, 242 const TargetData *TD) { 243 Metrics.analyzeFunction(F, TD); 244 245 // A function with exactly one return has it removed during the inlining 246 // process (see InlineFunction), so don't count it. 247 // FIXME: This knowledge should really be encoded outside of FunctionInfo. 248 if (Metrics.NumRets==1) 249 --Metrics.NumInsts; 250 251 // Check out all of the arguments to the function, figuring out how much 252 // code can be eliminated if one of the arguments is a constant. 253 ArgumentWeights.reserve(F->arg_size()); 254 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) 255 ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I), 256 Metrics.CountCodeReductionForAlloca(I))); 257 } 258 259 /// NeverInline - returns true if the function should never be inlined into 260 /// any caller 261 bool InlineCostAnalyzer::FunctionInfo::NeverInline() { 262 return (Metrics.callsSetJmp || Metrics.isRecursive || 263 Metrics.containsIndirectBr); 264 } 265 // getSpecializationBonus - The heuristic used to determine the per-call 266 // performance boost for using a specialization of Callee with argument 267 // specializedArgNo replaced by a constant. 268 int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, 269 SmallVectorImpl<unsigned> &SpecializedArgNos) 270 { 271 if (Callee->mayBeOverridden()) 272 return 0; 273 274 int Bonus = 0; 275 // If this function uses the coldcc calling convention, prefer not to 276 // specialize it. 277 if (Callee->getCallingConv() == CallingConv::Cold) 278 Bonus -= InlineConstants::ColdccPenalty; 279 280 // Get information about the callee. 
281 FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; 282 283 // If we haven't calculated this information yet, do so now. 284 if (CalleeFI->Metrics.NumBlocks == 0) 285 CalleeFI->analyzeFunction(Callee, TD); 286 287 unsigned ArgNo = 0; 288 unsigned i = 0; 289 for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end(); 290 I != E; ++I, ++ArgNo) 291 if (ArgNo == SpecializedArgNos[i]) { 292 ++i; 293 Bonus += CountBonusForConstant(I); 294 } 295 296 // Calls usually take a long time, so they make the specialization gain 297 // smaller. 298 Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; 299 300 return Bonus; 301 } 302 303 // ConstantFunctionBonus - Figure out how much of a bonus we can get for 304 // possibly devirtualizing a function. We'll subtract the size of the function 305 // we may wish to inline from the indirect call bonus providing a limit on 306 // growth. Leave an upper limit of 0 for the bonus - we don't want to penalize 307 // inlining because we decide we don't want to give a bonus for 308 // devirtualizing. 309 int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) { 310 311 // This could just be NULL. 312 if (!C) return 0; 313 314 Function *F = dyn_cast<Function>(C); 315 if (!F) return 0; 316 317 int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F); 318 return (Bonus > 0) ? 0 : Bonus; 319 } 320 321 // CountBonusForConstant - Figure out an approximation for how much per-call 322 // performance boost we can expect if the specified value is constant. 
int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
  unsigned Bonus = 0;
  // Walk every use of V: direct-call conversions get a devirtualization
  // bonus; everything else is checked for constant-foldability.
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
    User *U = *UI;
    if (CallInst *CI = dyn_cast<CallInst>(U)) {
      // Turning an indirect call into a direct call is a BIG win
      if (CI->getCalledValue() == V)
        Bonus += ConstantFunctionBonus(CallSite(CI), C);
    } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
      // Turning an indirect call into a direct call is a BIG win
      if (II->getCalledValue() == V)
        Bonus += ConstantFunctionBonus(CallSite(II), C);
    }
    // FIXME: Eliminating conditional branches and switches should
    // also yield a per-call performance boost.
    else {
      // Figure out the bonuses that will accrue due to simple constant
      // propagation.
      Instruction &Inst = cast<Instruction>(*U);

      // We can't constant propagate instructions which have effects or
      // read memory.
      //
      // FIXME: It would be nice to capture the fact that a load from a
      // pointer-to-constant-global is actually a *really* good thing to zap.
      // Unfortunately, we don't know the pointer that may get propagated here,
      // so we can't make this decision.
      if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
          isa<AllocaInst>(Inst))
        continue;

      // An instruction folds away only if every operand is either a constant
      // already or is the value V being considered.
      bool AllOperandsConstant = true;
      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
        if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
          AllOperandsConstant = false;
          break;
        }

      // Recurse: the folded result may in turn make its users foldable.
      // Note the recursive call passes only the instruction, relying on the
      // declaration's default for the Constant* parameter.
      if (AllOperandsConstant)
        Bonus += CountBonusForConstant(&Inst);
    }
  }

  return Bonus;
}

// getInlineSize - Estimate the size cost of inlining this call site:
// callee size plus call overhead, minus expected savings from constant
// and alloca arguments.
int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
  // Get information about the callee.
  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];

  // If we haven't calculated this information yet, do so now.
  if (CalleeFI->Metrics.NumBlocks == 0)
    CalleeFI->analyzeFunction(Callee, TD);

  // InlineCost - This value measures how good of an inline candidate this call
  // site is to inline. A lower inline cost makes it more likely for the call
  // to be inlined. This value may go negative.
  //
  int InlineCost = 0;

  // Compute any size reductions we can expect due to arguments being passed
  // into the function. ArgumentWeights was populated per-argument by
  // FunctionInfo::analyzeFunction, so indexing by ArgNo is in bounds for
  // every formal parameter of Callee.
  //
  unsigned ArgNo = 0;
  CallSite::arg_iterator I = CS.arg_begin();
  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
       FI != FE; ++I, ++FI, ++ArgNo) {

    // If an alloca is passed in, inlining this function is likely to allow
    // significant future optimization possibilities (like scalar promotion, and
    // scalarization), so encourage the inlining of the function.
    //
    if (isa<AllocaInst>(I))
      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;

    // If this is a constant being passed into the function, use the argument
    // weights calculated for the callee to determine how much will be folded
    // away with this information.
    else if (isa<Constant>(I))
      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
  }

  // Each argument passed in has a cost at both the caller and the callee
  // sides. Measurements show that each argument costs about the same as an
  // instruction.
  InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);

  // Now that we have considered all of the factors that make the call site more
  // likely to be inlined, look at factors that make us not want to inline it.

  // Calls usually take a long time, so they make the inlining gain smaller.
  InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;

  // Look at the size of the callee. Each instruction counts as
  // InlineConstants::InstrCost.
  InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;

  return InlineCost;
}

// getInlineBonuses - Estimate call-site-specific bonuses/penalties that are
// independent of the callee's raw size: last-use-of-static, noreturn,
// coldcc, and constant-argument bonuses.
int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
  // Get information about the callee.
  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];

  // If we haven't calculated this information yet, do so now.
  if (CalleeFI->Metrics.NumBlocks == 0)
    CalleeFI->analyzeFunction(Callee, TD);

  bool isDirectCall = CS.getCalledFunction() == Callee;
  Instruction *TheCall = CS.getInstruction();
  int Bonus = 0;

  // If there is only one call of the function, and it has internal linkage,
  // make it almost guaranteed to be inlined.
  //
  if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
    Bonus += InlineConstants::LastCallToStaticBonus;

  // If the instruction after the call, or if the normal destination of the
  // invoke is an unreachable instruction, the function is noreturn. As such,
  // there is little point in inlining this.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
      Bonus += InlineConstants::NoreturnPenalty;
  } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
    Bonus += InlineConstants::NoreturnPenalty;

  // If this function uses the coldcc calling convention, prefer not to inline
  // it.
  if (Callee->getCallingConv() == CallingConv::Cold)
    Bonus += InlineConstants::ColdccPenalty;

  // Add to the inline quality for properties that make the call valuable to
  // inline. This includes factors that indicate that the result of inlining
  // the function will be optimizable. Currently this just looks at arguments
  // passed into the function.
  //
  CallSite::arg_iterator I = CS.arg_begin();
  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
       FI != FE; ++I, ++FI)
    // Compute any constant bonus due to inlining we want to give here.
    if (isa<Constant>(I))
      Bonus += CountBonusForConstant(FI, cast<Constant>(I));

  return Bonus;
}

// getInlineCost - The heuristic used to determine if we should inline the
// function call or not.
//
InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
                               SmallPtrSet<const Function*, 16> &NeverInline) {
  return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
}

// getInlineCost - Overload that allows the callee to differ from the call
// site's called function (e.g. when considering a devirtualized target).
InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
                               Function *Callee,
                               SmallPtrSet<const Function*, 16> &NeverInline) {
  Instruction *TheCall = CS.getInstruction();
  Function *Caller = TheCall->getParent()->getParent();

  // Don't inline functions which can be redefined at link-time to mean
  // something else. Don't inline functions marked noinline or call sites
  // marked noinline.
  if (Callee->mayBeOverridden() ||
      Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
      CS.isNoInline())
    return llvm::InlineCost::getNever();

  // Get information about the callee.
  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];

  // If we haven't calculated this information yet, do so now.
  if (CalleeFI->Metrics.NumBlocks == 0)
    CalleeFI->analyzeFunction(Callee, TD);

  // If we should never inline this, return a huge cost.
  if (CalleeFI->NeverInline())
    return InlineCost::getNever();

  // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
  // could move this up and avoid computing the FunctionInfo for
  // things we are going to just return always inline for. This
  // requires handling setjmp somewhere else, however.
  if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
    return InlineCost::getAlways();

  if (CalleeFI->Metrics.usesDynamicAlloca) {
    // Get information about the caller.
    FunctionInfo &CallerFI = CachedFunctionInfo[Caller];

    // If we haven't calculated this information yet, do so now.
    if (CallerFI.Metrics.NumBlocks == 0) {
      CallerFI.analyzeFunction(Caller, TD);

      // Recompute the CalleeFI pointer, getting Caller could have invalidated
      // it (DenseMap insertion may rehash and move entries).
      CalleeFI = &CachedFunctionInfo[Callee];
    }

    // Don't inline a callee with dynamic alloca into a caller without them.
    // Functions containing dynamic alloca's are inefficient in various ways;
    // don't create more inefficiency.
    if (!CallerFI.Metrics.usesDynamicAlloca)
      return InlineCost::getNever();
  }

  // InlineCost - This value measures how good of an inline candidate this call
  // site is to inline. A lower inline cost makes it more likely for the call
  // to be inlined. This value may go negative due to the fact that bonuses
  // are negative numbers.
  //
  int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee);
  return llvm::InlineCost::get(InlineCost);
}

// getSpecializationCost - The heuristic used to determine the code-size
// impact of creating a specialized version of Callee with argument
// SpecializedArgNo replaced by a constant.
InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
                               SmallVectorImpl<unsigned> &SpecializedArgNos)
{
  // Don't specialize functions which can be redefined at link-time to mean
  // something else.
  if (Callee->mayBeOverridden())
    return llvm::InlineCost::getNever();

  // Get information about the callee.
  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];

  // If we haven't calculated this information yet, do so now.
  if (CalleeFI->Metrics.NumBlocks == 0)
    CalleeFI->analyzeFunction(Callee, TD);

  int Cost = 0;

  // Look at the original size of the callee. Each instruction counts as
  // InlineConstants::InstrCost.
  Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;

  // Offset that with the amount of code that can be constant-folded
  // away with the given arguments replaced by constants.
  for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
       ae = SpecializedArgNos.end(); an != ae; ++an)
    Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;

  return llvm::InlineCost::get(Cost);
}

// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
// higher threshold to determine if the function call should be inlined.
float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
  // NOTE(review): this assumes CS has a statically known callee;
  // getCalledFunction() returns null for indirect calls — confirm callers
  // only pass direct call sites.
  Function *Callee = CS.getCalledFunction();

  // Get information about the callee.
  FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];

  // If we haven't calculated this information yet, do so now.
  if (CalleeFI.Metrics.NumBlocks == 0)
    CalleeFI.analyzeFunction(Callee, TD);

  float Factor = 1.0f;
  // Single BB functions are often written to be inlined.
  if (CalleeFI.Metrics.NumBlocks == 1)
    Factor += 0.5f;

  // Be more aggressive if the function contains a good chunk (if it makes up
  // at least 10% of the instructions) of vector instructions.
  if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2)
    Factor += 2.0f;
  else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10)
    Factor += 1.5f;
  return Factor;
}

/// growCachedCostInfo - update the cached cost info for Caller after Callee has
/// been inlined.
598 void 599 InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { 600 CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics; 601 602 // For small functions we prefer to recalculate the cost for better accuracy. 603 if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) { 604 resetCachedCostInfo(Caller); 605 return; 606 } 607 608 // For large functions, we can save a lot of computation time by skipping 609 // recalculations. 610 if (CallerMetrics.NumCalls > 0) 611 --CallerMetrics.NumCalls; 612 613 if (Callee == 0) return; 614 615 CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; 616 617 // If we don't have metrics for the callee, don't recalculate them just to 618 // update an approximation in the caller. Instead, just recalculate the 619 // caller info from scratch. 620 if (CalleeMetrics.NumBlocks == 0) { 621 resetCachedCostInfo(Caller); 622 return; 623 } 624 625 // Since CalleeMetrics were already calculated, we know that the CallerMetrics 626 // reference isn't invalidated: both were in the DenseMap. 627 CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; 628 629 // FIXME: If any of these three are true for the callee, the callee was 630 // not inlined into the caller, so I think they're redundant here. 631 CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp; 632 CallerMetrics.isRecursive |= CalleeMetrics.isRecursive; 633 CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr; 634 635 CallerMetrics.NumInsts += CalleeMetrics.NumInsts; 636 CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; 637 CallerMetrics.NumCalls += CalleeMetrics.NumCalls; 638 CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts; 639 CallerMetrics.NumRets += CalleeMetrics.NumRets; 640 641 // analyzeBasicBlock counts each function argument as an inst. 
642 if (CallerMetrics.NumInsts >= Callee->arg_size()) 643 CallerMetrics.NumInsts -= Callee->arg_size(); 644 else 645 CallerMetrics.NumInsts = 0; 646 647 // We are not updating the argument weights. We have already determined that 648 // Caller is a fairly large function, so we accept the loss of precision. 649 } 650 651 /// clear - empty the cache of inline costs 652 void InlineCostAnalyzer::clear() { 653 CachedFunctionInfo.clear(); 654 } 655