1 //===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // \file 11 // This file implements a TargetTransformInfo analysis pass specific to the 12 // AMDGPU target machine. It uses the target's detailed information to provide 13 // more precise answers to certain TTI queries, while letting the target 14 // independent and default TTI implementations handle the rest. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "AMDGPUTargetMachine.h" 20 #include "llvm/Analysis/LoopInfo.h" 21 #include "llvm/Analysis/TargetTransformInfo.h" 22 #include "llvm/Analysis/ValueTracking.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Target/CostTable.h" 25 #include "llvm/Target/TargetLowering.h" 26 using namespace llvm; 27 28 #define DEBUG_TYPE "AMDGPUtti" 29 30 // Declare the pass initialization routine locally as target-specific passes 31 // don't have a target-wide initialization entry point, and so we rely on the 32 // pass constructor initialization. 33 namespace llvm { 34 void initializeAMDGPUTTIPass(PassRegistry &); 35 } 36 37 namespace { 38 39 class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo { 40 const AMDGPUTargetMachine *TM; 41 const AMDGPUSubtarget *ST; 42 const AMDGPUTargetLowering *TLI; 43 44 /// Estimate the overhead of scalarizing an instruction. Insert and Extract 45 /// are set if the result needs to be inserted and/or extracted from vectors. 46 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 47 48 public: 49 AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { 50 llvm_unreachable("This pass cannot be directly constructed"); 51 } 52 53 AMDGPUTTI(const AMDGPUTargetMachine *TM) 54 : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), 55 TLI(TM->getTargetLowering()) { 56 initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry()); 57 } 58 59 void initializePass() override { pushTTIStack(this); } 60 61 void getAnalysisUsage(AnalysisUsage &AU) const override { 62 TargetTransformInfo::getAnalysisUsage(AU); 63 } 64 65 /// Pass identification. 66 static char ID; 67 68 /// Provide necessary pointer adjustments for the two base classes. 69 void *getAdjustedAnalysisPointer(const void *ID) override { 70 if (ID == &TargetTransformInfo::ID) 71 return (TargetTransformInfo *)this; 72 return this; 73 } 74 75 bool hasBranchDivergence() const override; 76 77 void getUnrollingPreferences(Loop *L, 78 UnrollingPreferences &UP) const override; 79 80 /// @} 81 }; 82 83 } // end anonymous namespace 84 85 INITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti", 86 "AMDGPU Target Transform Info", true, true, false) 87 char AMDGPUTTI::ID = 0; 88 89 ImmutablePass * 90 llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) { 91 return new AMDGPUTTI(TM); 92 } 93 94 bool AMDGPUTTI::hasBranchDivergence() const { return true; } 95 96 void AMDGPUTTI::getUnrollingPreferences(Loop *L, 97 UnrollingPreferences &UP) const { 98 for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); 99 BI != BE; ++BI) { 100 BasicBlock *BB = *BI; 101 for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); 102 I != E; ++I) { 103 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I); 104 if (!GEP) 105 continue; 106 const Value *Ptr = GEP->getPointerOperand(); 107 const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr)); 108 if (Alloca) { 109 // We want to do whatever we can to limit the number of alloca 110 // instructions that make it through to the code generator. allocas 111 // require us to use indirect addressing, which is slow and prone to 112 // compiler bugs. If this loop does an address calculation on an 113 // alloca ptr, then we want to use a higher than normal loop unroll 114 // threshold. This will give SROA a better chance to eliminate these 115 // allocas. 116 // 117 // Don't use the maximum allowed value here as it will make some 118 // programs way too big. 119 UP.Threshold = 500; 120 } 121 } 122 } 123 } 124