Home | History | Annotate | Download | only in AArch64
      1 //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the AArch64 specific subclass of TargetSubtarget.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AArch64Subtarget.h"
     15 
     16 #include "AArch64.h"
     17 #include "AArch64InstrInfo.h"
     18 #include "AArch64PBQPRegAlloc.h"
     19 #include "AArch64TargetMachine.h"
     20 
     21 #include "AArch64CallLowering.h"
     22 #include "AArch64LegalizerInfo.h"
     23 #include "AArch64RegisterBankInfo.h"
     24 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
     25 #include "llvm/CodeGen/MachineScheduler.h"
     26 #include "llvm/IR/GlobalValue.h"
     27 #include "llvm/Support/TargetParser.h"
     28 
     29 using namespace llvm;
     30 
     31 #define DEBUG_TYPE "aarch64-subtarget"
     32 
     33 #define GET_SUBTARGETINFO_CTOR
     34 #define GET_SUBTARGETINFO_TARGET_DESC
     35 #include "AArch64GenSubtargetInfo.inc"
     36 
     37 static cl::opt<bool>
     38 EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
     39                      "converter pass"), cl::init(true), cl::Hidden);
     40 
     41 // If OS supports TBI, use this flag to enable it.
     42 static cl::opt<bool>
     43 UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
     44                          "an address is ignored"), cl::init(false), cl::Hidden);
     45 
     46 static cl::opt<bool>
     47     UseNonLazyBind("aarch64-enable-nonlazybind",
     48                    cl::desc("Call nonlazybind functions via direct GOT load"),
     49                    cl::init(false), cl::Hidden);
     50 
     51 AArch64Subtarget &
     52 AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
     53                                                   StringRef CPUString) {
     54   // Determine default and user-specified characteristics
     55 
     56   if (CPUString.empty())
     57     CPUString = "generic";
     58 
     59   ParseSubtargetFeatures(CPUString, FS);
     60   initializeProperties();
     61 
     62   return *this;
     63 }
     64 
     65 void AArch64Subtarget::initializeProperties() {
     66   // Initialize CPU specific properties. We should add a tablegen feature for
     67   // this in the future so we can specify it together with the subtarget
     68   // features.
     69   switch (ARMProcFamily) {
     70   case Cyclone:
     71     CacheLineSize = 64;
     72     PrefetchDistance = 280;
     73     MinPrefetchStride = 2048;
     74     MaxPrefetchIterationsAhead = 3;
     75     break;
     76   case CortexA57:
     77     MaxInterleaveFactor = 4;
     78     PrefFunctionAlignment = 4;
     79     break;
     80   case ExynosM1:
     81     MaxInterleaveFactor = 4;
     82     MaxJumpTableSize = 8;
     83     PrefFunctionAlignment = 4;
     84     PrefLoopAlignment = 3;
     85     break;
     86   case ExynosM3:
     87     MaxInterleaveFactor = 4;
     88     MaxJumpTableSize = 20;
     89     PrefFunctionAlignment = 5;
     90     PrefLoopAlignment = 4;
     91     break;
     92   case Falkor:
     93     MaxInterleaveFactor = 4;
     94     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     95     MinVectorRegisterBitWidth = 128;
     96     CacheLineSize = 128;
     97     PrefetchDistance = 820;
     98     MinPrefetchStride = 2048;
     99     MaxPrefetchIterationsAhead = 8;
    100     break;
    101   case Saphira:
    102     MaxInterleaveFactor = 4;
    103     // FIXME: remove this to enable 64-bit SLP if performance looks good.
    104     MinVectorRegisterBitWidth = 128;
    105     break;
    106   case Kryo:
    107     MaxInterleaveFactor = 4;
    108     VectorInsertExtractBaseCost = 2;
    109     CacheLineSize = 128;
    110     PrefetchDistance = 740;
    111     MinPrefetchStride = 1024;
    112     MaxPrefetchIterationsAhead = 11;
    113     // FIXME: remove this to enable 64-bit SLP if performance looks good.
    114     MinVectorRegisterBitWidth = 128;
    115     break;
    116   case ThunderX2T99:
    117     CacheLineSize = 64;
    118     PrefFunctionAlignment = 3;
    119     PrefLoopAlignment = 2;
    120     MaxInterleaveFactor = 4;
    121     PrefetchDistance = 128;
    122     MinPrefetchStride = 1024;
    123     MaxPrefetchIterationsAhead = 4;
    124     // FIXME: remove this to enable 64-bit SLP if performance looks good.
    125     MinVectorRegisterBitWidth = 128;
    126     break;
    127   case ThunderX:
    128   case ThunderXT88:
    129   case ThunderXT81:
    130   case ThunderXT83:
    131     CacheLineSize = 128;
    132     PrefFunctionAlignment = 3;
    133     PrefLoopAlignment = 2;
    134     // FIXME: remove this to enable 64-bit SLP if performance looks good.
    135     MinVectorRegisterBitWidth = 128;
    136     break;
    137   case CortexA35: break;
    138   case CortexA53:
    139     PrefFunctionAlignment = 3;
    140     break;
    141   case CortexA55: break;
    142   case CortexA72:
    143   case CortexA73:
    144   case CortexA75:
    145     PrefFunctionAlignment = 4;
    146     break;
    147   case Others: break;
    148   }
    149 }
    150 
    151 AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
    152                                    const std::string &FS,
    153                                    const TargetMachine &TM, bool LittleEndian)
    154     : AArch64GenSubtargetInfo(TT, CPU, FS),
    155       ReserveX18(AArch64::isX18ReservedByDefault(TT)), IsLittle(LittleEndian),
    156       TargetTriple(TT), FrameLowering(),
    157       InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
    158       TLInfo(TM, *this) {
    159   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
    160   Legalizer.reset(new AArch64LegalizerInfo(*this));
    161 
    162   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
    163 
    164   // FIXME: At this point, we can't rely on Subtarget having RBI.
    165   // It's awkward to mix passing RBI and the Subtarget; should we pass
    166   // TII/TRI as well?
    167   InstSelector.reset(createAArch64InstructionSelector(
    168       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
    169 
    170   RegBankInfo.reset(RBI);
    171 }
    172 
    173 const CallLowering *AArch64Subtarget::getCallLowering() const {
    174   return CallLoweringInfo.get();
    175 }
    176 
    177 const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
    178   return InstSelector.get();
    179 }
    180 
    181 const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
    182   return Legalizer.get();
    183 }
    184 
    185 const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
    186   return RegBankInfo.get();
    187 }
    188 
    189 /// Find the target operand flags that describe how a global value should be
    190 /// referenced for the current subtarget.
    191 unsigned char
    192 AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
    193                                           const TargetMachine &TM) const {
    194   // MachO large model always goes via a GOT, simply to get a single 8-byte
    195   // absolute relocation on all global addresses.
    196   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    197     return AArch64II::MO_GOT;
    198 
    199   unsigned Flags = GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
    200                                                   : AArch64II::MO_NO_FLAG;
    201 
    202   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    203     return AArch64II::MO_GOT | Flags;
    204 
    205   // The small code model's direct accesses use ADRP, which cannot
    206   // necessarily produce the value 0 (if the code is above 4GB).
    207   if (useSmallAddressing() && GV->hasExternalWeakLinkage())
    208     return AArch64II::MO_GOT | Flags;
    209 
    210   return Flags;
    211 }
    212 
    213 unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
    214     const GlobalValue *GV, const TargetMachine &TM) const {
    215   // MachO large model always goes via a GOT, because we don't have the
    216   // relocations available to do anything else..
    217   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
    218       !GV->hasInternalLinkage())
    219     return AArch64II::MO_GOT;
    220 
    221   // NonLazyBind goes via GOT unless we know it's available locally.
    222   auto *F = dyn_cast<Function>(GV);
    223   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
    224       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    225     return AArch64II::MO_GOT;
    226 
    227   return AArch64II::MO_NO_FLAG;
    228 }
    229 
    230 void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
    231                                            unsigned NumRegionInstrs) const {
    232   // LNT run (at least on Cyclone) showed reasonably significant gains for
    233   // bi-directional scheduling. 253.perlbmk.
    234   Policy.OnlyTopDown = false;
    235   Policy.OnlyBottomUp = false;
    236   // Enabling or Disabling the latency heuristic is a close call: It seems to
    237   // help nearly no benchmark on out-of-order architectures, on the other hand
    238   // it regresses register pressure on a few benchmarking.
    239   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
    240 }
    241 
    242 bool AArch64Subtarget::enableEarlyIfConversion() const {
    243   return EnableEarlyIfConvert;
    244 }
    245 
    246 bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
    247   if (!UseAddressTopByteIgnored)
    248     return false;
    249 
    250   if (TargetTriple.isiOS()) {
    251     unsigned Major, Minor, Micro;
    252     TargetTriple.getiOSVersion(Major, Minor, Micro);
    253     return Major >= 8;
    254   }
    255 
    256   return false;
    257 }
    258 
    259 std::unique_ptr<PBQPRAConstraint>
    260 AArch64Subtarget::getCustomPBQPConstraints() const {
    261   return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
    262 }
    263 
    264 void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
    265   // We usually compute max call frame size after ISel. Do the computation now
    266   // if the .mir file didn't specify it. Note that this will probably give you
    267   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
    268   // instructions, specify explicitely if you need it to be correct.
    269   MachineFrameInfo &MFI = MF.getFrameInfo();
    270   if (!MFI.isMaxCallFrameSizeComputed())
    271     MFI.computeMaxCallFrameSize(MF);
    272 }
    273