Home | History | Annotate | Download | only in PowerPC
      1 //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Top-level implementation for the PowerPC target.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "PPCTargetMachine.h"
     15 #include "PPC.h"
     16 #include "PPCTargetObjectFile.h"
     17 #include "PPCTargetTransformInfo.h"
     18 #include "llvm/CodeGen/LiveVariables.h"
     19 #include "llvm/CodeGen/Passes.h"
     20 #include "llvm/CodeGen/TargetPassConfig.h"
     21 #include "llvm/IR/Function.h"
     22 #include "llvm/IR/LegacyPassManager.h"
     23 #include "llvm/MC/MCStreamer.h"
     24 #include "llvm/Support/CommandLine.h"
     25 #include "llvm/Support/FormattedStream.h"
     26 #include "llvm/Support/TargetRegistry.h"
     27 #include "llvm/Target/TargetOptions.h"
     28 #include "llvm/Transforms/Scalar.h"
     29 using namespace llvm;
     30 
     31 static cl::
     32 opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
     33                         cl::desc("Disable CTR loops for PPC"));
     34 
     35 static cl::
     36 opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
     37                             cl::desc("Disable PPC loop preinc prep"));
     38 
     39 static cl::opt<bool>
     40 VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
     41   cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
     42 
     43 static cl::
     44 opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
     45                                 cl::desc("Disable VSX Swap Removal for PPC"));
     46 
     47 static cl::
     48 opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
     49                               cl::desc("Disable QPX load splat simplification"));
     50 
     51 static cl::
     52 opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
     53                             cl::desc("Disable machine peepholes for PPC"));
     54 
     55 static cl::opt<bool>
     56 EnableGEPOpt("ppc-gep-opt", cl::Hidden,
     57              cl::desc("Enable optimizations on complex GEPs"),
     58              cl::init(true));
     59 
     60 static cl::opt<bool>
     61 EnablePrefetch("enable-ppc-prefetching",
     62                   cl::desc("disable software prefetching on PPC"),
     63                   cl::init(false), cl::Hidden);
     64 
     65 static cl::opt<bool>
     66 EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
     67                       cl::desc("Add extra TOC register dependencies"),
     68                       cl::init(true), cl::Hidden);
     69 
     70 static cl::opt<bool>
     71 EnableMachineCombinerPass("ppc-machine-combiner",
     72                           cl::desc("Enable the machine combiner pass"),
     73                           cl::init(true), cl::Hidden);
     74 
     75 extern "C" void LLVMInitializePowerPCTarget() {
     76   // Register the targets
     77   RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
     78   RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
     79   RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
     80 
     81   PassRegistry &PR = *PassRegistry::getPassRegistry();
     82   initializePPCBoolRetToIntPass(PR);
     83 }
     84 
     85 /// Return the datalayout string of a subtarget.
     86 static std::string getDataLayoutString(const Triple &T) {
     87   bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
     88   std::string Ret;
     89 
     90   // Most PPC* platforms are big endian, PPC64LE is little endian.
     91   if (T.getArch() == Triple::ppc64le)
     92     Ret = "e";
     93   else
     94     Ret = "E";
     95 
     96   Ret += DataLayout::getManglingComponent(T);
     97 
     98   // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
     99   // pointers.
    100   if (!is64Bit || T.getOS() == Triple::Lv2)
    101     Ret += "-p:32:32";
    102 
    103   // Note, the alignment values for f64 and i64 on ppc64 in Darwin
    104   // documentation are wrong; these are correct (i.e. "what gcc does").
    105   if (is64Bit || !T.isOSDarwin())
    106     Ret += "-i64:64";
    107   else
    108     Ret += "-f64:32:64";
    109 
    110   // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
    111   if (is64Bit)
    112     Ret += "-n32:64";
    113   else
    114     Ret += "-n32";
    115 
    116   return Ret;
    117 }
    118 
    119 static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
    120                                       const Triple &TT) {
    121   std::string FullFS = FS;
    122 
    123   // Make sure 64-bit features are available when CPUname is generic
    124   if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
    125     if (!FullFS.empty())
    126       FullFS = "+64bit," + FullFS;
    127     else
    128       FullFS = "+64bit";
    129   }
    130 
    131   if (OL >= CodeGenOpt::Default) {
    132     if (!FullFS.empty())
    133       FullFS = "+crbits," + FullFS;
    134     else
    135       FullFS = "+crbits";
    136   }
    137 
    138   if (OL != CodeGenOpt::None) {
    139     if (!FullFS.empty())
    140       FullFS = "+invariant-function-descriptors," + FullFS;
    141     else
    142       FullFS = "+invariant-function-descriptors";
    143   }
    144 
    145   return FullFS;
    146 }
    147 
    148 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
    149   // If it isn't a Mach-O file then it's going to be a linux ELF
    150   // object file.
    151   if (TT.isOSDarwin())
    152     return make_unique<TargetLoweringObjectFileMachO>();
    153 
    154   return make_unique<PPC64LinuxTargetObjectFile>();
    155 }
    156 
    157 static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
    158                                                  const TargetOptions &Options) {
    159   if (Options.MCOptions.getABIName().startswith("elfv1"))
    160     return PPCTargetMachine::PPC_ABI_ELFv1;
    161   else if (Options.MCOptions.getABIName().startswith("elfv2"))
    162     return PPCTargetMachine::PPC_ABI_ELFv2;
    163 
    164   assert(Options.MCOptions.getABIName().empty() &&
    165          "Unknown target-abi option!");
    166 
    167   if (!TT.isMacOSX()) {
    168     switch (TT.getArch()) {
    169     case Triple::ppc64le:
    170       return PPCTargetMachine::PPC_ABI_ELFv2;
    171     case Triple::ppc64:
    172       return PPCTargetMachine::PPC_ABI_ELFv1;
    173     default:
    174       // Fallthrough.
    175       ;
    176     }
    177   }
    178   return PPCTargetMachine::PPC_ABI_UNKNOWN;
    179 }
    180 
    181 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
    182                                            Optional<Reloc::Model> RM) {
    183   if (!RM.hasValue()) {
    184     if (TT.isOSDarwin())
    185       return Reloc::DynamicNoPIC;
    186     return Reloc::Static;
    187   }
    188   return *RM;
    189 }
    190 
    191 // The FeatureString here is a little subtle. We are modifying the feature
    192 // string with what are (currently) non-function specific overrides as it goes
    193 // into the LLVMTargetMachine constructor and then using the stored value in the
    194 // Subtarget constructor below it.
    195 PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
    196                                    StringRef CPU, StringRef FS,
    197                                    const TargetOptions &Options,
    198                                    Optional<Reloc::Model> RM,
    199                                    CodeModel::Model CM, CodeGenOpt::Level OL)
    200     : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
    201                         computeFSAdditions(FS, OL, TT), Options,
    202                         getEffectiveRelocModel(TT, RM), CM, OL),
    203       TLOF(createTLOF(getTargetTriple())),
    204       TargetABI(computeTargetABI(TT, Options)),
    205       Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
    206 
    207   // For the estimates, convergence is quadratic, so we essentially double the
    208   // number of digits correct after every iteration. For both FRE and FRSQRTE,
    209   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
    210   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
    211   unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
    212            RefinementSteps64 = RefinementSteps + 1;
    213 
    214   this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
    215   this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
    216   this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
    217   this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
    218 
    219   this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
    220   this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
    221   this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
    222   this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
    223 
    224   initAsmInfo();
    225 }
    226 
    227 PPCTargetMachine::~PPCTargetMachine() {}
    228 
    229 void PPC32TargetMachine::anchor() { }
    230 
    231 PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
    232                                        StringRef CPU, StringRef FS,
    233                                        const TargetOptions &Options,
    234                                        Optional<Reloc::Model> RM,
    235                                        CodeModel::Model CM,
    236                                        CodeGenOpt::Level OL)
    237     : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
    238 
    239 void PPC64TargetMachine::anchor() { }
    240 
    241 PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT,
    242                                        StringRef CPU, StringRef FS,
    243                                        const TargetOptions &Options,
    244                                        Optional<Reloc::Model> RM,
    245                                        CodeModel::Model CM,
    246                                        CodeGenOpt::Level OL)
    247     : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
    248 
    249 const PPCSubtarget *
    250 PPCTargetMachine::getSubtargetImpl(const Function &F) const {
    251   Attribute CPUAttr = F.getFnAttribute("target-cpu");
    252   Attribute FSAttr = F.getFnAttribute("target-features");
    253 
    254   std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
    255                         ? CPUAttr.getValueAsString().str()
    256                         : TargetCPU;
    257   std::string FS = !FSAttr.hasAttribute(Attribute::None)
    258                        ? FSAttr.getValueAsString().str()
    259                        : TargetFS;
    260 
    261   // FIXME: This is related to the code below to reset the target options,
    262   // we need to know whether or not the soft float flag is set on the
    263   // function before we can generate a subtarget. We also need to use
    264   // it as a key for the subtarget since that can be the only difference
    265   // between two functions.
    266   bool SoftFloat =
    267       F.getFnAttribute("use-soft-float").getValueAsString() == "true";
    268   // If the soft float attribute is set on the function turn on the soft float
    269   // subtarget feature.
    270   if (SoftFloat)
    271     FS += FS.empty() ? "+soft-float" : ",+soft-float";
    272 
    273   auto &I = SubtargetMap[CPU + FS];
    274   if (!I) {
    275     // This needs to be done before we create a new subtarget since any
    276     // creation will depend on the TM and the code generation flags on the
    277     // function that reside in TargetOptions.
    278     resetTargetOptions(F);
    279     I = llvm::make_unique<PPCSubtarget>(
    280         TargetTriple, CPU,
    281         // FIXME: It would be good to have the subtarget additions here
    282         // not necessary. Anything that turns them on/off (overrides) ends
    283         // up being put at the end of the feature string, but the defaults
    284         // shouldn't require adding them. Fixing this means pulling Feature64Bit
    285         // out of most of the target cpus in the .td file and making it set only
    286         // as part of initialization via the TargetTriple.
    287         computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
    288   }
    289   return I.get();
    290 }
    291 
    292 //===----------------------------------------------------------------------===//
    293 // Pass Pipeline Configuration
    294 //===----------------------------------------------------------------------===//
    295 
    296 namespace {
    297 /// PPC Code Generator Pass Configuration Options.
    298 class PPCPassConfig : public TargetPassConfig {
    299 public:
    300   PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
    301     : TargetPassConfig(TM, PM) {}
    302 
    303   PPCTargetMachine &getPPCTargetMachine() const {
    304     return getTM<PPCTargetMachine>();
    305   }
    306 
    307   void addIRPasses() override;
    308   bool addPreISel() override;
    309   bool addILPOpts() override;
    310   bool addInstSelector() override;
    311   void addMachineSSAOptimization() override;
    312   void addPreRegAlloc() override;
    313   void addPreSched2() override;
    314   void addPreEmitPass() override;
    315 };
    316 } // namespace
    317 
    318 TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
    319   return new PPCPassConfig(this, PM);
    320 }
    321 
    322 void PPCPassConfig::addIRPasses() {
    323   if (TM->getOptLevel() != CodeGenOpt::None)
    324     addPass(createPPCBoolRetToIntPass());
    325   addPass(createAtomicExpandPass(&getPPCTargetMachine()));
    326 
    327   // For the BG/Q (or if explicitly requested), add explicit data prefetch
    328   // intrinsics.
    329   bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
    330                         getOptLevel() != CodeGenOpt::None;
    331   if (EnablePrefetch.getNumOccurrences() > 0)
    332     UsePrefetching = EnablePrefetch;
    333   if (UsePrefetching)
    334     addPass(createLoopDataPrefetchPass());
    335 
    336   if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
    337     // Call SeparateConstOffsetFromGEP pass to extract constants within indices
    338     // and lower a GEP with multiple indices to either arithmetic operations or
    339     // multiple GEPs with single index.
    340     addPass(createSeparateConstOffsetFromGEPPass(TM, true));
    341     // Call EarlyCSE pass to find and remove subexpressions in the lowered
    342     // result.
    343     addPass(createEarlyCSEPass());
    344     // Do loop invariant code motion in case part of the lowered result is
    345     // invariant.
    346     addPass(createLICMPass());
    347   }
    348 
    349   TargetPassConfig::addIRPasses();
    350 }
    351 
    352 bool PPCPassConfig::addPreISel() {
    353   if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
    354     addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
    355 
    356   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
    357     addPass(createPPCCTRLoops(getPPCTargetMachine()));
    358 
    359   return false;
    360 }
    361 
    362 bool PPCPassConfig::addILPOpts() {
    363   addPass(&EarlyIfConverterID);
    364 
    365   if (EnableMachineCombinerPass)
    366     addPass(&MachineCombinerID);
    367 
    368   return true;
    369 }
    370 
    371 bool PPCPassConfig::addInstSelector() {
    372   // Install an instruction selector.
    373   addPass(createPPCISelDag(getPPCTargetMachine()));
    374 
    375 #ifndef NDEBUG
    376   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
    377     addPass(createPPCCTRLoopsVerify());
    378 #endif
    379 
    380   addPass(createPPCVSXCopyPass());
    381   return false;
    382 }
    383 
    384 void PPCPassConfig::addMachineSSAOptimization() {
    385   TargetPassConfig::addMachineSSAOptimization();
    386   // For little endian, remove where possible the vector swap instructions
    387   // introduced at code generation to normalize vector element order.
    388   if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
    389       !DisableVSXSwapRemoval)
    390     addPass(createPPCVSXSwapRemovalPass());
    391   // Target-specific peephole cleanups performed after instruction
    392   // selection.
    393   if (!DisableMIPeephole) {
    394     addPass(createPPCMIPeepholePass());
    395     addPass(&DeadMachineInstructionElimID);
    396   }
    397 }
    398 
    399 void PPCPassConfig::addPreRegAlloc() {
    400   if (getOptLevel() != CodeGenOpt::None) {
    401     initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
    402     insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
    403                &PPCVSXFMAMutateID);
    404   }
    405 
    406   // FIXME: We probably don't need to run these for -fPIE.
    407   if (getPPCTargetMachine().isPositionIndependent()) {
    408     // FIXME: LiveVariables should not be necessary here!
    409     // PPCTLSDYnamicCallPass uses LiveIntervals which previously dependet on
    410     // LiveVariables. This (unnecessary) dependency has been removed now,
    411     // however a stage-2 clang build fails without LiveVariables computed here.
    412     addPass(&LiveVariablesID, false);
    413     addPass(createPPCTLSDynamicCallPass());
    414   }
    415   if (EnableExtraTOCRegDeps)
    416     addPass(createPPCTOCRegDepsPass());
    417 }
    418 
    419 void PPCPassConfig::addPreSched2() {
    420   if (getOptLevel() != CodeGenOpt::None) {
    421     addPass(&IfConverterID);
    422 
    423     // This optimization must happen after anything that might do store-to-load
    424     // forwarding. Here we're after RA (and, thus, when spills are inserted)
    425     // but before post-RA scheduling.
    426     if (!DisableQPXLoadSplat)
    427       addPass(createPPCQPXLoadSplatPass());
    428   }
    429 }
    430 
    431 void PPCPassConfig::addPreEmitPass() {
    432   if (getOptLevel() != CodeGenOpt::None)
    433     addPass(createPPCEarlyReturnPass(), false);
    434   // Must run branch selection immediately preceding the asm printer.
    435   addPass(createPPCBranchSelectionPass(), false);
    436 }
    437 
    438 TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
    439   return TargetIRAnalysis([this](const Function &F) {
    440     return TargetTransformInfo(PPCTTIImpl(this, F));
    441   });
    442 }
    443