//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "../Target.h"

#include "../Latency.h"
#include "../Uops.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/MC/MCInstBuilder.h"

namespace exegesis {

namespace {

// Common code for X86 Uops and Latency runners.
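// Impl is expected to provide handleCompareFP()/handleCondMovFP() and a
// `Base` typedef naming the generic runner whose generatePrototype() serves
// as the fallback.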
template <typename Impl> class X86BenchmarkRunner : public Impl {
  using Impl::Impl;

  llvm::Expected<SnippetPrototype>
  generatePrototype(unsigned Opcode) const override {
    // Test whether we can generate a snippet for this instruction.
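    // PUSHF/POPF move data through the stack and ADJCALLSTACK* are
    // call-frame pseudo-instructions; neither fits the straight-line
    // snippets we generate, so reject them up front.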
    const auto &InstrInfo = this->State.getInstrInfo();
    const auto OpcodeName = InstrInfo.getName(Opcode);
    if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
        OpcodeName.startswith("ADJCALLSTACK")) {
      return llvm::make_error<BenchmarkFailure>(
          "Unsupported opcode: Push/Pop/AdjCallStack");
    }

    // Handle X87.
    const auto &InstrDesc = InstrInfo.get(Opcode);
    const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
    const Instruction Instr(InstrDesc, this->RATC);
    switch (FPInstClass) {
    case llvm::X86II::NotFP:
      break;
    case llvm::X86II::ZeroArgFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
    case llvm::X86II::OneArgFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
    case llvm::X86II::OneArgFPRW:
    case llvm::X86II::TwoArgFP: {
      // These are instructions like
      //   - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
      //   - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
      // They are intrinsically serial and do not modify the state of the stack.
      // We generate the same code for latency and uops.
      return this->generateSelfAliasingPrototype(Instr);
    }
    case llvm::X86II::CompareFP:
      return Impl::handleCompareFP(Instr);
    case llvm::X86II::CondMovFP:
      return Impl::handleCondMovFP(Instr);
    case llvm::X86II::SpecialFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
    default:
      llvm_unreachable("Unknown FP Type!");
    }

    // Fall back to the generic implementation.
    return Impl::Base::generatePrototype(Opcode);
  }
};

class X86LatencyImpl : public LatencyBenchmarkRunner {
protected:
  using Base = LatencyBenchmarkRunner;
  using Base::Base;
  llvm::Expected<SnippetPrototype>
  handleCompareFP(const Instruction &Instr) const {
    return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
  }
  llvm::Expected<SnippetPrototype>
  handleCondMovFP(const Instruction &Instr) const {
    return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
  }
};

class X86UopsImpl : public UopsBenchmarkRunner {
protected:
  using Base = UopsBenchmarkRunner;
  using Base::Base;
  // We can compute uops for any FP instruction that does not grow or shrink
  // the stack, i.e. one that either does not touch the stack or pushes as
  // much as it pops.
  llvm::Expected<SnippetPrototype>
  handleCompareFP(const Instruction &Instr) const {
    return generateUnconstrainedPrototype(
        Instr, "instruction does not grow/shrink the FP stack");
  }
  llvm::Expected<SnippetPrototype>
  handleCondMovFP(const Instruction &Instr) const {
    return generateUnconstrainedPrototype(
        Instr, "instruction does not grow/shrink the FP stack");
  }
};

class ExegesisX86Target : public ExegesisTarget {
  void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
    // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
    PM.add(llvm::createX86FloatingPointStackifierPass());
  }

  std::vector<llvm::MCInst> setRegToConstant(const llvm::MCSubtargetInfo &STI,
                                             unsigned Reg) const override {
    // GPR.
    if (llvm::X86::GR8RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
    if (llvm::X86::GR16RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV16ri).addReg(Reg).addImm(1)};
    if (llvm::X86::GR32RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)};
    if (llvm::X86::GR64RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)};
    // MMX.
    if (llvm::X86::VR64RegClass.contains(Reg))
      return setVectorRegToConstant(Reg, 8, llvm::X86::MMX_MOVQ64rm);
    // {X,Y,Z}MM.
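    // Pick the newest encoding the subtarget supports: EVEX (AVX-512), then
    // VEX (AVX), then legacy SSE.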
    if (llvm::X86::VR128XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQU32Z128rm);
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
        return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm);
      return setVectorRegToConstant(Reg, 16, llvm::X86::MOVDQUrm);
    }
    if (llvm::X86::VR256XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQU32Z256rm);
      return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm);
    }
    if (llvm::X86::VR512RegClass.contains(Reg))
      return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU32Zrm);
    // X87.
    if (llvm::X86::RFP32RegClass.contains(Reg) ||
        llvm::X86::RFP64RegClass.contains(Reg) ||
        llvm::X86::RFP80RegClass.contains(Reg))
      return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
    if (Reg == llvm::X86::EFLAGS) {
      // Set all flags to 0 except the bits that are "reserved and set to 1".
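      // The emitted sequence is equivalent to:
      //   subq  $8, %rsp
      //   movq  $0x7002, (%rsp)
      //   popfq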
      constexpr const uint32_t kImmValue = 0x00007002u;
      std::vector<llvm::MCInst> Result;
      Result.push_back(allocateStackSpace(8));
      Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue));
      // POPF64 loads RFLAGS and also releases the stack space allocated above.
      Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64));
      return Result;
    }
    return {}; // Register is not supported.
  }

  std::unique_ptr<BenchmarkRunner>
  createLatencyBenchmarkRunner(const LLVMState &State) const override {
    return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(State);
  }

  std::unique_ptr<BenchmarkRunner>
  createUopsBenchmarkRunner(const LLVMState &State) const override {
    return llvm::make_unique<X86BenchmarkRunner<X86UopsImpl>>(State);
  }

  bool matchesArch(llvm::Triple::ArchType Arch) const override {
    return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
  }

  // setRegToConstant() specialized for a vector register of size
  // `RegSizeBytes`. `RMOpcode` is the opcode used to do a memory -> vector
  // register load.
  static std::vector<llvm::MCInst>
  setVectorRegToConstant(const unsigned Reg, const unsigned RegSizeBytes,
                         const unsigned RMOpcode) {
    // There is no instruction that directly sets an XMM register, so we go
    // through memory. Vector values can be interpreted as integers of various
    // sizes (8 to 64 bits) as well as floats and doubles, so we choose an
    // immediate value that has set bits in every byte and is a normal float/
    // double: 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
    // interpreted as a float.
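    // For a 16-byte register loaded with MOVDQUrm, for example, this emits
    // the equivalent of:
    //   subq   $16, %rsp
    //   movl   $0x40404040, (%rsp)
    //   movl   $0x40404040, 4(%rsp)
    //   movl   $0x40404040, 8(%rsp)
    //   movl   $0x40404040, 12(%rsp)
    //   movdqu (%rsp), %xmm0
    //   addq   $16, %rsp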
    constexpr const uint32_t kImmValue = 0x40404040u;
    std::vector<llvm::MCInst> Result;
    Result.push_back(allocateStackSpace(RegSizeBytes));
    constexpr const unsigned kMov32NumBytes = 4;
    for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) {
      Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue));
    }
    Result.push_back(loadToReg(Reg, RMOpcode));
    Result.push_back(releaseStackSpace(RegSizeBytes));
    return Result;
  }

  // Allocates scratch memory on the stack.
  static llvm::MCInst allocateStackSpace(unsigned Bytes) {
    // At the MC level the tied destination and source operands are both
    // explicit, hence RSP appearing twice.
    return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
        .addReg(llvm::X86::RSP)
        .addReg(llvm::X86::RSP)
        .addImm(Bytes);
  }

  // Fills scratch memory at offset `OffsetBytes` with value `Imm`.
  static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                                     uint64_t Imm) {
    return llvm::MCInstBuilder(MovOpcode)
        // Address = RSP + OffsetBytes.
        .addReg(llvm::X86::RSP) // BaseReg
        .addImm(1)              // ScaleAmt
        .addReg(0)              // IndexReg
        .addImm(OffsetBytes)    // Disp
        .addReg(0)              // Segment
        // Immediate.
        .addImm(Imm);
  }

  // Loads scratch memory into register `Reg` using opcode `RMOpcode`.
  static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
    return llvm::MCInstBuilder(RMOpcode)
        .addReg(Reg)
        // Address = RSP.
        .addReg(llvm::X86::RSP) // BaseReg
        .addImm(1)              // ScaleAmt
        .addReg(0)              // IndexReg
        .addImm(0)              // Disp
        .addReg(0);             // Segment
  }

  // Releases scratch memory.
  static llvm::MCInst releaseStackSpace(unsigned Bytes) {
    return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
        .addReg(llvm::X86::RSP)
        .addReg(llvm::X86::RSP)
        .addImm(Bytes);
  }
};

} // namespace

static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;
}

void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}

} // namespace exegesis