Home | History | Annotate | Download | only in X86
      1 //===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the X86 specific subclass of TargetSubtargetInfo.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #define DEBUG_TYPE "subtarget"
     15 #include "X86Subtarget.h"
     16 #include "X86InstrInfo.h"
     17 #include "llvm/IR/Attributes.h"
     18 #include "llvm/IR/Function.h"
     19 #include "llvm/IR/GlobalValue.h"
     20 #include "llvm/Support/Debug.h"
     21 #include "llvm/Support/ErrorHandling.h"
     22 #include "llvm/Support/Host.h"
     23 #include "llvm/Support/raw_ostream.h"
     24 #include "llvm/Target/TargetMachine.h"
     25 #include "llvm/Target/TargetOptions.h"
     26 
     27 #define GET_SUBTARGETINFO_TARGET_DESC
     28 #define GET_SUBTARGETINFO_CTOR
     29 #include "X86GenSubtargetInfo.inc"
     30 
     31 using namespace llvm;
     32 
     33 #if defined(_MSC_VER)
     34 #include <intrin.h>
     35 #endif
     36 
     37 /// ClassifyBlockAddressReference - Classify a blockaddress reference for the
     38 /// current subtarget according to how we should reference it in a non-pcrel
     39 /// context.
     40 unsigned char X86Subtarget::
     41 ClassifyBlockAddressReference() const {
     42   if (isPICStyleGOT())    // 32-bit ELF targets.
     43     return X86II::MO_GOTOFF;
     44 
     45   if (isPICStyleStubPIC())   // Darwin/32 in PIC mode.
     46     return X86II::MO_PIC_BASE_OFFSET;
     47 
     48   // Direct static reference to label.
     49   return X86II::MO_NO_FLAG;
     50 }
     51 
     52 /// ClassifyGlobalReference - Classify a global variable reference for the
     53 /// current subtarget according to how we should reference it in a non-pcrel
     54 /// context.
     55 unsigned char X86Subtarget::
     56 ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
     57   // DLLImport only exists on windows, it is implemented as a load from a
     58   // DLLIMPORT stub.
     59   if (GV->hasDLLImportLinkage())
     60     return X86II::MO_DLLIMPORT;
     61 
     62   // Determine whether this is a reference to a definition or a declaration.
     63   // Materializable GVs (in JIT lazy compilation mode) do not require an extra
     64   // load from stub.
     65   bool isDecl = GV->hasAvailableExternallyLinkage();
     66   if (GV->isDeclaration() && !GV->isMaterializable())
     67     isDecl = true;
     68 
     69   // X86-64 in PIC mode.
     70   if (isPICStyleRIPRel()) {
     71     // Large model never uses stubs.
     72     if (TM.getCodeModel() == CodeModel::Large)
     73       return X86II::MO_NO_FLAG;
     74 
     75     if (isTargetDarwin()) {
     76       // If symbol visibility is hidden, the extra load is not needed if
     77       // target is x86-64 or the symbol is definitely defined in the current
     78       // translation unit.
     79       if (GV->hasDefaultVisibility() &&
     80           (isDecl || GV->isWeakForLinker()))
     81         return X86II::MO_GOTPCREL;
     82     } else if (!isTargetWin64()) {
     83       assert(isTargetELF() && "Unknown rip-relative target");
     84 
     85       // Extra load is needed for all externally visible.
     86       if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
     87         return X86II::MO_GOTPCREL;
     88     }
     89 
     90     return X86II::MO_NO_FLAG;
     91   }
     92 
     93   if (isPICStyleGOT()) {   // 32-bit ELF targets.
     94     // Extra load is needed for all externally visible.
     95     if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
     96       return X86II::MO_GOTOFF;
     97     return X86II::MO_GOT;
     98   }
     99 
    100   if (isPICStyleStubPIC()) {  // Darwin/32 in PIC mode.
    101     // Determine whether we have a stub reference and/or whether the reference
    102     // is relative to the PIC base or not.
    103 
    104     // If this is a strong reference to a definition, it is definitely not
    105     // through a stub.
    106     if (!isDecl && !GV->isWeakForLinker())
    107       return X86II::MO_PIC_BASE_OFFSET;
    108 
    109     // Unless we have a symbol with hidden visibility, we have to go through a
    110     // normal $non_lazy_ptr stub because this symbol might be resolved late.
    111     if (!GV->hasHiddenVisibility())  // Non-hidden $non_lazy_ptr reference.
    112       return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
    113 
    114     // If symbol visibility is hidden, we have a stub for common symbol
    115     // references and external declarations.
    116     if (isDecl || GV->hasCommonLinkage()) {
    117       // Hidden $non_lazy_ptr reference.
    118       return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
    119     }
    120 
    121     // Otherwise, no stub.
    122     return X86II::MO_PIC_BASE_OFFSET;
    123   }
    124 
    125   if (isPICStyleStubNoDynamic()) {  // Darwin/32 in -mdynamic-no-pic mode.
    126     // Determine whether we have a stub reference.
    127 
    128     // If this is a strong reference to a definition, it is definitely not
    129     // through a stub.
    130     if (!isDecl && !GV->isWeakForLinker())
    131       return X86II::MO_NO_FLAG;
    132 
    133     // Unless we have a symbol with hidden visibility, we have to go through a
    134     // normal $non_lazy_ptr stub because this symbol might be resolved late.
    135     if (!GV->hasHiddenVisibility())  // Non-hidden $non_lazy_ptr reference.
    136       return X86II::MO_DARWIN_NONLAZY;
    137 
    138     // Otherwise, no stub.
    139     return X86II::MO_NO_FLAG;
    140   }
    141 
    142   // Direct static reference to global.
    143   return X86II::MO_NO_FLAG;
    144 }
    145 
    146 
    147 /// getBZeroEntry - This function returns the name of a function which has an
    148 /// interface like the non-standard bzero function, if such a function exists on
    149 /// the current subtarget and it is considered prefereable over memset with zero
    150 /// passed as the second argument. Otherwise it returns null.
    151 const char *X86Subtarget::getBZeroEntry() const {
    152   // Darwin 10 has a __bzero entry point for this purpose.
    153   if (getTargetTriple().isMacOSX() &&
    154       !getTargetTriple().isMacOSXVersionLT(10, 6))
    155     return "__bzero";
    156 
    157   return 0;
    158 }
    159 
    160 bool X86Subtarget::hasSinCos() const {
    161   return getTargetTriple().isMacOSX() &&
    162     !getTargetTriple().isMacOSXVersionLT(10, 9) &&
    163     is64Bit();
    164 }
    165 
    166 /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
    167 /// to immediate address.
    168 bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
    169   if (In64BitMode)
    170     return false;
    171   return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
    172 }
    173 
    174 void X86Subtarget::AutoDetectSubtargetFeatures() {
    175   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
    176   unsigned MaxLevel;
    177   union {
    178     unsigned u[3];
    179     char     c[12];
    180   } text;
    181 
    182   if (X86_MC::GetCpuIDAndInfo(0, &MaxLevel, text.u+0, text.u+2, text.u+1) ||
    183       MaxLevel < 1)
    184     return;
    185 
    186   X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
    187 
    188   if ((EDX >> 15) & 1) { HasCMov = true;      ToggleFeature(X86::FeatureCMOV); }
    189   if ((EDX >> 23) & 1) { X86SSELevel = MMX;   ToggleFeature(X86::FeatureMMX);  }
    190   if ((EDX >> 25) & 1) { X86SSELevel = SSE1;  ToggleFeature(X86::FeatureSSE1); }
    191   if ((EDX >> 26) & 1) { X86SSELevel = SSE2;  ToggleFeature(X86::FeatureSSE2); }
    192   if (ECX & 0x1)       { X86SSELevel = SSE3;  ToggleFeature(X86::FeatureSSE3); }
    193   if ((ECX >> 9)  & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);}
    194   if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
    195   if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
    196   if ((ECX >> 28) & 1) { X86SSELevel = AVX;   ToggleFeature(X86::FeatureAVX); }
    197 
    198   bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
    199   bool IsAMD   = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
    200 
    201   if ((ECX >> 1) & 0x1) {
    202     HasPCLMUL = true;
    203     ToggleFeature(X86::FeaturePCLMUL);
    204   }
    205   if ((ECX >> 12) & 0x1) {
    206     HasFMA = true;
    207     ToggleFeature(X86::FeatureFMA);
    208   }
    209   if (IsIntel && ((ECX >> 22) & 0x1)) {
    210     HasMOVBE = true;
    211     ToggleFeature(X86::FeatureMOVBE);
    212   }
    213   if ((ECX >> 23) & 0x1) {
    214     HasPOPCNT = true;
    215     ToggleFeature(X86::FeaturePOPCNT);
    216   }
    217   if ((ECX >> 25) & 0x1) {
    218     HasAES = true;
    219     ToggleFeature(X86::FeatureAES);
    220   }
    221   if ((ECX >> 29) & 0x1) {
    222     HasF16C = true;
    223     ToggleFeature(X86::FeatureF16C);
    224   }
    225   if (IsIntel && ((ECX >> 30) & 0x1)) {
    226     HasRDRAND = true;
    227     ToggleFeature(X86::FeatureRDRAND);
    228   }
    229 
    230   if ((ECX >> 13) & 0x1) {
    231     HasCmpxchg16b = true;
    232     ToggleFeature(X86::FeatureCMPXCHG16B);
    233   }
    234 
    235   if (IsIntel || IsAMD) {
    236     // Determine if bit test memory instructions are slow.
    237     unsigned Family = 0;
    238     unsigned Model  = 0;
    239     X86_MC::DetectFamilyModel(EAX, Family, Model);
    240     if (IsAMD || (Family == 6 && Model >= 13)) {
    241       IsBTMemSlow = true;
    242       ToggleFeature(X86::FeatureSlowBTMem);
    243     }
    244 
    245     // If it's an Intel chip since Nehalem and not an Atom chip, unaligned
    246     // memory access is fast. We hard code model numbers here because they
    247     // aren't strictly increasing for Intel chips it seems.
    248     if (IsIntel &&
    249         ((Family == 6 && Model == 0x1E) || // Nehalem: Clarksfield, Lynnfield,
    250                                            //          Jasper Froest
    251          (Family == 6 && Model == 0x1A) || // Nehalem: Bloomfield, Nehalem-EP
    252          (Family == 6 && Model == 0x2E) || // Nehalem: Nehalem-EX
    253          (Family == 6 && Model == 0x25) || // Westmere: Arrandale, Clarksdale
    254          (Family == 6 && Model == 0x2C) || // Westmere: Gulftown, Westmere-EP
    255          (Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
    256          (Family == 6 && Model == 0x2A) || // SandyBridge
    257          (Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
    258          (Family == 6 && Model == 0x3A))) {// IvyBridge
    259       IsUAMemFast = true;
    260       ToggleFeature(X86::FeatureFastUAMem);
    261     }
    262 
    263     // Set processor type. Currently only Atom is detected.
    264     if (Family == 6 &&
    265         (Model == 28 || Model == 38 || Model == 39
    266          || Model == 53 || Model == 54)) {
    267       X86ProcFamily = IntelAtom;
    268 
    269       UseLeaForSP = true;
    270       ToggleFeature(X86::FeatureLeaForSP);
    271     }
    272 
    273     unsigned MaxExtLevel;
    274     X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
    275 
    276     if (MaxExtLevel >= 0x80000001) {
    277       X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
    278       if ((EDX >> 29) & 0x1) {
    279         HasX86_64 = true;
    280         ToggleFeature(X86::Feature64Bit);
    281       }
    282       if ((ECX >> 5) & 0x1) {
    283         HasLZCNT = true;
    284         ToggleFeature(X86::FeatureLZCNT);
    285       }
    286       if (IsAMD) {
    287         if ((ECX >> 6) & 0x1) {
    288           HasSSE4A = true;
    289           ToggleFeature(X86::FeatureSSE4A);
    290         }
    291         if ((ECX >> 11) & 0x1) {
    292           HasXOP = true;
    293           ToggleFeature(X86::FeatureXOP);
    294         }
    295         if ((ECX >> 16) & 0x1) {
    296           HasFMA4 = true;
    297           ToggleFeature(X86::FeatureFMA4);
    298         }
    299       }
    300     }
    301   }
    302 
    303   if (MaxLevel >= 7) {
    304     if (!X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX)) {
    305       if (IsIntel && (EBX & 0x1)) {
    306         HasFSGSBase = true;
    307         ToggleFeature(X86::FeatureFSGSBase);
    308       }
    309       if ((EBX >> 3) & 0x1) {
    310         HasBMI = true;
    311         ToggleFeature(X86::FeatureBMI);
    312       }
    313       if (IsIntel && ((EBX >> 5) & 0x1)) {
    314         X86SSELevel = AVX2;
    315         ToggleFeature(X86::FeatureAVX2);
    316       }
    317       if (IsIntel && ((EBX >> 8) & 0x1)) {
    318         HasBMI2 = true;
    319         ToggleFeature(X86::FeatureBMI2);
    320       }
    321       if (IsIntel && ((EBX >> 11) & 0x1)) {
    322         HasRTM = true;
    323         ToggleFeature(X86::FeatureRTM);
    324       }
    325     }
    326   }
    327 }
    328 
    329 void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) {
    330   AttributeSet FnAttrs = MF->getFunction()->getAttributes();
    331   Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
    332                                            "target-cpu");
    333   Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
    334                                           "target-features");
    335   std::string CPU =
    336     !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : "";
    337   std::string FS =
    338     !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
    339   if (!FS.empty()) {
    340     initializeEnvironment();
    341     resetSubtargetFeatures(CPU, FS);
    342   }
    343 }
    344 
    345 void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
    346   std::string CPUName = CPU;
    347   if (!FS.empty() || !CPU.empty()) {
    348     if (CPUName.empty()) {
    349 #if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
    350     || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
    351       CPUName = sys::getHostCPUName();
    352 #else
    353       CPUName = "generic";
    354 #endif
    355     }
    356 
    357     // Make sure 64-bit features are available in 64-bit mode. (But make sure
    358     // SSE2 can be turned off explicitly.)
    359     std::string FullFS = FS;
    360     if (In64BitMode) {
    361       if (!FullFS.empty())
    362         FullFS = "+64bit,+sse2," + FullFS;
    363       else
    364         FullFS = "+64bit,+sse2";
    365     }
    366 
    367     // If feature string is not empty, parse features string.
    368     ParseSubtargetFeatures(CPUName, FullFS);
    369   } else {
    370     if (CPUName.empty()) {
    371 #if defined (__x86_64__) || defined(__i386__)
    372       CPUName = sys::getHostCPUName();
    373 #else
    374       CPUName = "generic";
    375 #endif
    376     }
    377     // Otherwise, use CPUID to auto-detect feature set.
    378     AutoDetectSubtargetFeatures();
    379 
    380     // Make sure 64-bit features are available in 64-bit mode.
    381     if (In64BitMode) {
    382       HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
    383       HasCMov = true;   ToggleFeature(X86::FeatureCMOV);
    384 
    385       if (X86SSELevel < SSE2) {
    386         X86SSELevel = SSE2;
    387         ToggleFeature(X86::FeatureSSE1);
    388         ToggleFeature(X86::FeatureSSE2);
    389       }
    390     }
    391   }
    392 
    393   // CPUName may have been set by the CPU detection code. Make sure the
    394   // new MCSchedModel is used.
    395   InitMCProcessorInfo(CPUName, FS);
    396 
    397   if (X86ProcFamily == IntelAtom)
    398     PostRAScheduler = true;
    399 
    400   InstrItins = getInstrItineraryForCPU(CPUName);
    401 
    402   // It's important to keep the MCSubtargetInfo feature bits in sync with
    403   // target data structure which is shared with MC code emitter, etc.
    404   if (In64BitMode)
    405     ToggleFeature(X86::Mode64Bit);
    406 
    407   DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
    408                << ", 3DNowLevel " << X863DNowLevel
    409                << ", 64bit " << HasX86_64 << "\n");
    410   assert((!In64BitMode || HasX86_64) &&
    411          "64-bit code requested on a subtarget that doesn't support it!");
    412 
    413   // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both
    414   // 32 and 64 bit) and for all 64-bit targets.
    415   if (StackAlignOverride)
    416     stackAlignment = StackAlignOverride;
    417   else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
    418            In64BitMode)
    419     stackAlignment = 16;
    420 }
    421 
    422 void X86Subtarget::initializeEnvironment() {
    423   X86SSELevel = NoMMXSSE;
    424   X863DNowLevel = NoThreeDNow;
    425   HasCMov = false;
    426   HasX86_64 = false;
    427   HasPOPCNT = false;
    428   HasSSE4A = false;
    429   HasAES = false;
    430   HasPCLMUL = false;
    431   HasFMA = false;
    432   HasFMA4 = false;
    433   HasXOP = false;
    434   HasMOVBE = false;
    435   HasRDRAND = false;
    436   HasF16C = false;
    437   HasFSGSBase = false;
    438   HasLZCNT = false;
    439   HasBMI = false;
    440   HasBMI2 = false;
    441   HasRTM = false;
    442   HasADX = false;
    443   IsBTMemSlow = false;
    444   IsUAMemFast = false;
    445   HasVectorUAMem = false;
    446   HasCmpxchg16b = false;
    447   UseLeaForSP = false;
    448   HasSlowDivide = false;
    449   PostRAScheduler = false;
    450   PadShortFunctions = false;
    451   stackAlignment = 4;
    452   // FIXME: this is a known good value for Yonah. How about others?
    453   MaxInlineSizeThreshold = 128;
    454 }
    455 
    456 X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
    457                            const std::string &FS,
    458                            unsigned StackAlignOverride, bool is64Bit)
    459   : X86GenSubtargetInfo(TT, CPU, FS)
    460   , X86ProcFamily(Others)
    461   , PICStyle(PICStyles::None)
    462   , TargetTriple(TT)
    463   , StackAlignOverride(StackAlignOverride)
    464   , In64BitMode(is64Bit) {
    465   initializeEnvironment();
    466   resetSubtargetFeatures(CPU, FS);
    467 }
    468 
    469 bool X86Subtarget::enablePostRAScheduler(
    470            CodeGenOpt::Level OptLevel,
    471            TargetSubtargetInfo::AntiDepBreakMode& Mode,
    472            RegClassVector& CriticalPathRCs) const {
    473   Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
    474   CriticalPathRCs.clear();
    475   return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
    476 }
    477