1 //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file declares the X86 specific subclass of TargetSubtargetInfo. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H 15 #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H 16 17 #include "X86FrameLowering.h" 18 #include "X86ISelLowering.h" 19 #include "X86InstrInfo.h" 20 #include "X86SelectionDAGInfo.h" 21 #include "llvm/ADT/Triple.h" 22 #include "llvm/IR/CallingConv.h" 23 #include "llvm/Target/TargetSubtargetInfo.h" 24 #include <string> 25 26 #define GET_SUBTARGETINFO_HEADER 27 #include "X86GenSubtargetInfo.inc" 28 29 namespace llvm { 30 class GlobalValue; 31 class StringRef; 32 class TargetMachine; 33 34 /// The X86 backend supports a number of different styles of PIC. 35 /// 36 namespace PICStyles { 37 enum Style { 38 StubPIC, // Used on i386-darwin in pic mode. 39 GOT, // Used on 32 bit elf on when in pic mode. 40 RIPRel, // Used on X86-64 when in pic mode. 41 None // Set when not in pic mode. 42 }; 43 } 44 45 class X86Subtarget final : public X86GenSubtargetInfo { 46 47 protected: 48 enum X86SSEEnum { 49 NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F 50 }; 51 52 enum X863DNowEnum { 53 NoThreeDNow, MMX, ThreeDNow, ThreeDNowA 54 }; 55 56 enum X86ProcFamilyEnum { 57 Others, IntelAtom, IntelSLM 58 }; 59 60 /// X86 processor family: Intel Atom, and others 61 X86ProcFamilyEnum X86ProcFamily; 62 63 /// Which PIC style to use 64 PICStyles::Style PICStyle; 65 66 const TargetMachine &TM; 67 68 /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. 69 X86SSEEnum X86SSELevel; 70 71 /// MMX, 3DNow, 3DNow Athlon, or none supported. 72 X863DNowEnum X863DNowLevel; 73 74 /// True if the processor supports X87 instructions. 75 bool HasX87; 76 77 /// True if this processor has conditional move instructions 78 /// (generally pentium pro+). 79 bool HasCMov; 80 81 /// True if the processor supports X86-64 instructions. 82 bool HasX86_64; 83 84 /// True if the processor supports POPCNT. 85 bool HasPOPCNT; 86 87 /// True if the processor supports SSE4A instructions. 88 bool HasSSE4A; 89 90 /// Target has AES instructions 91 bool HasAES; 92 93 /// Target has FXSAVE/FXRESTOR instructions 94 bool HasFXSR; 95 96 /// Target has XSAVE instructions 97 bool HasXSAVE; 98 /// Target has XSAVEOPT instructions 99 bool HasXSAVEOPT; 100 /// Target has XSAVEC instructions 101 bool HasXSAVEC; 102 /// Target has XSAVES instructions 103 bool HasXSAVES; 104 105 /// Target has carry-less multiplication 106 bool HasPCLMUL; 107 108 /// Target has 3-operand fused multiply-add 109 bool HasFMA; 110 111 /// Target has 4-operand fused multiply-add 112 bool HasFMA4; 113 114 /// Target has XOP instructions 115 bool HasXOP; 116 117 /// Target has TBM instructions. 118 bool HasTBM; 119 120 /// True if the processor has the MOVBE instruction. 121 bool HasMOVBE; 122 123 /// True if the processor has the RDRAND instruction. 124 bool HasRDRAND; 125 126 /// Processor has 16-bit floating point conversion instructions. 127 bool HasF16C; 128 129 /// Processor has FS/GS base insturctions. 130 bool HasFSGSBase; 131 132 /// Processor has LZCNT instruction. 133 bool HasLZCNT; 134 135 /// Processor has BMI1 instructions. 136 bool HasBMI; 137 138 /// Processor has BMI2 instructions. 139 bool HasBMI2; 140 141 /// Processor has VBMI instructions. 142 bool HasVBMI; 143 144 /// Processor has Integer Fused Multiply Add 145 bool HasIFMA; 146 147 /// Processor has RTM instructions. 148 bool HasRTM; 149 150 /// Processor has HLE. 151 bool HasHLE; 152 153 /// Processor has ADX instructions. 154 bool HasADX; 155 156 /// Processor has SHA instructions. 157 bool HasSHA; 158 159 /// Processor has PRFCHW instructions. 160 bool HasPRFCHW; 161 162 /// Processor has RDSEED instructions. 163 bool HasRDSEED; 164 165 /// Processor has LAHF/SAHF instructions. 166 bool HasLAHFSAHF; 167 168 /// Processor has MONITORX/MWAITX instructions. 169 bool HasMWAITX; 170 171 /// Processor has Prefetch with intent to Write instruction 172 bool HasPFPREFETCHWT1; 173 174 /// True if BT (bit test) of memory instructions are slow. 175 bool IsBTMemSlow; 176 177 /// True if SHLD instructions are slow. 178 bool IsSHLDSlow; 179 180 /// True if unaligned memory accesses of 16-bytes are slow. 181 bool IsUAMem16Slow; 182 183 /// True if unaligned memory accesses of 32-bytes are slow. 184 bool IsUAMem32Slow; 185 186 /// True if SSE operations can have unaligned memory operands. 187 /// This may require setting a configuration bit in the processor. 188 bool HasSSEUnalignedMem; 189 190 /// True if this processor has the CMPXCHG16B instruction; 191 /// this is true for most x86-64 chips, but not the first AMD chips. 192 bool HasCmpxchg16b; 193 194 /// True if the LEA instruction should be used for adjusting 195 /// the stack pointer. This is an optimization for Intel Atom processors. 196 bool UseLeaForSP; 197 198 /// True if there is no performance penalty to writing only the lower parts 199 /// of a YMM register without clearing the upper part. 200 bool HasFastPartialYMMWrite; 201 202 /// True if 8-bit divisions are significantly faster than 203 /// 32-bit divisions and should be used when possible. 204 bool HasSlowDivide32; 205 206 /// True if 16-bit divides are significantly faster than 207 /// 64-bit divisions and should be used when possible. 208 bool HasSlowDivide64; 209 210 /// True if the short functions should be padded to prevent 211 /// a stall when returning too early. 212 bool PadShortFunctions; 213 214 /// True if the Calls with memory reference should be converted 215 /// to a register-based indirect call. 216 bool CallRegIndirect; 217 218 /// True if the LEA instruction inputs have to be ready at address generation 219 /// (AG) time. 220 bool LEAUsesAG; 221 222 /// True if the LEA instruction with certain arguments is slow 223 bool SlowLEA; 224 225 /// True if INC and DEC instructions are slow when writing to flags 226 bool SlowIncDec; 227 228 /// Processor has AVX-512 PreFetch Instructions 229 bool HasPFI; 230 231 /// Processor has AVX-512 Exponential and Reciprocal Instructions 232 bool HasERI; 233 234 /// Processor has AVX-512 Conflict Detection Instructions 235 bool HasCDI; 236 237 /// Processor has AVX-512 Doubleword and Quadword instructions 238 bool HasDQI; 239 240 /// Processor has AVX-512 Byte and Word instructions 241 bool HasBWI; 242 243 /// Processor has AVX-512 Vector Length eXtenstions 244 bool HasVLX; 245 246 /// Processor has PKU extenstions 247 bool HasPKU; 248 249 /// Processor supports MPX - Memory Protection Extensions 250 bool HasMPX; 251 252 /// Processor supports Invalidate Process-Context Identifier 253 bool HasInvPCId; 254 255 /// Processor has VM Functions 256 bool HasVMFUNC; 257 258 /// Processor has Supervisor Mode Access Protection 259 bool HasSMAP; 260 261 /// Processor has Software Guard Extensions 262 bool HasSGX; 263 264 /// Processor supports Flush Cache Line instruction 265 bool HasCLFLUSHOPT; 266 267 /// Processor has Persistent Commit feature 268 bool HasPCOMMIT; 269 270 /// Processor supports Cache Line Write Back instruction 271 bool HasCLWB; 272 273 /// Use software floating point for code generation. 274 bool UseSoftFloat; 275 276 /// The minimum alignment known to hold of the stack frame on 277 /// entry to the function and which must be maintained by every function. 278 unsigned stackAlignment; 279 280 /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. 281 /// 282 unsigned MaxInlineSizeThreshold; 283 284 /// What processor and OS we're targeting. 285 Triple TargetTriple; 286 287 /// Instruction itineraries for scheduling 288 InstrItineraryData InstrItins; 289 290 private: 291 292 /// Override the stack alignment. 293 unsigned StackAlignOverride; 294 295 /// True if compiling for 64-bit, false for 16-bit or 32-bit. 296 bool In64BitMode; 297 298 /// True if compiling for 32-bit, false for 16-bit or 64-bit. 299 bool In32BitMode; 300 301 /// True if compiling for 16-bit, false for 32-bit or 64-bit. 302 bool In16BitMode; 303 304 X86SelectionDAGInfo TSInfo; 305 // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which 306 // X86TargetLowering needs. 307 X86InstrInfo InstrInfo; 308 X86TargetLowering TLInfo; 309 X86FrameLowering FrameLowering; 310 311 public: 312 /// This constructor initializes the data members to match that 313 /// of the specified triple. 314 /// 315 X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, 316 const X86TargetMachine &TM, unsigned StackAlignOverride); 317 318 const X86TargetLowering *getTargetLowering() const override { 319 return &TLInfo; 320 } 321 const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; } 322 const X86FrameLowering *getFrameLowering() const override { 323 return &FrameLowering; 324 } 325 const X86SelectionDAGInfo *getSelectionDAGInfo() const override { 326 return &TSInfo; 327 } 328 const X86RegisterInfo *getRegisterInfo() const override { 329 return &getInstrInfo()->getRegisterInfo(); 330 } 331 332 /// Returns the minimum alignment known to hold of the 333 /// stack frame on entry to the function and which must be maintained by every 334 /// function for this subtarget. 335 unsigned getStackAlignment() const { return stackAlignment; } 336 337 /// Returns the maximum memset / memcpy size 338 /// that still makes it profitable to inline the call. 339 unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; } 340 341 /// ParseSubtargetFeatures - Parses features string setting specified 342 /// subtarget options. Definition of function is auto generated by tblgen. 343 void ParseSubtargetFeatures(StringRef CPU, StringRef FS); 344 345 private: 346 /// Initialize the full set of dependencies so we can use an initializer 347 /// list for X86Subtarget. 348 X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); 349 void initializeEnvironment(); 350 void initSubtargetFeatures(StringRef CPU, StringRef FS); 351 public: 352 /// Is this x86_64? (disregarding specific ABI / programming model) 353 bool is64Bit() const { 354 return In64BitMode; 355 } 356 357 bool is32Bit() const { 358 return In32BitMode; 359 } 360 361 bool is16Bit() const { 362 return In16BitMode; 363 } 364 365 /// Is this x86_64 with the ILP32 programming model (x32 ABI)? 366 bool isTarget64BitILP32() const { 367 return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32 || 368 TargetTriple.isOSNaCl()); 369 } 370 371 /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)? 372 bool isTarget64BitLP64() const { 373 return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32 && 374 !TargetTriple.isOSNaCl()); 375 } 376 377 PICStyles::Style getPICStyle() const { return PICStyle; } 378 void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } 379 380 bool hasX87() const { return HasX87; } 381 bool hasCMov() const { return HasCMov; } 382 bool hasSSE1() const { return X86SSELevel >= SSE1; } 383 bool hasSSE2() const { return X86SSELevel >= SSE2; } 384 bool hasSSE3() const { return X86SSELevel >= SSE3; } 385 bool hasSSSE3() const { return X86SSELevel >= SSSE3; } 386 bool hasSSE41() const { return X86SSELevel >= SSE41; } 387 bool hasSSE42() const { return X86SSELevel >= SSE42; } 388 bool hasAVX() const { return X86SSELevel >= AVX; } 389 bool hasAVX2() const { return X86SSELevel >= AVX2; } 390 bool hasAVX512() const { return X86SSELevel >= AVX512F; } 391 bool hasFp256() const { return hasAVX(); } 392 bool hasInt256() const { return hasAVX2(); } 393 bool hasSSE4A() const { return HasSSE4A; } 394 bool hasMMX() const { return X863DNowLevel >= MMX; } 395 bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } 396 bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } 397 bool hasPOPCNT() const { return HasPOPCNT; } 398 bool hasAES() const { return HasAES; } 399 bool hasFXSR() const { return HasFXSR; } 400 bool hasXSAVE() const { return HasXSAVE; } 401 bool hasXSAVEOPT() const { return HasXSAVEOPT; } 402 bool hasXSAVEC() const { return HasXSAVEC; } 403 bool hasXSAVES() const { return HasXSAVES; } 404 bool hasPCLMUL() const { return HasPCLMUL; } 405 // Prefer FMA4 to FMA - its better for commutation/memory folding and 406 // has equal or better performance on all supported targets. 407 bool hasFMA() const { return HasFMA && !HasFMA4; } 408 bool hasFMA4() const { return HasFMA4; } 409 bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); } 410 bool hasXOP() const { return HasXOP; } 411 bool hasTBM() const { return HasTBM; } 412 bool hasMOVBE() const { return HasMOVBE; } 413 bool hasRDRAND() const { return HasRDRAND; } 414 bool hasF16C() const { return HasF16C; } 415 bool hasFSGSBase() const { return HasFSGSBase; } 416 bool hasLZCNT() const { return HasLZCNT; } 417 bool hasBMI() const { return HasBMI; } 418 bool hasBMI2() const { return HasBMI2; } 419 bool hasVBMI() const { return HasVBMI; } 420 bool hasIFMA() const { return HasIFMA; } 421 bool hasRTM() const { return HasRTM; } 422 bool hasHLE() const { return HasHLE; } 423 bool hasADX() const { return HasADX; } 424 bool hasSHA() const { return HasSHA; } 425 bool hasPRFCHW() const { return HasPRFCHW; } 426 bool hasRDSEED() const { return HasRDSEED; } 427 bool hasLAHFSAHF() const { return HasLAHFSAHF; } 428 bool hasMWAITX() const { return HasMWAITX; } 429 bool isBTMemSlow() const { return IsBTMemSlow; } 430 bool isSHLDSlow() const { return IsSHLDSlow; } 431 bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } 432 bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } 433 bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } 434 bool hasCmpxchg16b() const { return HasCmpxchg16b; } 435 bool useLeaForSP() const { return UseLeaForSP; } 436 bool hasFastPartialYMMWrite() const { return HasFastPartialYMMWrite; } 437 bool hasSlowDivide32() const { return HasSlowDivide32; } 438 bool hasSlowDivide64() const { return HasSlowDivide64; } 439 bool padShortFunctions() const { return PadShortFunctions; } 440 bool callRegIndirect() const { return CallRegIndirect; } 441 bool LEAusesAG() const { return LEAUsesAG; } 442 bool slowLEA() const { return SlowLEA; } 443 bool slowIncDec() const { return SlowIncDec; } 444 bool hasCDI() const { return HasCDI; } 445 bool hasPFI() const { return HasPFI; } 446 bool hasERI() const { return HasERI; } 447 bool hasDQI() const { return HasDQI; } 448 bool hasBWI() const { return HasBWI; } 449 bool hasVLX() const { return HasVLX; } 450 bool hasPKU() const { return HasPKU; } 451 bool hasMPX() const { return HasMPX; } 452 453 bool isAtom() const { return X86ProcFamily == IntelAtom; } 454 bool isSLM() const { return X86ProcFamily == IntelSLM; } 455 bool useSoftFloat() const { return UseSoftFloat; } 456 457 /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for 458 /// no-sse2). There isn't any reason to disable it if the target processor 459 /// supports it. 460 bool hasMFence() const { return hasSSE2() || is64Bit(); } 461 462 const Triple &getTargetTriple() const { return TargetTriple; } 463 464 bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } 465 bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); } 466 bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); } 467 bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); } 468 bool isTargetPS4() const { return TargetTriple.isPS4(); } 469 470 bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } 471 bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } 472 bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } 473 474 bool isTargetLinux() const { return TargetTriple.isOSLinux(); } 475 bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); } 476 bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); } 477 bool isTargetAndroid() const { return TargetTriple.isAndroid(); } 478 bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } 479 bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } 480 bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } 481 bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); } 482 483 bool isTargetWindowsMSVC() const { 484 return TargetTriple.isWindowsMSVCEnvironment(); 485 } 486 487 bool isTargetKnownWindowsMSVC() const { 488 return TargetTriple.isKnownWindowsMSVCEnvironment(); 489 } 490 491 bool isTargetWindowsCoreCLR() const { 492 return TargetTriple.isWindowsCoreCLREnvironment(); 493 } 494 495 bool isTargetWindowsCygwin() const { 496 return TargetTriple.isWindowsCygwinEnvironment(); 497 } 498 499 bool isTargetWindowsGNU() const { 500 return TargetTriple.isWindowsGNUEnvironment(); 501 } 502 503 bool isTargetWindowsItanium() const { 504 return TargetTriple.isWindowsItaniumEnvironment(); 505 } 506 507 bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); } 508 509 bool isOSWindows() const { return TargetTriple.isOSWindows(); } 510 511 bool isTargetWin64() const { 512 return In64BitMode && TargetTriple.isOSWindows(); 513 } 514 515 bool isTargetWin32() const { 516 return !In64BitMode && (isTargetCygMing() || isTargetKnownWindowsMSVC()); 517 } 518 519 bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; } 520 bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; } 521 522 bool isPICStyleStubPIC() const { 523 return PICStyle == PICStyles::StubPIC; 524 } 525 526 bool isPositionIndependent() const { return TM.isPositionIndependent(); } 527 528 bool isCallingConvWin64(CallingConv::ID CC) const { 529 switch (CC) { 530 // On Win64, all these conventions just use the default convention. 531 case CallingConv::C: 532 case CallingConv::Fast: 533 case CallingConv::X86_FastCall: 534 case CallingConv::X86_StdCall: 535 case CallingConv::X86_ThisCall: 536 case CallingConv::X86_VectorCall: 537 case CallingConv::Intel_OCL_BI: 538 return isTargetWin64(); 539 // This convention allows using the Win64 convention on other targets. 540 case CallingConv::X86_64_Win64: 541 return true; 542 // This convention allows using the SysV convention on Windows targets. 543 case CallingConv::X86_64_SysV: 544 return false; 545 // Otherwise, who knows what this is. 546 default: 547 return false; 548 } 549 } 550 551 /// Classify a global variable reference for the current subtarget according 552 /// to how we should reference it in a non-pcrel context. 553 unsigned char classifyLocalReference(const GlobalValue *GV) const; 554 555 unsigned char classifyGlobalReference(const GlobalValue *GV, 556 const Module &M) const; 557 unsigned char classifyGlobalReference(const GlobalValue *GV) const; 558 559 /// Classify a global function reference for the current subtarget. 560 unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, 561 const Module &M) const; 562 unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const; 563 564 /// Classify a blockaddress reference for the current subtarget according to 565 /// how we should reference it in a non-pcrel context. 566 unsigned char classifyBlockAddressReference() const; 567 568 /// Return true if the subtarget allows calls to immediate address. 569 bool isLegalToCallImmediateAddr() const; 570 571 /// This function returns the name of a function which has an interface 572 /// like the non-standard bzero function, if such a function exists on 573 /// the current subtarget and it is considered prefereable over 574 /// memset with zero passed as the second argument. Otherwise it 575 /// returns null. 576 const char *getBZeroEntry() const; 577 578 /// This function returns true if the target has sincos() routine in its 579 /// compiler runtime or math libraries. 580 bool hasSinCos() const; 581 582 /// Enable the MachineScheduler pass for all X86 subtargets. 583 bool enableMachineScheduler() const override { return true; } 584 585 bool enableEarlyIfConversion() const override; 586 587 /// Return the instruction itineraries based on the subtarget selection. 588 const InstrItineraryData *getInstrItineraryData() const override { 589 return &InstrItins; 590 } 591 592 AntiDepBreakMode getAntiDepBreakMode() const override { 593 return TargetSubtargetInfo::ANTIDEP_CRITICAL; 594 } 595 }; 596 597 } // End llvm namespace 598 599 #endif 600