//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
    enum NodeType : unsigned {
      // Start the numbering where the builtin ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// Bit scan forward.
      BSF,
      /// Bit scan reverse.
      BSR,

      /// Double shift instructions. These correspond to
      /// X86::SHLDxx and X86::SHRDxx instructions.
      SHLD,
      SHRD,

      /// Bitwise logical AND of floating point values. This corresponds
      /// to X86::ANDPS or X86::ANDPD.
      FAND,

      /// Bitwise logical OR of floating point values. This corresponds
      /// to X86::ORPS or X86::ORPD.
      FOR,

      /// Bitwise logical XOR of floating point values. This corresponds
      /// to X86::XORPS or X86::XORPD.
      FXOR,

      /// Bitwise logical ANDNOT of floating point values. This
      /// corresponds to X86::ANDNPS or X86::ANDNPD.
      FANDN,

      /// These operations represent an abstract X86 call
      /// instruction, which includes a bunch of information. In particular
      /// the operands of these nodes are:
      ///
      ///     #0 - The incoming token chain
      ///     #1 - The callee
      ///     #2 - The number of arg bytes the caller pushes on the stack.
      ///     #3 - The number of arg bytes the callee pops off the stack.
      ///     #4 - The value to pass in AL/AX/EAX (optional)
      ///     #5 - The value to pass in DL/DX/EDX (optional)
      ///
      /// The result values of these nodes are:
      ///
      ///     #0 - The outgoing token chain
      ///     #1 - The first register result value (optional)
      ///     #2 - The second register result value (optional)
      ///
      CALL,

      /// Same as CALL except it adds the NoTrack prefix.
      NT_CALL,

      /// This operation implements the lowering for readcyclecounter.
      RDTSC_DAG,

      /// X86 Read Time-Stamp Counter and Processor ID.
      RDTSCP_DAG,

      /// X86 Read Performance Monitoring Counters.
      RDPMC_DAG,

      /// X86 compare and logical compare instructions.
      CMP, COMI, UCOMI,

      /// X86 bit-test instructions.
      BT,

      /// X86 SetCC. Operand 0 is the condition code, and operand 1 is the
      /// EFLAGS operand, usually produced by a CMP instruction.
      SETCC,

      /// X86 Select
      SELECT, SELECTS,

      // Same as SETCC except it's materialized with an SBB and the value is
      // all ones or all zeros.
      SETCC_CARRY, // R = carry_bit ? ~0 : 0

      /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
      /// Operands are two FP values to compare; result is a mask of
      /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
      FSETCC,
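
      // Illustrative shape of FSETCC (a hedged sketch; the operand layout is
      // assumed from the CMPSS/CMPSD encoding, not guaranteed by this
      // header): the condition code travels as an immediate third operand,
      // e.g.
      //   (FSETCC $lhs, $rhs, (i8 0))   // imm 0 = EQ, as in CMPEQSS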

      /// X86 FP SETCC, similar to above, but with output as an i1 mask and
      /// with optional rounding mode.
      FSETCCM, FSETCCM_RND,

      /// X86 conditional moves. Operand 0 and operand 1 are the two values
      /// to select from. Operand 2 is the condition code, and operand 3 is
      /// the flag operand produced by a CMP or TEST instruction. It also
      /// writes a flag result.
      CMOV,

      /// X86 conditional branches. Operand 0 is the chain operand, operand 1
      /// is the block to branch to if the condition is true, operand 2 is the
      /// condition code, and operand 3 is the flag operand produced by a CMP
      /// or TEST instruction.
      BRCOND,

      /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
      /// operand 1 is the target address.
      NT_BRIND,

      /// Return with a flag operand. Operand 0 is the chain operand, operand
      /// 1 is the number of bytes of stack to pop.
      RET_FLAG,

      /// Return from interrupt. Operand 0 is the number of bytes to pop.
      IRET,

      /// Repeat fill, corresponds to X86::REP_STOSx.
      REP_STOS,

      /// Repeat move, corresponds to X86::REP_MOVSx.
      REP_MOVS,

      /// On Darwin, this node represents the result of the popl
      /// at function entry, used for PIC code.
      GlobalBaseReg,

      /// A wrapper node for TargetConstantPool, TargetJumpTable,
      /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
      /// MCSymbol and TargetBlockAddress.
      Wrapper,

      /// Special wrapper used under X86-64 PIC mode for RIP
      /// relative displacements.
      WrapperRIP,

      /// Copies a 64-bit value from the low word of an XMM vector
      /// to an MMX vector.
      MOVDQ2Q,

      /// Copies a 32-bit value from the low word of an MMX
      /// vector to a GPR.
      MMX_MOVD2W,

      /// Copies a GPR into the low 32-bit word of an MMX vector
      /// and zeroes out the high word.
      MMX_MOVW2D,

      /// Extract an 8-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRB.
      PEXTRB,

      /// Extract a 16-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRW.
      PEXTRW,

      /// Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
      INSERTPS,

      /// Insert the lower 8 bits of a 32-bit value into a vector,
      /// corresponds to X86::PINSRB.
      PINSRB,

      /// Insert the lower 16 bits of a 32-bit value into a vector,
      /// corresponds to X86::PINSRW.
      PINSRW,

      /// Shuffle 16 8-bit values within a vector.
      PSHUFB,

      /// Compute Sum of Absolute Differences.
      PSADBW,
      /// Compute Double Block Packed Sum-Absolute-Differences.
      DBPSADBW,

      /// Bitwise Logical AND NOT of Packed FP values.
      ANDNP,

      /// Blend where the selector is an immediate.
      BLENDI,

      /// Dynamic (non-constant condition) vector blend where only the sign
      /// bits of the condition elements are used. This is used to enforce
      /// that the condition mask is not valid for generic VSELECT
      /// optimizations.
      SHRUNKBLEND,

      /// Combined add and sub on an FP vector.
      ADDSUB,

      // FP vector ops with rounding mode.
      FADD_RND, FADDS_RND,
      FSUB_RND, FSUBS_RND,
      FMUL_RND, FMULS_RND,
      FDIV_RND, FDIVS_RND,
      FMAX_RND, FMAXS_RND,
      FMIN_RND, FMINS_RND,
      FSQRT_RND, FSQRTS_RND,

      // FP vector get exponent.
      FGETEXP_RND, FGETEXPS_RND,
      // Extract Normalized Mantissas.
      VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
      // FP Scale.
      SCALEF,
      SCALEFS,
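
      // A hedged sketch of the *_RND operand convention above (assumed from
      // how these nodes are typically built, not a guarantee of this
      // header): the rounding-mode variants carry one extra i32 immediate
      // operand selecting the rounding behavior, e.g.
      //   (FADD_RND $src1, $src2, (i32 RoundingMode))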

      // Integer add/sub with unsigned saturation.
      ADDUS,
      SUBUS,

      // Integer add/sub with signed saturation.
      ADDS,
      SUBS,

      // Unsigned integer average.
      AVG,

      /// Integer horizontal add/sub.
      HADD,
      HSUB,

      /// Floating point horizontal add/sub.
      FHADD,
      FHSUB,

      // Detect Conflicts Within a Vector.
      CONFLICT,

      /// Floating point max and min.
      FMAX, FMIN,

      /// Commutative FMIN and FMAX.
      FMAXC, FMINC,

      /// Scalar intrinsic floating point max and min.
      FMAXS, FMINS,

      /// Floating point reciprocal-sqrt and reciprocal approximation.
      /// Note that these typically require refinement
      /// in order to obtain suitable precision.
      FRSQRT, FRCP,

      // AVX-512 reciprocal approximations with a little more precision.
      RSQRT14, RSQRT14S, RCP14, RCP14S,

      // Thread Local Storage.
      TLSADDR,

      // Thread Local Storage. A call to get the start address
      // of the TLS block for the current module.
      TLSBASEADDR,

      // Thread Local Storage. A call to an OS-provided
      // thunk at the address from an earlier relocation.
      TLSCALL,

      // Exception Handling helpers.
      EH_RETURN,

      // SjLj exception handling setjmp.
      EH_SJLJ_SETJMP,

      // SjLj exception handling longjmp.
      EH_SJLJ_LONGJMP,

      // SjLj exception handling dispatch.
      EH_SJLJ_SETUP_DISPATCH,

      /// Tail call return. See X86TargetLowering::LowerCall for
      /// the list of operands.
      TC_RETURN,

      // Vector move to low scalar and zero higher vector elements.
      VZEXT_MOVL,

      // Vector integer zero-extend.
      VZEXT,
      // Vector integer signed-extend.
      VSEXT,

      // Vector integer truncate.
      VTRUNC,
      // Vector integer truncate with unsigned/signed saturation.
      VTRUNCUS, VTRUNCS,

      // Vector FP extend.
      VFPEXT, VFPEXT_RND, VFPEXTS_RND,

      // Vector FP round.
      VFPROUND, VFPROUND_RND, VFPROUNDS_RND,

      // 128-bit vector logical left / right shift.
      VSHLDQ, VSRLDQ,

      // Vector shift elements.
      VSHL, VSRL, VSRA,

      // Vector variable shift right arithmetic.
      // Unlike ISD::SRA, if the shift count is greater than the element
      // size, the sign bit is used to fill the destination data element.
      VSRAV,

      // Vector shift elements by immediate.
      VSHLI, VSRLI, VSRAI,

      // Shifts of mask registers.
      KSHIFTL, KSHIFTR,

      // Bit rotate by immediate.
      VROTLI, VROTRI,

      // Vector packed double/float comparison.
      CMPP,

      // Vector integer comparisons.
      PCMPEQ, PCMPGT,

      // v8i16 Horizontal minimum and position.
      PHMINPOS,

      MULTISHIFT,

      /// Vector comparison generating mask bits for fp and
      /// integer signed and unsigned data types.
      CMPM,
      // Vector comparison with rounding mode for FP values.
      CMPM_RND,

      // Arithmetic operations with FLAGS results.
      ADD, SUB, ADC, SBB, SMUL,
      INC, DEC, OR, XOR, AND,

      // Bit field extract.
      BEXTR,

      // LOW, HI, FLAGS = umul LHS, RHS.
      UMUL,

      // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
      SMUL8, UMUL8,

      // 8-bit divrem that zero- or sign-extends the high result (AH).
      UDIVREM8_ZEXT_HREG,
      SDIVREM8_SEXT_HREG,

      // X86-specific multiply by immediate.
      MUL_IMM,

      // Vector sign bit extraction.
      MOVMSK,

      // Vector bitwise comparisons.
      PTEST,

      // Vector packed fp sign bitwise comparisons.
      TESTP,

      // OR/AND test for masks.
      KORTEST,
      KTEST,
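
      // Illustrative note (a sketch of the expected semantics, not a
      // contract of this header): KORTEST ORs two mask registers and sets
      // EFLAGS from the result (ZF if the OR is all zeros, CF if all ones),
      // so an all-false or all-true mask can be branched on without first
      // copying the mask to a GPR.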

      // ADD for masks.
      KADD,

      // Several flavors of instructions with vector shuffle behaviors.
      // Saturated signed/unsigned packing.
      PACKSS,
      PACKUS,
      // Intra-lane alignr.
      PALIGNR,
      // AVX512 inter-lane alignr.
      VALIGN,
      PSHUFD,
      PSHUFHW,
      PSHUFLW,
      SHUFP,
      // VBMI2 Concat & Shift.
      VSHLD,
      VSHRD,
      VSHLDV,
      VSHRDV,
      // Shuffle Packed Values at 128-bit granularity.
      SHUF128,
      MOVDDUP,
      MOVSHDUP,
      MOVSLDUP,
      MOVLHPS,
      MOVHLPS,
      MOVSD,
      MOVSS,
      UNPCKL,
      UNPCKH,
      VPERMILPV,
      VPERMILPI,
      VPERMI,
      VPERM2X128,

      // Variable Permute (VPERM).
      // Res = VPERMV MaskV, V0
      VPERMV,

      // 3-op Variable Permute (VPERMT2).
      // Res = VPERMV3 V0, MaskV, V1
      VPERMV3,

      // Bitwise ternary logic.
      VPTERNLOG,
      // Fix Up Special Packed Float32/64 values.
      VFIXUPIMM,
      VFIXUPIMMS,
      // Range Restriction Calculation For Packed Pairs of Float32/64 values.
      VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
      // Reduce - Perform Reduction Transformation on scalar/packed FP.
      VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
      // RndScale - Round FP Values To Include A Given Number Of Fraction
      // Bits. Also used by the legacy (V)ROUND intrinsics where we mask out
      // the scaling part of the immediate.
      VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
      // Tests types of packed FP values.
      VFPCLASS,
      // Tests types of scalar FP values.
      VFPCLASSS,

      // Broadcast scalar to vector.
      VBROADCAST,
      // Broadcast mask to vector.
      VBROADCASTM,
      // Broadcast subvector to vector.
      SUBV_BROADCAST,

      /// SSE4A Extraction and Insertion.
      EXTRQI, INSERTQI,

      // XOP arithmetic/logical shifts.
      VPSHA, VPSHL,
      // XOP signed/unsigned integer comparisons.
      VPCOM, VPCOMU,
      // XOP packed permute bytes.
      VPPERM,
      // XOP two source permutation.
      VPERMIL2,

      // Vector multiply packed unsigned doubleword integers.
      PMULUDQ,
      // Vector multiply packed signed doubleword integers.
      PMULDQ,
      // Vector multiply packed integers with round and scale.
      MULHRS,

      // Multiply and Add Packed Integers.
      VPMADDUBSW, VPMADDWD,

      // AVX512IFMA multiply and add.
      // NOTE: These are different from the instruction and perform
      // op0 x op1 + op2.
      VPMADD52L, VPMADD52H,

      // VNNI.
      VPDPBUSD,
      VPDPBUSDS,
      VPDPWSSD,
      VPDPWSSDS,

      // FMA nodes.
      // We use the target independent ISD::FMA for the non-inverted case.
      FNMADD,
      FMSUB,
      FNMSUB,
      FMADDSUB,
      FMSUBADD,

      // FMA with rounding mode.
      FMADD_RND,
      FNMADD_RND,
      FMSUB_RND,
      FNMSUB_RND,
      FMADDSUB_RND,
      FMSUBADD_RND,

      // Compress and expand.
      COMPRESS,
      EXPAND,

      // Bits shuffle.
      VPSHUFBITQMB,

      // Convert signed/unsigned integer to floating-point value with
      // rounding mode.
      SINT_TO_FP_RND, UINT_TO_FP_RND,
      SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,

      // Vector float/double to signed/unsigned integer.
      CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
      // Scalar float/double to signed/unsigned integer.
      CVTS2SI_RND, CVTS2UI_RND,

      // Vector float/double to signed/unsigned integer with truncation.
      CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
      // Scalar float/double to signed/unsigned integer with truncation.
      CVTTS2SI_RND, CVTTS2UI_RND,
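
      // Reading the conversion mnemonics above (an editorial note derived
      // from the names themselves): CVT[T]{P,S}2{SI,UI} decodes as an
      // optional 'T' for truncating, 'P' packed vs. 'S' scalar source, and
      // SI/UI for the signed/unsigned integer destination; e.g. CVTTP2UI is
      // a truncating packed FP to unsigned-integer conversion.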

      // Vector signed/unsigned integer to float/double.
      CVTSI2P, CVTUI2P,

      // Save xmm argument registers to the stack, according to %al. An
      // operator is needed so that this can be expanded with control flow.
      VASTART_SAVE_XMM_REGS,

      // Windows' _chkstk call to do stack probing.
      WIN_ALLOCA,

      // For allocating variable amounts of stack space when using
      // segmented stacks. Checks if the current stacklet has enough space,
      // and falls back to heap allocation if not.
      SEG_ALLOCA,

      // Memory barriers.
      MEMBARRIER,
      MFENCE,

      // Store FP status word into i16 register.
      FNSTSW16r,

      // Store contents of %ah into %eflags.
      SAHF,

      // Get a random integer and indicate whether it is valid in CF.
      RDRAND,

      // Get a NIST SP800-90B & C compliant random integer and
      // indicate whether it is valid in CF.
      RDSEED,

      // SSE42 string comparisons.
      // These nodes produce 3 results, index, mask, and flags.
      // X86ISelDAGToDAG will emit one or two instructions based on which
      // results are used. If both flags and index/mask are used, this allows
      // us to use a single instruction since we won't have to pick an opcode
      // for flags. Instead we can rely on the DAG to CSE everything and
      // decide at isel.
      PCMPISTR,
      PCMPESTR,

      // Test if in transactional execution.
      XTEST,

      // ERI instructions.
      RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,

      // Conversions between float and half-float.
      CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,

      // Galois Field Arithmetic Instructions.
      GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,

      // LWP insert record.
      LWPINS,

      // User-level wait.
      UMWAIT, TPAUSE,

      // Compare and swap.
      LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
      LCMPXCHG8_DAG,
      LCMPXCHG16_DAG,
      LCMPXCHG8_SAVE_EBX_DAG,
      LCMPXCHG16_SAVE_RBX_DAG,

      /// LOCK-prefixed arithmetic read-modify-write instructions.
      /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
      LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,

      // Load, scalar_to_vector, and zero extend.
      VZEXT_LOAD,

      // Store FP control word into i16 memory.
      FNSTCW16m,

      /// This instruction implements FP_TO_SINT with the
      /// integer destination in memory and a FP reg source. This corresponds
      /// to the X86::FIST*m instructions and the rounding mode change stuff.
      /// It has two inputs (token chain and address) and two outputs (int
      /// value and token chain).
      FP_TO_INT16_IN_MEM,
      FP_TO_INT32_IN_MEM,
      FP_TO_INT64_IN_MEM,

      /// This instruction implements SINT_TO_FP with the
      /// integer source in memory and FP reg result. This corresponds to the
      /// X86::FILD*m instructions. It has three inputs (token chain, address,
      /// and source type) and two outputs (FP value and token chain).
      /// FILD_FLAG also produces a flag.
      FILD,
      FILD_FLAG,

      /// This instruction implements an extending load to FP stack slots.
      /// This corresponds to the X86::FLD32m / X86::FLD64m instructions. It
      /// takes a chain operand, a pointer to load from, and a ValueType node
      /// indicating the type to load to.
      FLD,

      /// This instruction implements a truncating store to FP stack
      /// slots. This corresponds to the X86::FST32m / X86::FST64m
      /// instructions. It takes a chain operand, a value to store, an
      /// address, and a ValueType to store it as.
      FST,
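
      // A hedged sketch of the FLD operand list described above ('Chain' and
      // 'Ptr' are hypothetical values; the exact builder call is not implied
      // by this header):
      //   SDValue Ops[] = { Chain, Ptr, DAG.getValueType(MVT::f32) };
      //   // results: the extended FP value and an output token chain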

      /// This instruction grabs the address of the next argument
      /// from a va_list. (reads and modifies the va_list in memory)
      VAARG_64,

      // Vector truncating store with unsigned/signed saturation.
      VTRUNCSTOREUS, VTRUNCSTORES,
      // Vector truncating masked store with unsigned/signed saturation.
      VMTRUNCSTOREUS, VMTRUNCSTORES,

      // X86 specific gather and scatter.
      MGATHER, MSCATTER,

      // WARNING: Do not add anything in the end unless you want the node to
      // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
      // opcodes will be treated as target memory ops!
    };
  } // end namespace X86ISD

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant
    /// +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can fit into the displacement field
    /// of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement = true);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
  } // end namespace X86

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    unsigned getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;
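
    // For example (illustrative): an aggregate containing a __m128 field is
    // aligned to a 16-byte boundary in the parameter area, while a struct of
    // plain ints only needs a 4-byte boundary.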

    /// Returns the target specific optimal type for load
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. If DstAlign is zero, that means it's safe because the
    /// destination alignment can satisfy any constraint. Similarly, if
    /// SrcAlign is zero it means there isn't a need to check it against the
    /// alignment requirement, probably because the source does not need to
    /// be loaded. If 'IsMemset' is true, that means it's expanding a memset.
    /// If 'ZeroMemset' is true, that means it's a memset of zero.
    /// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
    /// does not need to be loaded.
    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
                            unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
                            bool MemcpyStrSrc,
                            MachineFunction &MF) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2 f64 load / store are done with fldl / fstpl
    /// which also does type conversion. Note the specified type doesn't have
    /// to be legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                        bool *Fast) const override;

    /// Provide custom lowering hooks for some operations.
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Places new result values for the node in Results (their number
    /// and types must exactly match those of the original return values of
    /// the node), or leaves Results empty, which indicates that the node is
    /// not to be custom lowered after all.
    void LowerOperationWrapper(SDNode *N,
                               SmallVectorImpl<SDValue> &Results,
                               SelectionDAG &DAG) const override;

    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    // Return true if it is profitable to combine a BUILD_VECTOR with a
    // stride-pattern to a shuffle and a truncate.
    // Example of such a combine:
    // v4i32 build_vector((extract_elt V, 1),
    //                    (extract_elt V, 3),
    //                    (extract_elt V, 5),
    //                    (extract_elt V, 7))
    //  -->
    // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to v4i64)
    bool isDesirableToCombineBuildVectorToShuffleTruncate(
        ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for
    /// the given node type. e.g. On x86 i16 is legal, but undesirable since
    /// i16 instruction encodings are longer and some i16 instructions are
    /// slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On
    /// x86 i16 is legal, but undesirable since i16 instruction encodings are
    /// longer and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
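
    // For example (illustrative): i16 arithmetic needs the 0x66 operand-size
    // prefix, so IsDesirableToPromoteOp typically reports PVT = MVT::i32 to
    // get the shorter, faster 32-bit form.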

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    bool mergeStoresAfterLegalization() const override { return true; }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const SelectionDAG &DAG) const override;

    bool isCheapToSpeculateCttz() const override;

    bool isCheapToSpeculateCtlz() const override;

    bool isCtlzFast() const override;

    bool hasBitPreservingFPLogic(EVT VT) const override {
      return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
    }

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // add one store instruction. There is potentially a more significant
      // benefit because it avoids the float->int domain switch for the input
      // value. So it is more likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and add one store instruction (costing one more store
      // buffer entry). Since the benefit is less clear, we leave such pairs
      // out until we have a test case proving it is a win.
      return false;
    }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool preferShiftsToClearExtremeBits(SDValue Y) const override;

    bool
    shouldTransformSignedTruncationCheck(EVT XVT,
                                         unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
      if (XVT.isVector())
        return false;

      auto VTIsOk = [](EVT VT) -> bool {
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
               VT == MVT::i64;
      };

      // We are OK with KeptBitsVT being byte/word/dword, which is what MOVSX
      // supports. XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
    }
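
    // Worked example for the check above (illustrative): KeptBits == 16 with
    // XVT == i32 yields KeptBitsVT == MVT::i16, which MOVSX can sign-extend
    // directly, so the transform is allowed; KeptBits == 13 has no
    // byte/word/dword form and is rejected.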

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Allow multiple load pairs per block for smaller and faster code.
    unsigned getMemcmpEqZeroLoadsPerBlock() const override {
      return 2;
    }

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    SDValue unwrapAddress(SDValue N) const override;

    bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
                        int64_t &Offset) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight
    /// value. The operand object must already have been set up with the
    /// operand type.
    ConstraintWeight
    getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                   const char *constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid,
    /// don't add anything to Ops. If hasMemory is true it means one of the
    /// asm constraints of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "i")
        return InlineAsm::Constraint_i;
      else if (ConstraintCode == "o")
        return InlineAsm::Constraint_o;
      else if (ConstraintCode == "v")
        return InlineAsm::Constraint_v;
      else if (ConstraintCode == "X")
        return InlineAsm::Constraint_X;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for
    /// the register. This should only be used for C_Register constraints. On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified
    /// type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
    /// compare a register against the immediate without having to
    /// materialize the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;
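
    // For example (illustrative): x86 cmp/add immediates are 32-bit
    // sign-extended fields, so Imm == 0x7FFFFFFF is legal while a 64-bit
    // constant such as 0x100000000 must first be materialized in a register.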

    /// Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                             Type *Ty, unsigned AS) const override;

    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value
    /// in register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the
    /// result register. This does not necessarily include registers defined
    /// in unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or
    /// any extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and
    /// fadd instructions. fmuladd intrinsics will be expanded to FMAs when
    /// this method returns true, otherwise fmuladd is expanded to fmul +
    /// fadd.
    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

    /// Given an intrinsic, checks whether on the target the intrinsic will
    /// need to map to a MemIntrinsicNode (touches memory). If this is the
    /// case, it returns true and stores the intrinsic information into the
    /// IntrinsicInfo that was passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if
    /// a target supports the VECTOR_SHUFFLE node, all mask values are
    /// assumed to be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if
    /// there is a suitable VECTOR_SHUFFLE that can be used to replace a VAND
    /// with a constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
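
    // For example (illustrative): a v4i32 AND with the constant
    // <-1, 0, -1, 0> only clears lanes 1 and 3, so it can be rewritten as
    // shuffle<0, 4, 2, 4> of the input with a zero vector instead of loading
    // the mask from the constant pool.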

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller
    /// type in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override {
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is
      // more expensive than a straight movsd. On the other hand, it's
      // important to shrink long double fp constant since fldt is very slow.
      return !X86ScalarSSEf64 || VT == MVT::f80;
    }

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1
    }

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                      unsigned AddrSpace) const override {
      // If we can replace more than 2 scalar stores, there will be a
      // reduction in instructions even after we add a vector constant load.
      return NumElem > 2;
    }

    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;

    /// Intel processors have a unified instruction and data cache.
    const char *getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }

    unsigned getRegisterByName(const char *RegName, EVT VT,
                               SelectionDAG &DAG) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    unsigned
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    unsigned
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    bool needsFixedCatchObjects() const override;

    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location. Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilder<> &IRB) const override;

    bool useLoadStackGuardNode() const override;
    bool useStackGuardXorFP() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Value *getSSPStackGuardCheck(const Module &M) const override;
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                const SDLoc &DL) const override;

    /// Return true if the target stores SafeStack pointer at a fixed offset
    /// in some non-standard address space, and populates the address space
    /// and offset as appropriate.
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

    SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                      SelectionDAG &DAG) const;

    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;

    /// Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;

    MVT getRegisterTypeForCallingConv(LLVMContext &Context,
                                      CallingConv::ID CC,
                                      EVT VT) const override;

    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                           CallingConv::ID CC,
                                           EVT VT) const override;

    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    bool supportSwiftError() const override;

    StringRef getStackProbeSymbolName(MachineFunction &MF) const override;

    bool hasVectorBlend() const override { return true; }

    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

    /// Lower interleaved load(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;

    /// Lower interleaved store(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                   SDValue Addr,
                                   SelectionDAG &DAG) const override;

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget &Subtarget;

    /// Select between SSE or x87 floating point ops.
    /// When SSE is available, use it for f32 operations.
    /// When SSE2 is available, use it for f64 operations.
    bool X86ScalarSSEf32;
    bool X86ScalarSSEf64;

    /// A list of legal FP immediates.
    std::vector<APFloat> LegalFPImmediates;
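
    // For example (illustrative): +0.0 and +1.0 are typically recorded here,
    // since x87 can materialize them with FLDZ/FLD1 instead of a
    // constant-pool load.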

    /// Indicate that this x86 target can instruction
    /// select the specified FP immediate natively.
    void addLegalFPImmediate(const APFloat &Imm) {
      LegalFPImmediates.push_back(Imm);
    }

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization.
    /// Targets that want to do tail call optimization should implement this
    /// function.
    bool IsEligibleForTailCallOptimization(
        SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
        bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
        const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<SDValue> &OutVals,
        const SmallVectorImpl<ISD::InputArg> &Ins,
        SelectionDAG &DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace() const;

    std::pair<SDValue, SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                                bool isSigned,
                                                bool isReplace) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                  const unsigned char OpFlags = 0) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
                               int64_t Offset, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                         bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARG64WithCustomInserter(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *
    EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
                                             MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit nodes that will be selected as "test Op0,Op0", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
                     SelectionDAG &DAG) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // Base class for all X86 non-masked store operations.
  class X86StoreSDNode : public MemSDNode {
  public:
    X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                   SDVTList VTs, EVT MemVT,
                   MachineMemOperand *MMO)
        : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getBasePtr() const { return getOperand(2); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES ||
             N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // Base class for all X86 masked store operations.
  // The class has the same order of operands as MaskedStoreSDNode for
  // convenience.
  class X86MaskedStoreSDNode : public MemSDNode {
  public:
    X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                         MachineMemOperand *MMO)
        : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(1); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getValue() const { return getOperand(3); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
             N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };

  // X86 Truncating Store with Signed saturation.
  class TruncSStoreSDNode : public X86StoreSDNode {
  public:
    TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
                      SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
        : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTORES;
    }
  };

  // X86 Truncating Store with Unsigned saturation.
  class TruncUSStoreSDNode : public X86StoreSDNode {
  public:
    TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
        : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
    }
  };

  // X86 Truncating Masked Store with Signed saturation.
  class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncSStoreSDNode(unsigned Order,
                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                            MachineMemOperand *MMO)
        : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT,
                               MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTORES;
    }
  };

  // X86 Truncating Masked Store with Unsigned saturation.
  class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
  public:
    MaskedTruncUSStoreSDNode(unsigned Order,
                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                             MachineMemOperand *MMO)
        : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT,
                               MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
    }
  };
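
  // Illustrative use of the classof hooks above via LLVM's RTTI helpers (a
  // sketch; 'N' is a hypothetical SDNode*):
  //   if (auto *TS = dyn_cast<TruncSStoreSDNode>(N))
  //     SDValue Val = TS->getValue();   // operand 1, the stored value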

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode
  // for convenience.
  class X86MaskedGatherScatterSDNode : public MemSDNode {
  public:
    X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
                                 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                                 MachineMemOperand *MMO)
        : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getValue() const { return getOperand(1); }
    const SDValue &getScale() const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                          EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
                                       MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                           EVT MemVT, MachineMemOperand *MMO)
        : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs,
                                       MemVT, MMO) {}

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
  /// e.g. for a v4i32 unpack-lo with Unary == false this produces
  /// <0, 4, 1, 5>, interleaving the low halves of the two inputs.
  template <typename T = int>
  void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
                               bool Unary) {
    assert(Mask.empty() && "Expected an empty shuffle mask vector");
    int NumElts = VT.getVectorNumElements();
    int NumEltsInLane = 128 / VT.getScalarSizeInBits();
    for (int i = 0; i < NumElts; ++i) {
      unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
      int Pos = (i % NumEltsInLane) / 2 + LaneStart;
      Pos += (Unary ? 0 : NumElts * (i % 2));
      Pos += (Lo ? 0 : NumEltsInLane / 2);
      Mask.push_back(Pos);
    }
  }

  /// Helper function to scale a shuffle or target shuffle mask, replacing
  /// each mask index with the scaled sequential indices for an equivalent
  /// narrowed mask. This is the reverse process to canWidenShuffleElements,
  /// but can always succeed.
  /// e.g. scaling <1, 3> by 2 yields <2, 3, 6, 7>; sentinel (negative)
  /// values are simply repeated.
  template <typename T>
  void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
                        SmallVectorImpl<T> &ScaledMask) {
    assert(0 < Scale && "Unexpected scaling factor");
    int NumElts = Mask.size();
    ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);

    for (int i = 0; i != NumElts; ++i) {
      int M = Mask[i];

      // Repeat sentinel values in every mask element.
      if (M < 0) {
        for (int s = 0; s != Scale; ++s)
          ScaledMask[(Scale * i) + s] = M;
        continue;
      }

      // Scale mask element and increment across each mask element.
      for (int s = 0; s != Scale; ++s)
        ScaledMask[(Scale * i) + s] = (Scale * M) + s;
    }
  }
} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H