//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef X86ISELLOWERING_H
#define X86ISELLOWERING_H

#include "X86Subtarget.h"
#include "X86RegisterInfo.h"
#include "X86MachineFunctionInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/CallingConvLower.h"

namespace llvm {
namespace X86ISD {
// X86 Specific DAG Nodes
enum NodeType {
  // Start the numbering where the builtin ops leave off.
  FIRST_NUMBER = ISD::BUILTIN_OP_END,

  /// BSF - Bit scan forward.
  /// BSR - Bit scan reverse.
  BSF,
  BSR,

  /// SHLD, SHRD - Double shift instructions. These correspond to
  /// X86::SHLDxx and X86::SHRDxx instructions.
  SHLD,
  SHRD,

  /// FAND - Bitwise logical AND of floating point values. This corresponds
  /// to X86::ANDPS or X86::ANDPD.
  FAND,

  /// FOR - Bitwise logical OR of floating point values. This corresponds
  /// to X86::ORPS or X86::ORPD.
  FOR,

  /// FXOR - Bitwise logical XOR of floating point values. This corresponds
  /// to X86::XORPS or X86::XORPD.
  FXOR,

  /// FSRL - Bitwise logical right shift of floating point values. This
  /// corresponds to X86::PSRLDQ.
  FSRL,

  /// CALL - These operations represent an abstract X86 call
  /// instruction, which includes a bunch of information.  In particular the
  /// operands of these node are:
  ///
  ///     #0 - The incoming token chain
  ///     #1 - The callee
  ///     #2 - The number of arg bytes the caller pushes on the stack.
  ///     #3 - The number of arg bytes the callee pops off the stack.
  ///     #4 - The value to pass in AL/AX/EAX (optional)
  ///     #5 - The value to pass in DL/DX/EDX (optional)
  ///
  /// The result values of these nodes are:
  ///
  ///     #0 - The outgoing token chain
  ///     #1 - The first register result value (optional)
  ///     #2 - The second register result value (optional)
  ///
  CALL,

  /// RDTSC_DAG - This operation implements the lowering for
  /// readcyclecounter
  RDTSC_DAG,

  /// X86 compare and logical compare instructions.
  CMP, COMI, UCOMI,

  /// X86 bit-test instructions.
  BT,

  /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
  /// operand, usually produced by a CMP instruction.
  SETCC,

  // Same as SETCC except it's materialized with an sbb and the value is all
  // ones or all zeros.
  SETCC_CARRY,  // R = carry_bit ? ~0 : 0

  /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
  /// Operands are two FP values to compare; result is a mask of
  /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
  FSETCCss, FSETCCsd,

  /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
  /// result in an integer GPR.  Needs masking for scalar result.
  FGETSIGNx86,

  /// X86 conditional moves. Operand 0 and operand 1 are the two values
  /// to select from. Operand 2 is the condition code, and operand 3 is the
  /// flag operand produced by a CMP or TEST instruction. It also writes a
  /// flag result.
  CMOV,

  /// X86 conditional branches. Operand 0 is the chain operand, operand 1
  /// is the block to branch if condition is true, operand 2 is the
  /// condition code, and operand 3 is the flag operand produced by a CMP
  /// or TEST instruction.
  BRCOND,

  /// Return with a flag operand. Operand 0 is the chain operand, operand
  /// 1 is the number of bytes of stack to pop.
  RET_FLAG,

  /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
  REP_STOS,

  /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
  REP_MOVS,

  /// GlobalBaseReg - On Darwin, this node represents the result of the popl
  /// at function entry, used for PIC code.
  GlobalBaseReg,

  /// Wrapper - A wrapper node for TargetConstantPool,
  /// TargetExternalSymbol, and TargetGlobalAddress.
  Wrapper,

  /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
  /// relative displacements.
  WrapperRIP,

  /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
  /// of an XMM vector, with the high word zero filled.
  MOVQ2DQ,

  /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
  /// to an MMX vector.  If you think this is too close to the previous
  /// mnemonic, so do I; blame Intel.
  MOVDQ2Q,

  /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRB.
  PEXTRB,

  /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
  /// i32, corresponds to X86::PEXTRW.
  PEXTRW,

  /// INSERTPS - Insert any element of a 4 x float vector into any element
  /// of a destination 4 x float vector.
  INSERTPS,

  /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRB.
  PINSRB,

  /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
  /// corresponds to X86::PINSRW.
  PINSRW, MMX_PINSRW,

  /// PSHUFB - Shuffle 16 8-bit values within a vector.
  PSHUFB,

  /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
  ANDNP,

  /// PSIGNB/W/D - Copy integer sign.
  PSIGNB, PSIGNW, PSIGND,

  /// BLEND family of opcodes
  BLENDV,

  /// FHADD - Floating point horizontal add.
  FHADD,

  /// FHSUB - Floating point horizontal sub.
  FHSUB,

  /// FMAX, FMIN - Floating point max and min.
  ///
  FMAX, FMIN,

  /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
  /// approximation.  Note that these typically require refinement
  /// in order to obtain suitable precision.
  FRSQRT, FRCP,

  // TLSADDR - Thread Local Storage.
  TLSADDR,

  // TLSCALL - Thread Local Storage.  When calling to an OS provided
  // thunk at the address from an earlier relocation.
  TLSCALL,

  // EH_RETURN - Exception Handling helpers.
  EH_RETURN,

  /// TC_RETURN - Tail call return.
  ///   operand #0 chain
  ///   operand #1 callee (register or absolute)
  ///   operand #2 stack adjustment
  ///   operand #3 optional in flag
  TC_RETURN,

  // VZEXT_MOVL - Vector move low and zero extend.
  VZEXT_MOVL,

  // VSHL, VSRL - Vector logical left / right shift.
  VSHL, VSRL,

  // CMPPD, CMPPS - Vector double/float comparison.
  CMPPD, CMPPS,

  // PCMP* - Vector integer comparisons.
  PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
  PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,

  // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
  ADD, SUB, ADC, SBB, SMUL,
  INC, DEC, OR, XOR, AND,

  ANDN, // ANDN - Bitwise AND NOT with FLAGS results.

  UMUL, // LOW, HI, FLAGS = umul LHS, RHS

  // MUL_IMM - X86 specific multiply by immediate.
  MUL_IMM,

  // PTEST - Vector bitwise comparisons
  PTEST,

  // TESTP - Vector packed fp sign bitwise comparisons
  TESTP,

  // Several flavors of instructions with vector shuffle behaviors.
  PALIGN,
  PSHUFD,
  PSHUFHW,
  PSHUFLW,
  PSHUFHW_LD,
  PSHUFLW_LD,
  SHUFPD,
  SHUFPS,
  MOVDDUP,
  MOVSHDUP,
  MOVSLDUP,
  MOVSHDUP_LD,
  MOVSLDUP_LD,
  MOVLHPS,
  MOVLHPD,
  MOVHLPS,
  MOVHLPD,
  MOVLPS,
  MOVLPD,
  MOVSD,
  MOVSS,
  UNPCKLPS,
  UNPCKLPD,
  VUNPCKLPSY,
  VUNPCKLPDY,
  UNPCKHPS,
  UNPCKHPD,
  VUNPCKHPSY,
  VUNPCKHPDY,
  PUNPCKLBW,
  PUNPCKLWD,
  PUNPCKLDQ,
  PUNPCKLQDQ,
  PUNPCKHBW,
  PUNPCKHWD,
  PUNPCKHDQ,
  PUNPCKHQDQ,
  VPERMILPS,
  VPERMILPSY,
  VPERMILPD,
  VPERMILPDY,
  VPERM2F128,
  VBROADCAST,

  // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
  // according to %al. An operator is needed so that this can be expanded
  // with control flow.
  VASTART_SAVE_XMM_REGS,

  // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
  WIN_ALLOCA,

  // SEG_ALLOCA - For allocating variable amounts of stack space when using
  // segmented stacks. Check if the current stacklet has enough space, and
  // falls back to heap allocation if not.
  SEG_ALLOCA,

  // Memory barrier
  MEMBARRIER,
  MFENCE,
  SFENCE,
  LFENCE,

  // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
  // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
  // Atomic 64-bit binary operations.
  ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
  ATOMSUB64_DAG,
  ATOMOR64_DAG,
  ATOMXOR64_DAG,
  ATOMAND64_DAG,
  ATOMNAND64_DAG,
  ATOMSWAP64_DAG,

  // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
  LCMPXCHG_DAG,
  LCMPXCHG8_DAG,
  LCMPXCHG16_DAG,

  // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
  VZEXT_LOAD,

  // FNSTCW16m - Store FP control word into i16 memory.
  FNSTCW16m,

  /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
  /// integer destination in memory and a FP reg source.  This corresponds
  /// to the X86::FIST*m instructions and the rounding mode change stuff. It
  /// has two inputs (token chain and address) and two outputs (int value
  /// and token chain).
  FP_TO_INT16_IN_MEM,
  FP_TO_INT32_IN_MEM,
  FP_TO_INT64_IN_MEM,

  /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
  /// integer source in memory and FP reg result.  This corresponds to the
  /// X86::FILD*m instructions. It has three inputs (token chain, address,
  /// and source type) and two outputs (FP value and token chain). FILD_FLAG
  /// also produces a flag).
  FILD,
  FILD_FLAG,

  /// FLD - This instruction implements an extending load to FP stack slots.
  /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
  /// operand, ptr to load from, and a ValueType node indicating the type
  /// to load to.
  FLD,

  /// FST - This instruction implements a truncating store to FP stack
  /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
  /// chain operand, value to store, address, and a ValueType to store it
  /// as.
  FST,

  /// VAARG_64 - This instruction grabs the address of the next argument
  /// from a va_list. (reads and modifies the va_list in memory)
  VAARG_64

  // WARNING: Do not add anything in the end unless you want the node to
  // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
  // thought as target memory ops!
};
} // end namespace X86ISD

/// Define some predicates that are used for node matching.
namespace X86 {
/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool isPSHUFDMask(ShuffleVectorSDNode *N);

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool isPSHUFHWMask(ShuffleVectorSDNode *N);

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool isPSHUFLWMask(ShuffleVectorSDNode *N);

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
bool isSHUFPMask(ShuffleVectorSDNode *N);

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool isMOVHLPSMask(ShuffleVectorSDNode *N);

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
bool isMOVLPMask(ShuffleVectorSDNode *N);

/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for MOVHP{S|D}
/// as well as MOVLHPS.
bool isMOVLHPSMask(ShuffleVectorSDNode *N);

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
bool isMOVLMask(ShuffleVectorSDNode *N);

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);

/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
bool isMOVDDUPMask(ShuffleVectorSDNode *N);

/// isVEXTRACTF128Index - Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for input to VEXTRACTF128.
bool isVEXTRACTF128Index(SDNode *N);

/// isVINSERTF128Index - Return true if the specified
/// INSERT_SUBVECTOR operand specifies a subvector insert that is
/// suitable for input to VINSERTF128.
bool isVINSERTF128Index(SDNode *N);

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
unsigned getShuffleSHUFImmediate(SDNode *N);

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with PSHUFHW instruction.
unsigned getShufflePSHUFHWImmediate(SDNode *N);

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction.
unsigned getShufflePSHUFLWImmediate(SDNode *N);

/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
unsigned getShufflePALIGNRImmediate(SDNode *N);

/// getExtractVEXTRACTF128Immediate - Return the appropriate
/// immediate to extract the specified EXTRACT_SUBVECTOR index
/// with VEXTRACTF128 instructions.
unsigned getExtractVEXTRACTF128Immediate(SDNode *N);

/// getInsertVINSERTF128Immediate - Return the appropriate
/// immediate to insert at the specified INSERT_SUBVECTOR index
/// with VINSERTF128 instructions.
unsigned getInsertVINSERTF128Immediate(SDNode *N);

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool isZeroNode(SDValue Elt);

/// isOffsetSuitableForCodeModel - Returns true if the given offset can
/// fit into the displacement field of the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                  bool hasSymbolicDisplacement = true);


/// isCalleePop - Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
489 bool isCalleePop(CallingConv::ID CallingConv, 490 bool is64Bit, bool IsVarArg, bool TailCallOpt); 491 } 492 493 //===--------------------------------------------------------------------===// 494 // X86TargetLowering - X86 Implementation of the TargetLowering interface 495 class X86TargetLowering : public TargetLowering { 496 public: 497 explicit X86TargetLowering(X86TargetMachine &TM); 498 499 virtual unsigned getJumpTableEncoding() const; 500 501 virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } 502 503 virtual const MCExpr * 504 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, 505 const MachineBasicBlock *MBB, unsigned uid, 506 MCContext &Ctx) const; 507 508 /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC 509 /// jumptable. 510 virtual SDValue getPICJumpTableRelocBase(SDValue Table, 511 SelectionDAG &DAG) const; 512 virtual const MCExpr * 513 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 514 unsigned JTI, MCContext &Ctx) const; 515 516 /// getStackPtrReg - Return the stack pointer register we are using: either 517 /// ESP or RSP. 518 unsigned getStackPtrReg() const { return X86StackPtr; } 519 520 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 521 /// function arguments in the caller parameter area. For X86, aggregates 522 /// that contains are placed at 16-byte boundaries while the rest are at 523 /// 4-byte boundaries. 524 virtual unsigned getByValTypeAlignment(Type *Ty) const; 525 526 /// getOptimalMemOpType - Returns the target specific optimal type for load 527 /// and store operations as a result of memset, memcpy, and memmove 528 /// lowering. If DstAlign is zero that means it's safe to destination 529 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 530 /// means there isn't a need to check it against alignment requirement, 531 /// probably because the source does not need to be loaded. 
If 532 /// 'NonScalarIntSafe' is true, that means it's safe to return a 533 /// non-scalar-integer type, e.g. empty string source, constant, or loaded 534 /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is 535 /// constant so it does not need to be loaded. 536 /// It returns EVT::Other if the type should be determined using generic 537 /// target-independent logic. 538 virtual EVT 539 getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, 540 bool NonScalarIntSafe, bool MemcpyStrSrc, 541 MachineFunction &MF) const; 542 543 /// allowsUnalignedMemoryAccesses - Returns true if the target allows 544 /// unaligned memory accesses. of the specified type. 545 virtual bool allowsUnalignedMemoryAccesses(EVT VT) const { 546 return true; 547 } 548 549 /// LowerOperation - Provide custom lowering hooks for some operations. 550 /// 551 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; 552 553 /// ReplaceNodeResults - Replace the results of node with an illegal result 554 /// type with new values built out of custom code. 555 /// 556 virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, 557 SelectionDAG &DAG) const; 558 559 560 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; 561 562 /// isTypeDesirableForOp - Return true if the target has native support for 563 /// the specified value type and it is 'desirable' to use the type for the 564 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 565 /// instruction encodings are longer and some i16 instructions are slow. 566 virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const; 567 568 /// isTypeDesirable - Return true if the target has native support for the 569 /// specified value type and it is 'desirable' to use the type. e.g. On x86 570 /// i16 is legal, but undesirable since i16 instruction encodings are longer 571 /// and some i16 instructions are slow. 
572 virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const; 573 574 virtual MachineBasicBlock * 575 EmitInstrWithCustomInserter(MachineInstr *MI, 576 MachineBasicBlock *MBB) const; 577 578 579 /// getTargetNodeName - This method returns the name of a target specific 580 /// DAG node. 581 virtual const char *getTargetNodeName(unsigned Opcode) const; 582 583 /// getSetCCResultType - Return the value type to use for ISD::SETCC. 584 virtual EVT getSetCCResultType(EVT VT) const; 585 586 /// computeMaskedBitsForTargetNode - Determine which of the bits specified 587 /// in Mask are known to be either zero or one and return them in the 588 /// KnownZero/KnownOne bitsets. 589 virtual void computeMaskedBitsForTargetNode(const SDValue Op, 590 const APInt &Mask, 591 APInt &KnownZero, 592 APInt &KnownOne, 593 const SelectionDAG &DAG, 594 unsigned Depth = 0) const; 595 596 // ComputeNumSignBitsForTargetNode - Determine the number of bits in the 597 // operation that are sign bits. 598 virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, 599 unsigned Depth) const; 600 601 virtual bool 602 isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; 603 604 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; 605 606 virtual bool ExpandInlineAsm(CallInst *CI) const; 607 608 ConstraintType getConstraintType(const std::string &Constraint) const; 609 610 /// Examine constraint string and operand type and determine a weight value. 611 /// The operand object must already have been set up with the operand type. 612 virtual ConstraintWeight getSingleConstraintMatchWeight( 613 AsmOperandInfo &info, const char *constraint) const; 614 615 virtual const char *LowerXConstraint(EVT ConstraintVT) const; 616 617 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 618 /// vector. If it is invalid, don't add anything to Ops. 
If hasMemory is 619 /// true it means one of the asm constraint of the inline asm instruction 620 /// being processed is 'm'. 621 virtual void LowerAsmOperandForConstraint(SDValue Op, 622 std::string &Constraint, 623 std::vector<SDValue> &Ops, 624 SelectionDAG &DAG) const; 625 626 /// getRegForInlineAsmConstraint - Given a physical register constraint 627 /// (e.g. {edx}), return the register number and the register class for the 628 /// register. This should only be used for C_Register constraints. On 629 /// error, this returns a register number of 0. 630 std::pair<unsigned, const TargetRegisterClass*> 631 getRegForInlineAsmConstraint(const std::string &Constraint, 632 EVT VT) const; 633 634 /// isLegalAddressingMode - Return true if the addressing mode represented 635 /// by AM is legal for this target, for a load/store of the specified type. 636 virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; 637 638 /// isTruncateFree - Return true if it's free to truncate a value of 639 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in 640 /// register EAX to i16 by referencing its sub-register AX. 641 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; 642 virtual bool isTruncateFree(EVT VT1, EVT VT2) const; 643 644 /// isZExtFree - Return true if any actual instruction that defines a 645 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result 646 /// register. This does not necessarily include registers defined in 647 /// unknown ways, such as incoming arguments, or copies from unknown 648 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this 649 /// does not necessarily apply to truncate instructions. e.g. on x86-64, 650 /// all instructions that define 32-bit values implicit zero-extend the 651 /// result out to 64 bits. 
652 virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; 653 virtual bool isZExtFree(EVT VT1, EVT VT2) const; 654 655 /// isNarrowingProfitable - Return true if it's profitable to narrow 656 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow 657 /// from i32 to i8 but not from i32 to i16. 658 virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const; 659 660 /// isFPImmLegal - Returns true if the target can instruction select the 661 /// specified FP immediate natively. If false, the legalizer will 662 /// materialize the FP immediate as a load from a constant pool. 663 virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; 664 665 /// isShuffleMaskLegal - Targets can use this to indicate that they only 666 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 667 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask 668 /// values are assumed to be legal. 669 virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, 670 EVT VT) const; 671 672 /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is 673 /// used by Targets can use this to indicate if there is a suitable 674 /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant 675 /// pool entry. 676 virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, 677 EVT VT) const; 678 679 /// ShouldShrinkFPConstant - If true, then instruction selection should 680 /// seek to shrink the FP constant of the specified type to a smaller type 681 /// in order to save space and / or reduce runtime. 682 virtual bool ShouldShrinkFPConstant(EVT VT) const { 683 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more 684 // expensive than a straight movsd. On the other hand, it's important to 685 // shrink long double fp constant since fldt is very slow. 
686 return !X86ScalarSSEf64 || VT == MVT::f80; 687 } 688 689 const X86Subtarget* getSubtarget() const { 690 return Subtarget; 691 } 692 693 /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is 694 /// computed in an SSE register, not on the X87 floating point stack. 695 bool isScalarFPTypeInSSEReg(EVT VT) const { 696 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 697 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 698 } 699 700 /// createFastISel - This method returns a target specific FastISel object, 701 /// or null if the target does not support "fast" ISel. 702 virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; 703 704 /// getStackCookieLocation - Return true if the target stores stack 705 /// protector cookies at a fixed offset in some non-standard address 706 /// space, and populates the address space and offset as 707 /// appropriate. 708 virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const; 709 710 SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, 711 SelectionDAG &DAG) const; 712 713 protected: 714 std::pair<const TargetRegisterClass*, uint8_t> 715 findRepresentativeClass(EVT VT) const; 716 717 private: 718 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 719 /// make the right decision when generating code for different targets. 720 const X86Subtarget *Subtarget; 721 const X86RegisterInfo *RegInfo; 722 const TargetData *TD; 723 724 /// X86StackPtr - X86 physical register used as stack ptr. 725 unsigned X86StackPtr; 726 727 /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 728 /// floating point ops. 729 /// When SSE is available, use it for f32 operations. 730 /// When SSE2 is available, use it for f64 operations. 731 bool X86ScalarSSEf32; 732 bool X86ScalarSSEf64; 733 734 /// LegalFPImmediates - A list of legal fp immediates. 
735 std::vector<APFloat> LegalFPImmediates; 736 737 /// addLegalFPImmediate - Indicate that this x86 target can instruction 738 /// select the specified FP immediate natively. 739 void addLegalFPImmediate(const APFloat& Imm) { 740 LegalFPImmediates.push_back(Imm); 741 } 742 743 SDValue LowerCallResult(SDValue Chain, SDValue InFlag, 744 CallingConv::ID CallConv, bool isVarArg, 745 const SmallVectorImpl<ISD::InputArg> &Ins, 746 DebugLoc dl, SelectionDAG &DAG, 747 SmallVectorImpl<SDValue> &InVals) const; 748 SDValue LowerMemArgument(SDValue Chain, 749 CallingConv::ID CallConv, 750 const SmallVectorImpl<ISD::InputArg> &ArgInfo, 751 DebugLoc dl, SelectionDAG &DAG, 752 const CCValAssign &VA, MachineFrameInfo *MFI, 753 unsigned i) const; 754 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, 755 DebugLoc dl, SelectionDAG &DAG, 756 const CCValAssign &VA, 757 ISD::ArgFlagsTy Flags) const; 758 759 // Call lowering helpers. 760 761 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 762 /// for tail call optimization. Targets which want to do tail call 763 /// optimization should implement this function. 
764 bool IsEligibleForTailCallOptimization(SDValue Callee, 765 CallingConv::ID CalleeCC, 766 bool isVarArg, 767 bool isCalleeStructRet, 768 bool isCallerStructRet, 769 const SmallVectorImpl<ISD::OutputArg> &Outs, 770 const SmallVectorImpl<SDValue> &OutVals, 771 const SmallVectorImpl<ISD::InputArg> &Ins, 772 SelectionDAG& DAG) const; 773 bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; 774 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, 775 SDValue Chain, bool IsTailCall, bool Is64Bit, 776 int FPDiff, DebugLoc dl) const; 777 778 unsigned GetAlignedArgumentStackSize(unsigned StackSize, 779 SelectionDAG &DAG) const; 780 781 std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, 782 bool isSigned) const; 783 784 SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, 785 SelectionDAG &DAG) const; 786 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 787 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; 788 SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; 789 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 790 SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; 791 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 792 SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; 793 SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; 794 SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; 795 SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; 796 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; 797 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; 798 SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, 799 int64_t Offset, SelectionDAG &DAG) const; 800 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 801 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 802 
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; 803 SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; 804 SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const; 805 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 806 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 807 SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; 808 SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; 809 SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; 810 SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; 811 SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; 812 SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; 813 SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; 814 SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const; 815 SDValue LowerToBT(SDValue And, ISD::CondCode CC, 816 DebugLoc dl, SelectionDAG &DAG) const; 817 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; 818 SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; 819 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; 820 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; 821 SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; 822 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; 823 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 824 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; 825 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; 826 SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; 827 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 828 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 829 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; 830 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; 831 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; 832 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; 833 
    SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;

    // Bit-scan and integer arithmetic lowering (CTLZ/CTTZ map to the BSR/BSF
    // nodes declared above).
    SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;

    // Atomic and memory-ordering operation lowering.
    SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;

    // Utility functions to help LowerVECTOR_SHUFFLE
    SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;

    // Calling-convention hooks (virtual — these override TargetLowering
    // interface methods; base declarations are in
    // llvm/Target/TargetLowering.h, included above).

    /// Lower the incoming (formal) arguments described by Ins, appending the
    /// lowered values to InVals.
    virtual SDValue
      LowerFormalArguments(SDValue Chain,
                           CallingConv::ID CallConv, bool isVarArg,
                           const SmallVectorImpl<ISD::InputArg> &Ins,
                           DebugLoc dl, SelectionDAG &DAG,
                           SmallVectorImpl<SDValue> &InVals) const;

    /// Lower an outgoing call. isTailCall is in-out: it may be cleared if the
    /// call cannot actually be emitted as a tail call.
    virtual SDValue
      LowerCall(SDValue Chain, SDValue Callee,
                CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
                const SmallVectorImpl<ISD::OutputArg> &Outs,
                const SmallVectorImpl<SDValue> &OutVals,
                const SmallVectorImpl<ISD::InputArg> &Ins,
                DebugLoc dl, SelectionDAG &DAG,
                SmallVectorImpl<SDValue> &InVals) const;

    /// Lower the return described by Outs/OutVals into the DAG.
    virtual SDValue
      LowerReturn(SDValue Chain,
                  CallingConv::ID CallConv, bool isVarArg,
                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                  const SmallVectorImpl<SDValue> &OutVals,
                  DebugLoc dl, SelectionDAG &DAG) const;

    virtual bool isUsedByReturnOnly(SDNode *N) const;

    virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;

    virtual EVT
    getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
                             ISD::NodeType ExtendKind) const;

    virtual bool
    CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                   bool isVarArg,
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                   LLVMContext &Context) const;

    /// Expand a 64-bit atomic binary node N into the opcode NewOp, appending
    /// the replacement values to Results.
    void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                 SelectionDAG &DAG, unsigned NewOp) const;

    /// Utility function to emit string processing sse4.2 instructions
    /// that return in xmm0.
    /// This takes the instruction to expand, the associated machine basic
    /// block, the number of args, and whether or not the second arg is
    /// in memory or not.
    MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
                                unsigned argNum, bool inMem) const;

    /// Utility functions to emit monitor and mwait instructions. These
    /// need to make sure that the arguments to the intrinsic are in the
    /// correct registers.
    MachineBasicBlock *EmitMonitor(MachineInstr *MI,
                                   MachineBasicBlock *BB) const;
    MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;

    /// Utility function to emit atomic bitwise operations (and, or, xor).
    /// It takes the bitwise instruction to expand, the associated machine basic
    /// block, and the associated X86 opcodes for reg/reg and reg/imm.
    MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
                                                    MachineInstr *BInstr,
                                                    MachineBasicBlock *BB,
                                                    unsigned regOpc,
                                                    unsigned immOpc,
                                                    unsigned loadOpc,
                                                    unsigned cxchgOpc,
                                                    unsigned notOpc,
                                                    unsigned EAXreg,
                                                    TargetRegisterClass *RC,
                                                    bool invSrc = false) const;

    /// 64-bit variant of the atomic bitwise emitter: takes separate reg/imm
    /// opcodes for the low (L) and high (H) halves of the operation.
    MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
                                                    MachineInstr *BInstr,
                                                    MachineBasicBlock *BB,
                                                    unsigned regOpcL,
                                                    unsigned regOpcH,
                                                    unsigned immOpcL,
                                                    unsigned immOpcH,
                                                    bool invSrc = false) const;

    /// Utility function to emit atomic min and max. It takes the min/max
    /// instruction to expand, the associated basic block, and the associated
    /// cmov opcode for moving the min or max value.
    MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
                                                          MachineBasicBlock *BB,
                                                        unsigned cmovOpc) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *EmitVAARG64WithCustomInserter(
                       MachineInstr *MI,
                       MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
                                                   MachineInstr *BInstr,
                                                   MachineBasicBlock *BB) const;

    // Custom machine-level expansions for pseudo instructions: each takes the
    // pseudo and its basic block and returns the block where emission ended.
    MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
                                            MachineBasicBlock *BB) const;

    // Is64Bit selects between the 32- and 64-bit forms of the expansion.
    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
                                            MachineBasicBlock *BB,
                                            bool Is64Bit) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                          MachineBasicBlock *BB) const;

    // NOTE(review): lowercase 'emit' is inconsistent with the EmitLowered*
    // naming above — rename would touch callers, so left as-is.
    MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
                                          MachineBasicBlock *BB) const;

    /// Emit nodes that will be selected as "test Op0,Op0", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
                    SelectionDAG &DAG) const;
  };

  namespace X86 {
    /// createFastISel - Factory for the X86 FastISel implementation;
    /// funcInfo carries the per-function lowering state the selector uses.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
  }
}

#endif    // X86ISELLOWERING_H