1 //===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the interfaces that PPC uses to lower LLVM code into a 11 // selection DAG. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H 16 #define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H 17 18 #include "PPC.h" 19 #include "PPCInstrInfo.h" 20 #include "PPCRegisterInfo.h" 21 #include "llvm/CodeGen/CallingConvLower.h" 22 #include "llvm/CodeGen/SelectionDAG.h" 23 #include "llvm/Target/TargetLowering.h" 24 25 namespace llvm { 26 namespace PPCISD { 27 enum NodeType : unsigned { 28 // Start the numbering where the builtin ops and target ops leave off. 29 FIRST_NUMBER = ISD::BUILTIN_OP_END, 30 31 /// FSEL - Traditional three-operand fsel node. 32 /// 33 FSEL, 34 35 /// FCFID - The FCFID instruction, taking an f64 operand and producing 36 /// and f64 value containing the FP representation of the integer that 37 /// was temporarily in the f64 operand. 38 FCFID, 39 40 /// Newer FCFID[US] integer-to-floating-point conversion instructions for 41 /// unsigned integers and single-precision outputs. 42 FCFIDU, FCFIDS, FCFIDUS, 43 44 /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 45 /// operand, producing an f64 value containing the integer representation 46 /// of that FP value. 47 FCTIDZ, FCTIWZ, 48 49 /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for 50 /// unsigned integers. 51 FCTIDUZ, FCTIWUZ, 52 53 /// Reciprocal estimate instructions (unary FP ops). 54 FRE, FRSQRTE, 55 56 // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking 57 // three v4f32 operands and producing a v4f32 result. 58 VMADDFP, VNMSUBFP, 59 60 /// VPERM - The PPC VPERM Instruction. 61 /// 62 VPERM, 63 64 /// The CMPB instruction (takes two operands of i32 or i64). 65 CMPB, 66 67 /// Hi/Lo - These represent the high and low 16-bit parts of a global 68 /// address respectively. These nodes have two operands, the first of 69 /// which must be a TargetGlobalAddress, and the second of which must be a 70 /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', 71 /// though these are usually folded into other nodes. 72 Hi, Lo, 73 74 /// The following two target-specific nodes are used for calls through 75 /// function pointers in the 64-bit SVR4 ABI. 76 77 /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) 78 /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to 79 /// compute an allocation on the stack. 80 DYNALLOC, 81 82 /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to 83 /// compute an offset from native SP to the address of the most recent 84 /// dynamic alloca. 85 DYNAREAOFFSET, 86 87 /// GlobalBaseReg - On Darwin, this node represents the result of the mflr 88 /// at function entry, used for PIC code. 89 GlobalBaseReg, 90 91 /// These nodes represent the 32-bit PPC shifts that operate on 6-bit 92 /// shift amounts. These nodes are generated by the multi-precision shift 93 /// code. 94 SRL, SRA, SHL, 95 96 /// The combination of sra[wd]i and addze used to implemented signed 97 /// integer division by a power of 2. The first operand is the dividend, 98 /// and the second is the constant shift amount (representing the 99 /// divisor). 100 SRA_ADDZE, 101 102 /// CALL - A direct function call. 103 /// CALL_NOP is a call with the special NOP which follows 64-bit 104 /// SVR4 calls. 105 CALL, CALL_NOP, 106 107 /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a 108 /// MTCTR instruction. 109 MTCTR, 110 111 /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a 112 /// BCTRL instruction. 113 BCTRL, 114 115 /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl 116 /// instruction and the TOC reload required on SVR4 PPC64. 117 BCTRL_LOAD_TOC, 118 119 /// Return with a flag operand, matched by 'blr' 120 RET_FLAG, 121 122 /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction. 123 /// This copies the bits corresponding to the specified CRREG into the 124 /// resultant GPR. Bits corresponding to other CR regs are undefined. 125 MFOCRF, 126 127 /// Direct move from a VSX register to a GPR 128 MFVSR, 129 130 /// Direct move from a GPR to a VSX register (algebraic) 131 MTVSRA, 132 133 /// Direct move from a GPR to a VSX register (zero) 134 MTVSRZ, 135 136 // FIXME: Remove these once the ANDI glue bug is fixed: 137 /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the 138 /// eq or gt bit of CR0 after executing andi. x, 1. This is used to 139 /// implement truncation of i32 or i64 to i1. 140 ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT, 141 142 // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit 143 // target (returns (Lo, Hi)). It takes a chain operand. 144 READ_TIME_BASE, 145 146 // EH_SJLJ_SETJMP - SjLj exception handling setjmp. 147 EH_SJLJ_SETJMP, 148 149 // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. 150 EH_SJLJ_LONGJMP, 151 152 /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* 153 /// instructions. For lack of better number, we use the opcode number 154 /// encoding for the OPC field to identify the compare. For example, 838 155 /// is VCMPGTSH. 156 VCMP, 157 158 /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the 159 /// altivec VCMP*o instructions. For lack of better number, we use the 160 /// opcode number encoding for the OPC field to identify the compare. For 161 /// example, 838 is VCMPGTSH. 162 VCMPo, 163 164 /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This 165 /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the 166 /// condition register to branch on, OPC is the branch opcode to use (e.g. 167 /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is 168 /// an optional input flag argument. 169 COND_BRANCH, 170 171 /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based 172 /// loops. 173 BDNZ, BDZ, 174 175 /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding 176 /// towards zero. Used only as part of the long double-to-int 177 /// conversion sequence. 178 FADDRTZ, 179 180 /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. 181 MFFS, 182 183 /// TC_RETURN - A tail call return. 184 /// operand #0 chain 185 /// operand #1 callee (register or absolute) 186 /// operand #2 stack adjustment 187 /// operand #3 optional in flag 188 TC_RETURN, 189 190 /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls 191 CR6SET, 192 CR6UNSET, 193 194 /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS 195 /// on PPC32. 196 PPC32_GOT, 197 198 /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and 199 /// local dynamic TLS on PPC32. 200 PPC32_PICGOT, 201 202 /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec 203 /// TLS model, produces an ADDIS8 instruction that adds the GOT 204 /// base to sym\@got\@tprel\@ha. 205 ADDIS_GOT_TPREL_HA, 206 207 /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec 208 /// TLS model, produces a LD instruction with base register G8RReg 209 /// and offset sym\@got\@tprel\@l. This completes the addition that 210 /// finds the offset of "sym" relative to the thread pointer. 211 LD_GOT_TPREL_L, 212 213 /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS 214 /// model, produces an ADD instruction that adds the contents of 215 /// G8RReg to the thread pointer. Symbol contains a relocation 216 /// sym\@tls which is to be replaced by the thread pointer and 217 /// identifies to the linker that the instruction is part of a 218 /// TLS sequence. 219 ADD_TLS, 220 221 /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS 222 /// model, produces an ADDIS8 instruction that adds the GOT base 223 /// register to sym\@got\@tlsgd\@ha. 224 ADDIS_TLSGD_HA, 225 226 /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS 227 /// model, produces an ADDI8 instruction that adds G8RReg to 228 /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by 229 /// ADDIS_TLSGD_L_ADDR until after register assignment. 230 ADDI_TLSGD_L, 231 232 /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS 233 /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by 234 /// ADDIS_TLSGD_L_ADDR until after register assignment. 235 GET_TLS_ADDR, 236 237 /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that 238 /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following 239 /// register assignment. 240 ADDI_TLSGD_L_ADDR, 241 242 /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS 243 /// model, produces an ADDIS8 instruction that adds the GOT base 244 /// register to sym\@got\@tlsld\@ha. 245 ADDIS_TLSLD_HA, 246 247 /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS 248 /// model, produces an ADDI8 instruction that adds G8RReg to 249 /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by 250 /// ADDIS_TLSLD_L_ADDR until after register assignment. 251 ADDI_TLSLD_L, 252 253 /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS 254 /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by 255 /// ADDIS_TLSLD_L_ADDR until after register assignment. 256 GET_TLSLD_ADDR, 257 258 /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that 259 /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion 260 /// following register assignment. 261 ADDI_TLSLD_L_ADDR, 262 263 /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS 264 /// model, produces an ADDIS8 instruction that adds X3 to 265 /// sym\@dtprel\@ha. 266 ADDIS_DTPREL_HA, 267 268 /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS 269 /// model, produces an ADDI8 instruction that adds G8RReg to 270 /// sym\@got\@dtprel\@l. 271 ADDI_DTPREL_L, 272 273 /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded 274 /// during instruction selection to optimize a BUILD_VECTOR into 275 /// operations on splats. This is necessary to avoid losing these 276 /// optimizations due to constant folding. 277 VADD_SPLAT, 278 279 /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned 280 /// operand identifies the operating system entry point. 281 SC, 282 283 /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer. 284 CLRBHRB, 285 286 /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch 287 /// history rolling buffer entry. 288 MFBHRBE, 289 290 /// CHAIN = RFEBB CHAIN, State - Return from event-based branch. 291 RFEBB, 292 293 /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little 294 /// endian. Maps to an xxswapd instruction that corrects an lxvd2x 295 /// or stxvd2x instruction. The chain is necessary because the 296 /// sequence replaces a load and needs to provide the same number 297 /// of outputs. 298 XXSWAPD, 299 300 /// QVFPERM = This corresponds to the QPX qvfperm instruction. 301 QVFPERM, 302 303 /// QVGPCI = This corresponds to the QPX qvgpci instruction. 304 QVGPCI, 305 306 /// QVALIGNI = This corresponds to the QPX qvaligni instruction. 307 QVALIGNI, 308 309 /// QVESPLATI = This corresponds to the QPX qvesplati instruction. 310 QVESPLATI, 311 312 /// QBFLT = Access the underlying QPX floating-point boolean 313 /// representation. 314 QBFLT, 315 316 /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a 317 /// byte-swapping store instruction. It byte-swaps the low "Type" bits of 318 /// the GPRC input, then stores it through Ptr. Type can be either i16 or 319 /// i32. 320 STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE, 321 322 /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a 323 /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, 324 /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 325 /// or i32. 326 LBRX, 327 328 /// STFIWX - The STFIWX instruction. The first operand is an input token 329 /// chain, then an f64 value to store, then an address to store it to. 330 STFIWX, 331 332 /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point 333 /// load which sign-extends from a 32-bit integer value into the 334 /// destination 64-bit register. 335 LFIWAX, 336 337 /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point 338 /// load which zero-extends from a 32-bit integer value into the 339 /// destination 64-bit register. 340 LFIWZX, 341 342 /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian. 343 /// Maps directly to an lxvd2x instruction that will be followed by 344 /// an xxswapd. 345 LXVD2X, 346 347 /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. 348 /// Maps directly to an stxvd2x instruction that will be preceded by 349 /// an xxswapd. 350 STXVD2X, 351 352 /// QBRC, CHAIN = QVLFSb CHAIN, Ptr 353 /// The 4xf32 load used for v4i1 constants. 354 QVLFSb, 355 356 /// GPRC = TOC_ENTRY GA, TOC 357 /// Loads the entry for GA from the TOC, where the TOC base is given by 358 /// the last operand. 359 TOC_ENTRY 360 }; 361 } 362 363 /// Define some predicates that are used for node matching. 364 namespace PPC { 365 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 366 /// VPKUHUM instruction. 367 bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 368 SelectionDAG &DAG); 369 370 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 371 /// VPKUWUM instruction. 372 bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 373 SelectionDAG &DAG); 374 375 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a 376 /// VPKUDUM instruction. 377 bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, 378 SelectionDAG &DAG); 379 380 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 381 /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). 382 bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 383 unsigned ShuffleKind, SelectionDAG &DAG); 384 385 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 386 /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). 387 bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 388 unsigned ShuffleKind, SelectionDAG &DAG); 389 390 /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for 391 /// a VMRGEW or VMRGOW instruction 392 bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, 393 unsigned ShuffleKind, SelectionDAG &DAG); 394 395 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the 396 /// shift amount, otherwise return -1. 397 int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, 398 SelectionDAG &DAG); 399 400 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 401 /// specifies a splat of a single element that is suitable for input to 402 /// VSPLTB/VSPLTH/VSPLTW. 403 bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); 404 405 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 406 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 407 unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); 408 409 /// get_VSPLTI_elt - If this is a build_vector of constants which can be 410 /// formed by using a vspltis[bhw] instruction of the specified element 411 /// size, return the constant being splatted. The ByteSize field indicates 412 /// the number of bytes of each element [124] -> [bhw]. 413 SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); 414 415 /// If this is a qvaligni shuffle mask, return the shift 416 /// amount, otherwise return -1. 417 int isQVALIGNIShuffleMask(SDNode *N); 418 } 419 420 class PPCTargetLowering : public TargetLowering { 421 const PPCSubtarget &Subtarget; 422 423 public: 424 explicit PPCTargetLowering(const PPCTargetMachine &TM, 425 const PPCSubtarget &STI); 426 427 /// getTargetNodeName() - This method returns the name of a target specific 428 /// DAG node. 429 const char *getTargetNodeName(unsigned Opcode) const override; 430 431 bool useSoftFloat() const override; 432 433 MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { 434 return MVT::i32; 435 } 436 437 bool isCheapToSpeculateCttz() const override { 438 return true; 439 } 440 441 bool isCheapToSpeculateCtlz() const override { 442 return true; 443 } 444 445 /// getSetCCResultType - Return the ISD::SETCC ValueType 446 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, 447 EVT VT) const override; 448 449 /// Return true if target always beneficiates from combining into FMA for a 450 /// given value type. This must typically return false on targets where FMA 451 /// takes more cycles to execute than FADD. 452 bool enableAggressiveFMAFusion(EVT VT) const override; 453 454 /// getPreIndexedAddressParts - returns true by value, base pointer and 455 /// offset pointer and addressing mode by reference if the node's address 456 /// can be legally represented as pre-indexed load / store address. 457 bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, 458 SDValue &Offset, 459 ISD::MemIndexedMode &AM, 460 SelectionDAG &DAG) const override; 461 462 /// SelectAddressRegReg - Given the specified addressed, check to see if it 463 /// can be represented as an indexed [r+r] operation. Returns false if it 464 /// can be more efficiently represented with [r+imm]. 465 bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, 466 SelectionDAG &DAG) const; 467 468 /// SelectAddressRegImm - Returns true if the address N can be represented 469 /// by a base register plus a signed 16-bit displacement [r+imm], and if it 470 /// is not better represented as reg+reg. If Aligned is true, only accept 471 /// displacements suitable for STD and friends, i.e. multiples of 4. 472 bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, 473 SelectionDAG &DAG, bool Aligned) const; 474 475 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be 476 /// represented as an indexed [r+r] operation. 477 bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, 478 SelectionDAG &DAG) const; 479 480 Sched::Preference getSchedulingPreference(SDNode *N) const override; 481 482 /// LowerOperation - Provide custom lowering hooks for some operations. 483 /// 484 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 485 486 /// ReplaceNodeResults - Replace the results of node with an illegal result 487 /// type with new values built out of custom code. 488 /// 489 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, 490 SelectionDAG &DAG) const override; 491 492 SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const; 493 SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const; 494 495 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 496 497 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, 498 std::vector<SDNode *> *Created) const override; 499 500 unsigned getRegisterByName(const char* RegName, EVT VT, 501 SelectionDAG &DAG) const override; 502 503 void computeKnownBitsForTargetNode(const SDValue Op, 504 APInt &KnownZero, 505 APInt &KnownOne, 506 const SelectionDAG &DAG, 507 unsigned Depth = 0) const override; 508 509 unsigned getPrefLoopAlignment(MachineLoop *ML) const override; 510 511 Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, 512 bool IsStore, bool IsLoad) const override; 513 Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, 514 bool IsStore, bool IsLoad) const override; 515 516 MachineBasicBlock * 517 EmitInstrWithCustomInserter(MachineInstr *MI, 518 MachineBasicBlock *MBB) const override; 519 MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, 520 MachineBasicBlock *MBB, 521 unsigned AtomicSize, 522 unsigned BinOpcode) const; 523 MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, 524 MachineBasicBlock *MBB, 525 bool is8bit, unsigned Opcode) const; 526 527 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, 528 MachineBasicBlock *MBB) const; 529 530 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, 531 MachineBasicBlock *MBB) const; 532 533 ConstraintType getConstraintType(StringRef Constraint) const override; 534 535 /// Examine constraint string and operand type and determine a weight value. 536 /// The operand object must already have been set up with the operand type. 537 ConstraintWeight getSingleConstraintMatchWeight( 538 AsmOperandInfo &info, const char *constraint) const override; 539 540 std::pair<unsigned, const TargetRegisterClass *> 541 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 542 StringRef Constraint, MVT VT) const override; 543 544 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 545 /// function arguments in the caller parameter area. This is the actual 546 /// alignment, not its logarithm. 547 unsigned getByValTypeAlignment(Type *Ty, 548 const DataLayout &DL) const override; 549 550 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 551 /// vector. If it is invalid, don't add anything to Ops. 552 void LowerAsmOperandForConstraint(SDValue Op, 553 std::string &Constraint, 554 std::vector<SDValue> &Ops, 555 SelectionDAG &DAG) const override; 556 557 unsigned 558 getInlineAsmMemConstraint(StringRef ConstraintCode) const override { 559 if (ConstraintCode == "es") 560 return InlineAsm::Constraint_es; 561 else if (ConstraintCode == "o") 562 return InlineAsm::Constraint_o; 563 else if (ConstraintCode == "Q") 564 return InlineAsm::Constraint_Q; 565 else if (ConstraintCode == "Z") 566 return InlineAsm::Constraint_Z; 567 else if (ConstraintCode == "Zy") 568 return InlineAsm::Constraint_Zy; 569 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 570 } 571 572 /// isLegalAddressingMode - Return true if the addressing mode represented 573 /// by AM is legal for this target, for a load/store of the specified type. 574 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, 575 Type *Ty, unsigned AS) const override; 576 577 /// isLegalICmpImmediate - Return true if the specified immediate is legal 578 /// icmp immediate, that is the target has icmp instructions which can 579 /// compare a register against the immediate without having to materialize 580 /// the immediate into a register. 581 bool isLegalICmpImmediate(int64_t Imm) const override; 582 583 /// isLegalAddImmediate - Return true if the specified immediate is legal 584 /// add immediate, that is the target has add instructions which can 585 /// add a register and the immediate without having to materialize 586 /// the immediate into a register. 587 bool isLegalAddImmediate(int64_t Imm) const override; 588 589 /// isTruncateFree - Return true if it's free to truncate a value of 590 /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate a i64 value in 591 /// register X1 to i32 by referencing its sub-register R1. 592 bool isTruncateFree(Type *Ty1, Type *Ty2) const override; 593 bool isTruncateFree(EVT VT1, EVT VT2) const override; 594 595 bool isZExtFree(SDValue Val, EVT VT2) const override; 596 597 bool isFPExtFree(EVT VT) const override; 598 599 /// \brief Returns true if it is beneficial to convert a load of a constant 600 /// to just the constant itself. 601 bool shouldConvertConstantLoadToIntImm(const APInt &Imm, 602 Type *Ty) const override; 603 604 bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; 605 606 bool getTgtMemIntrinsic(IntrinsicInfo &Info, 607 const CallInst &I, 608 unsigned Intrinsic) const override; 609 610 /// getOptimalMemOpType - Returns the target specific optimal type for load 611 /// and store operations as a result of memset, memcpy, and memmove 612 /// lowering. If DstAlign is zero that means it's safe to destination 613 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 614 /// means there isn't a need to check it against alignment requirement, 615 /// probably because the source does not need to be loaded. If 'IsMemset' is 616 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that 617 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy 618 /// source is constant so it does not need to be loaded. 619 /// It returns EVT::Other if the type should be determined using generic 620 /// target-independent logic. 621 EVT 622 getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, 623 bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, 624 MachineFunction &MF) const override; 625 626 /// Is unaligned memory access allowed for the given type, and is it fast 627 /// relative to software emulation. 628 bool allowsMisalignedMemoryAccesses(EVT VT, 629 unsigned AddrSpace, 630 unsigned Align = 1, 631 bool *Fast = nullptr) const override; 632 633 /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster 634 /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be 635 /// expanded to FMAs when this method returns true, otherwise fmuladd is 636 /// expanded to fmul + fadd. 637 bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; 638 639 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; 640 641 // Should we expand the build vector with shuffles? 642 bool 643 shouldExpandBuildVectorWithShuffles(EVT VT, 644 unsigned DefinedValues) const override; 645 646 /// createFastISel - This method returns a target-specific FastISel object, 647 /// or null if the target does not support "fast" instruction selection. 648 FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, 649 const TargetLibraryInfo *LibInfo) const override; 650 651 /// \brief Returns true if an argument of type Ty needs to be passed in a 652 /// contiguous block of registers in calling convention CallConv. 653 bool functionArgumentNeedsConsecutiveRegisters( 654 Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override { 655 // We support any array type as "consecutive" block in the parameter 656 // save area. The element type defines the alignment requirement and 657 // whether the argument should go in GPRs, FPRs, or VRs if available. 658 // 659 // Note that clang uses this capability both to implement the ELFv2 660 // homogeneous float/vector aggregate ABI, and to avoid having to use 661 // "byval" when passing aggregates that might fully fit in registers. 662 return Ty->isArrayTy(); 663 } 664 665 /// If a physical register, this returns the register that receives the 666 /// exception address on entry to an EH pad. 667 unsigned 668 getExceptionPointerRegister(const Constant *PersonalityFn) const override; 669 670 /// If a physical register, this returns the register that receives the 671 /// exception typeid on entry to a landing pad. 672 unsigned 673 getExceptionSelectorRegister(const Constant *PersonalityFn) const override; 674 675 private: 676 struct ReuseLoadInfo { 677 SDValue Ptr; 678 SDValue Chain; 679 SDValue ResChain; 680 MachinePointerInfo MPI; 681 bool IsInvariant; 682 unsigned Alignment; 683 AAMDNodes AAInfo; 684 const MDNode *Ranges; 685 686 ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {} 687 }; 688 689 bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, 690 SelectionDAG &DAG, 691 ISD::LoadExtType ET = ISD::NON_EXTLOAD) const; 692 void spliceIntoChain(SDValue ResChain, SDValue NewResChain, 693 SelectionDAG &DAG) const; 694 695 void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, 696 SelectionDAG &DAG, SDLoc dl) const; 697 SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, 698 SDLoc dl) const; 699 SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, 700 SDLoc dl) const; 701 702 SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; 703 SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; 704 705 bool 706 IsEligibleForTailCallOptimization(SDValue Callee, 707 CallingConv::ID CalleeCC, 708 bool isVarArg, 709 const SmallVectorImpl<ISD::InputArg> &Ins, 710 SelectionDAG& DAG) const; 711 712 SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 713 int SPDiff, 714 SDValue Chain, 715 SDValue &LROpOut, 716 SDValue &FPOpOut, 717 bool isDarwinABI, 718 SDLoc dl) const; 719 720 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 721 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; 722 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; 723 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; 724 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 725 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 726 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; 727 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; 728 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; 729 SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; 730 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, 731 const PPCSubtarget &Subtarget) const; 732 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, 733 const PPCSubtarget &Subtarget) const; 734 SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG, 735 const PPCSubtarget &Subtarget) const; 736 SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, 737 const PPCSubtarget &Subtarget) const; 738 SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG, 739 const PPCSubtarget &Subtarget) const; 740 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, 741 const PPCSubtarget &Subtarget) const; 742 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; 743 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; 744 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; 745 SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; 746 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const; 747 SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 748 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; 749 SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; 750 SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; 751 SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; 752 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 753 SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; 754 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 755 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 756 SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; 757 SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; 758 SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; 759 760 SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; 761 SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; 762 763 SDValue LowerCallResult(SDValue Chain, SDValue InFlag, 764 CallingConv::ID CallConv, bool isVarArg, 765 const SmallVectorImpl<ISD::InputArg> &Ins, 766 SDLoc dl, SelectionDAG &DAG, 767 SmallVectorImpl<SDValue> &InVals) const; 768 SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, 769 bool isVarArg, bool IsPatchPoint, bool hasNest, 770 SelectionDAG &DAG, 771 SmallVector<std::pair<unsigned, SDValue>, 8> 772 &RegsToPass, 773 SDValue InFlag, SDValue Chain, SDValue CallSeqStart, 774 SDValue &Callee, 775 int SPDiff, unsigned NumBytes, 776 const SmallVectorImpl<ISD::InputArg> &Ins, 777 SmallVectorImpl<SDValue> &InVals, 778 ImmutableCallSite *CS) const; 779 780 SDValue 781 LowerFormalArguments(SDValue Chain, 782 CallingConv::ID CallConv, bool isVarArg, 783 const SmallVectorImpl<ISD::InputArg> &Ins, 784 SDLoc dl, SelectionDAG &DAG, 785 SmallVectorImpl<SDValue> &InVals) const override; 786 787 SDValue 788 LowerCall(TargetLowering::CallLoweringInfo &CLI, 789 SmallVectorImpl<SDValue> &InVals) const override; 790 791 bool 792 CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, 793 bool isVarArg, 794 const SmallVectorImpl<ISD::OutputArg> &Outs, 795 LLVMContext &Context) const override; 796 797 SDValue 798 LowerReturn(SDValue Chain, 799 CallingConv::ID CallConv, bool isVarArg, 800 const SmallVectorImpl<ISD::OutputArg> &Outs, 801 const SmallVectorImpl<SDValue> &OutVals, 802 SDLoc dl, SelectionDAG &DAG) const override; 803 804 SDValue 805 extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, 806 SDValue ArgVal, SDLoc dl) const; 807 808 SDValue 809 LowerFormalArguments_Darwin(SDValue Chain, 810 CallingConv::ID CallConv, bool isVarArg, 811 const SmallVectorImpl<ISD::InputArg> &Ins, 812 SDLoc dl, SelectionDAG &DAG, 813 SmallVectorImpl<SDValue> &InVals) const; 814 SDValue 815 LowerFormalArguments_64SVR4(SDValue Chain, 816 CallingConv::ID CallConv, bool isVarArg, 817 const SmallVectorImpl<ISD::InputArg> &Ins, 818 SDLoc dl, SelectionDAG &DAG, 819 SmallVectorImpl<SDValue> &InVals) const; 820 SDValue 821 LowerFormalArguments_32SVR4(SDValue Chain, 822 CallingConv::ID CallConv, bool isVarArg, 823 const SmallVectorImpl<ISD::InputArg> &Ins, 824 SDLoc dl, SelectionDAG &DAG, 825 SmallVectorImpl<SDValue> &InVals) const; 826 827 SDValue 828 createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, 829 SDValue CallSeqStart, ISD::ArgFlagsTy Flags, 830 SelectionDAG &DAG, SDLoc dl) const; 831 832 SDValue 833 LowerCall_Darwin(SDValue Chain, SDValue Callee, 834 CallingConv::ID CallConv, 835 bool isVarArg, bool isTailCall, bool IsPatchPoint, 836 const SmallVectorImpl<ISD::OutputArg> &Outs, 837 const SmallVectorImpl<SDValue> &OutVals, 838 const SmallVectorImpl<ISD::InputArg> &Ins, 839 SDLoc dl, SelectionDAG &DAG, 840 SmallVectorImpl<SDValue> &InVals, 841 ImmutableCallSite *CS) const; 842 SDValue 843 LowerCall_64SVR4(SDValue Chain, SDValue Callee, 844 CallingConv::ID CallConv, 845 bool isVarArg, bool isTailCall, bool IsPatchPoint, 846 const SmallVectorImpl<ISD::OutputArg> &Outs, 847 const SmallVectorImpl<SDValue> &OutVals, 848 const SmallVectorImpl<ISD::InputArg> &Ins, 849 SDLoc dl, SelectionDAG &DAG, 850 SmallVectorImpl<SDValue> &InVals, 851 ImmutableCallSite *CS) const; 852 SDValue 853 LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, 854 bool isVarArg, bool isTailCall, bool IsPatchPoint, 855 const SmallVectorImpl<ISD::OutputArg> &Outs, 856 const SmallVectorImpl<SDValue> &OutVals, 857 const SmallVectorImpl<ISD::InputArg> &Ins, 858 SDLoc dl, SelectionDAG &DAG, 859 SmallVectorImpl<SDValue> &InVals, 860 ImmutableCallSite *CS) const; 861 862 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; 863 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; 864 865 SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; 866 SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; 867 SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; 868 869 SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, 870 unsigned &RefinementSteps, 871 bool &UseOneConstNR) const override; 872 SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, 873 unsigned &RefinementSteps) const override; 874 unsigned combineRepeatedFPDivisors() const override; 875 876 CCAssignFn *useFastISelCCs(unsigned Flag) const; 877 }; 878 879 namespace PPC { 880 FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, 881 const TargetLibraryInfo *LibInfo); 882 } 883 884 bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 885 CCValAssign::LocInfo &LocInfo, 886 ISD::ArgFlagsTy &ArgFlags, 887 CCState &State); 888 889 bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 890 MVT &LocVT, 891 CCValAssign::LocInfo &LocInfo, 892 ISD::ArgFlagsTy &ArgFlags, 893 CCState &State); 894 895 bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 896 MVT &LocVT, 897 CCValAssign::LocInfo &LocInfo, 898 ISD::ArgFlagsTy &ArgFlags, 899 CCState &State); 900 } 901 902 #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H 903