//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// Forward declarations of the custom SVR4 calling-convention helpers.  Their
// definitions are not part of this section of the file.
static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                     CCValAssign::LocInfo &LocInfo,
                                     ISD::ArgFlagsTy &ArgFlags,
                                     CCState &State);
static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                            MVT &LocVT,
                                            CCValAssign::LocInfo &LocInfo,
                                            ISD::ArgFlagsTy &ArgFlags,
                                            CCState &State);
static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                              MVT &LocVT,
                                              CCValAssign::LocInfo &LocInfo,
                                              ISD::ArgFlagsTy &ArgFlags,
                                              CCState &State);

// Hidden command-line switch "-enable-ppc-preinc" (experimental pre-increment
// load/store generation).
static cl::opt<bool>
EnablePPCPreinc("enable-ppc-preinc",
cl::desc("enable preincrement load/store generation on PPC (experimental)"),
                                     cl::Hidden);

/// CreateTLOF - Allocate the object-file lowering for this target machine:
/// the Mach-O implementation when the subtarget reports Darwin, the ELF
/// implementation otherwise.  The result is handed to the TargetLowering base
/// class constructor below.
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  if (TM.getSubtargetImpl()->isDarwin())
    return new TargetLoweringObjectFileMachO();

  return new TargetLoweringObjectFileELF();
}

/// PPCTargetLowering constructor - Registers the PowerPC register classes and
/// declares, per opcode and value type, whether each DAG operation is Legal,
/// Promote, Expand or Custom.  The order of the calls matters: several later
/// calls deliberately override an action set earlier (most visibly in the
/// Altivec section, where a blanket per-vector-type loop is followed by
/// type-specific overrides).  computeRegisterProperties() must stay last.
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-increment loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/fmod/pow; FMA is expanded here as well.  (sqrt is
  // handled separately below, depending on the subtarget.)
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA , MVT::f32, Expand);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // Use hardware square root when the subtarget has it; otherwise expand.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
  setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32 , Expand);
  setOperationAction(ISD::ROTR, MVT::i64 , Expand);

  // PowerPC does not have Select
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);


  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
    if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation for VACOPY, VAEND and STACKSAVE;
  // STACKRESTORE and DYNAMIC_STACKALLOC are custom lowered.
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set a default action for every vector type.  Then we selectively
    // override the ones that can be effectively codegen'd (after the loop).
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND , VT, Promote);
      AddPromotedToType (ISD::AND , VT, MVT::v4i32);
      setOperationAction(ISD::OR , VT, Promote);
      AddPromotedToType (ISD::OR , VT, MVT::v4i32);
      setOperationAction(ISD::XOR , VT, Promote);
      AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD , VT, Promote);
      AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Override the loop's Promote actions for the promotion target itself.
    setOperationAction(ISD::AND , MVT::v4i32, Legal);
    setOperationAction(ISD::OR , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?

  // Stack pointer and exception registers: X-registers on PPC64, R-registers
  // otherwise.
  if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  // Darwin long double math library functions have $LDBL128 appended.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  setMinFunctionAlignment(2);
  if (PPCSubTarget.isDarwin())
    setPrefFunctionAlignment(4);

  setInsertFencesForAtomic(true);

  setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties();
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.  The result is 4 on
/// Darwin, 16 for vector types of at least 128 bits, 8 on PPC64 and 4
/// otherwise.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  const TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on 4 byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;

  // 16byte and wider vectors are passed on 16byte boundary.
  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
    if (VTy->getBitWidth() >= 128)
      return 16;

  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  if (PPCSubTarget.isPPC64())
    return 8;

  return 4;
}

/// getTargetNodeName - Return the printable name of a PPCISD opcode, or a
/// null pointer for any opcode not listed here.
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
  case PPCISD::LOAD: return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
  case PPCISD::STD_32: return "PPCISD::STD_32";
  case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
  case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
  case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
  case PPCISD::NOP: return "PPCISD::NOP";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
  case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::MFCR: return "PPCISD::MFCR";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMPo: return "PPCISD::VCMPo";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LARX: return "PPCISD::LARX";
  case PPCISD::STCX: return "PPCISD::STCX";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
  case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::MTFSF: return "PPCISD::MTFSF";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  }
}

/// getSetCCResultType - SETCC results are always i32 here; the operand type
/// VT is ignored.
EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
  return MVT::i32;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.  Besides FP
/// constant nodes, this also recognizes a load whose address operand is a
/// constant-pool entry holding an FP zero.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is a shuffle-mask element: a negative value stands
/// for undef.  Return true if Op is undef or if it matches the specified
/// value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
540 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { 541 if (!isUnary) { 542 for (unsigned i = 0; i != 16; ++i) 543 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) 544 return false; 545 } else { 546 for (unsigned i = 0; i != 8; ++i) 547 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) || 548 !isConstantOrUndef(N->getMaskElt(i+8), i*2+1)) 549 return false; 550 } 551 return true; 552 } 553 554 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 555 /// VPKUWUM instruction. 556 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { 557 if (!isUnary) { 558 for (unsigned i = 0; i != 16; i += 2) 559 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || 560 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) 561 return false; 562 } else { 563 for (unsigned i = 0; i != 8; i += 2) 564 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || 565 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) || 566 !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) || 567 !isConstantOrUndef(N->getMaskElt(i+9), i*2+3)) 568 return false; 569 } 570 return true; 571 } 572 573 /// isVMerge - Common function, used to match vmrg* shuffles. 
574 /// 575 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, 576 unsigned LHSStart, unsigned RHSStart) { 577 assert(N->getValueType(0) == MVT::v16i8 && 578 "PPC only supports shuffles by bytes!"); 579 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 580 "Unsupported merge size!"); 581 582 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 583 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 584 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), 585 LHSStart+j+i*UnitSize) || 586 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), 587 RHSStart+j+i*UnitSize)) 588 return false; 589 } 590 return true; 591 } 592 593 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 594 /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). 595 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 596 bool isUnary) { 597 if (!isUnary) 598 return isVMerge(N, UnitSize, 8, 24); 599 return isVMerge(N, UnitSize, 8, 8); 600 } 601 602 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 603 /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). 604 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 605 bool isUnary) { 606 if (!isUnary) 607 return isVMerge(N, UnitSize, 0, 16); 608 return isVMerge(N, UnitSize, 0, 0); 609 } 610 611 612 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 613 /// amount, otherwise return -1. 614 int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { 615 assert(N->getValueType(0) == MVT::v16i8 && 616 "PPC only supports shuffles by bytes!"); 617 618 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 619 620 // Find the first non-undef value in the shuffle mask. 621 unsigned i; 622 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) 623 /*search*/; 624 625 if (i == 16) return -1; // all undef. 
626 627 // Otherwise, check to see if the rest of the elements are consecutively 628 // numbered from this value. 629 unsigned ShiftAmt = SVOp->getMaskElt(i); 630 if (ShiftAmt < i) return -1; 631 ShiftAmt -= i; 632 633 if (!isUnary) { 634 // Check the rest of the elements to see if they are consecutive. 635 for (++i; i != 16; ++i) 636 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 637 return -1; 638 } else { 639 // Check the rest of the elements to see if they are consecutive. 640 for (++i; i != 16; ++i) 641 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) 642 return -1; 643 } 644 return ShiftAmt; 645 } 646 647 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 648 /// specifies a splat of a single element that is suitable for input to 649 /// VSPLTB/VSPLTH/VSPLTW. 650 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { 651 assert(N->getValueType(0) == MVT::v16i8 && 652 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 653 654 // This is a splat operation if each element of the permute is the same, and 655 // if the value doesn't reference the second vector. 656 unsigned ElementBase = N->getMaskElt(0); 657 658 // FIXME: Handle UNDEF elements too! 659 if (ElementBase >= 16) 660 return false; 661 662 // Check that the indices are consecutive, in the case of a multi-byte element 663 // splatted with a v16i8 mask. 664 for (unsigned i = 1; i != EltSize; ++i) 665 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) 666 return false; 667 668 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 669 if (N->getMaskElt(i) < 0) continue; 670 for (unsigned j = 0; j != EltSize; ++j) 671 if (N->getMaskElt(i+j) != N->getMaskElt(j)) 672 return false; 673 } 674 return true; 675 } 676 677 /// isAllNegativeZeroVector - Returns true if all elements of build_vector 678 /// are -0.0. 
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  // Only the first operand is inspected once the node is known to be a
  // constant splat.
  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.  The immediate is the
/// first mask index divided by EltSize.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].  An empty SDValue is returned when
/// no such splat exists.
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();


      // i&(Multiple-1) is the operand's position within its chunk.
      if (UniquedVals[i&(Multiple-1)].getNode() == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.

  // Value stays 0 when OpVal is neither an integer nor an FP constant, which
  // is then rejected by the MaskVal == 0 test below.
  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
         (Value & ((1 << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  // NOTE(review): this and the 5-bit check below sign-extend by shifting a
  // signed int left and back right; that relies on implementation behaviour
  // for negative values -- consider a dedicated sign-extension helper.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.  Note that Imm is overwritten with the truncated low 16 bits for
/// every constant node, even when false is returned; it is left untouched
/// only when N is not a constant.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  // The value fits iff sign-extending the truncated 16 bits reproduces it.
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
/// SDValue convenience overload: forwards to the SDNode version.
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}


/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.
856 857 // If this is an or of disjoint bitfields, we can codegen this as an add 858 // (for better address arithmetic) if the LHS and RHS of the OR are provably 859 // disjoint. 860 APInt LHSKnownZero, LHSKnownOne; 861 APInt RHSKnownZero, RHSKnownOne; 862 DAG.ComputeMaskedBits(N.getOperand(0), 863 LHSKnownZero, LHSKnownOne); 864 865 if (LHSKnownZero.getBoolValue()) { 866 DAG.ComputeMaskedBits(N.getOperand(1), 867 RHSKnownZero, RHSKnownOne); 868 // If all of the bits are known zero on the LHS or RHS, the add won't 869 // carry. 870 if (~(LHSKnownZero | RHSKnownZero) == 0) { 871 Base = N.getOperand(0); 872 Index = N.getOperand(1); 873 return true; 874 } 875 } 876 } 877 878 return false; 879 } 880 881 /// Returns true if the address N can be represented by a base register plus 882 /// a signed 16-bit displacement [r+imm], and if it is not better 883 /// represented as reg+reg. 884 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, 885 SDValue &Base, 886 SelectionDAG &DAG) const { 887 // FIXME dl should come from parent load or store, not from address 888 DebugLoc dl = N.getDebugLoc(); 889 // If this can be more profitably realized as r+r, fail. 890 if (SelectAddressRegReg(N, Disp, Base, DAG)) 891 return false; 892 893 if (N.getOpcode() == ISD::ADD) { 894 short imm = 0; 895 if (isIntS16Immediate(N.getOperand(1), imm)) { 896 Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); 897 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 898 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 899 } else { 900 Base = N.getOperand(0); 901 } 902 return true; // [r+i] 903 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 904 // Match LOAD (ADD (X, Lo(G))). 905 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() 906 && "Cannot handle constant offsets yet!"); 907 Disp = N.getOperand(1).getOperand(0); // The global address. 
908 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 909 Disp.getOpcode() == ISD::TargetConstantPool || 910 Disp.getOpcode() == ISD::TargetJumpTable); 911 Base = N.getOperand(0); 912 return true; // [&g+r] 913 } 914 } else if (N.getOpcode() == ISD::OR) { 915 short imm = 0; 916 if (isIntS16Immediate(N.getOperand(1), imm)) { 917 // If this is an or of disjoint bitfields, we can codegen this as an add 918 // (for better address arithmetic) if the LHS and RHS of the OR are 919 // provably disjoint. 920 APInt LHSKnownZero, LHSKnownOne; 921 DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); 922 923 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 924 // If all of the bits are known zero on the LHS or RHS, the add won't 925 // carry. 926 Base = N.getOperand(0); 927 Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); 928 return true; 929 } 930 } 931 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 932 // Loading from a constant address. 933 934 // If this address fits entirely in a 16-bit sext immediate field, codegen 935 // this as "d, 0" 936 short Imm; 937 if (isIntS16Immediate(CN, Imm)) { 938 Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); 939 Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, 940 CN->getValueType(0)); 941 return true; 942 } 943 944 // Handle 32-bit sext immediates with LIS + addr mode. 945 if (CN->getValueType(0) == MVT::i32 || 946 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) { 947 int Addr = (int)CN->getZExtValue(); 948 949 // Otherwise, break this down into an LIS + disp. 950 Disp = DAG.getTargetConstant((short)Addr, MVT::i32); 951 952 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32); 953 unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8; 954 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); 955 return true; 956 } 957 } 958 959 Disp = DAG.getTargetConstant(0, getPointerTy()); 960 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) 961 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 962 else 963 Base = N; 964 return true; // [r+0] 965 } 966 967 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be 968 /// represented as an indexed [r+r] operation. 969 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, 970 SDValue &Index, 971 SelectionDAG &DAG) const { 972 // Check to see if we can easily represent this as an [r+r] address. This 973 // will fail if it thinks that the address is more profitably represented as 974 // reg+imm, e.g. where imm = 0. 975 if (SelectAddressRegReg(N, Base, Index, DAG)) 976 return true; 977 978 // If the operand is an addition, always emit this as [r+r], since this is 979 // better (for code size, and execution, as the memop does the add for free) 980 // than emitting an explicit add. 981 if (N.getOpcode() == ISD::ADD) { 982 Base = N.getOperand(0); 983 Index = N.getOperand(1); 984 return true; 985 } 986 987 // Otherwise, do it the hard way, using R0 as the base register. 988 Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, 989 N.getValueType()); 990 Index = N; 991 return true; 992 } 993 994 /// SelectAddressRegImmShift - Returns true if the address N can be 995 /// represented by a base register plus a signed 14-bit displacement 996 /// [r+imm*4]. Suitable for use by STD and friends. 997 bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, 998 SDValue &Base, 999 SelectionDAG &DAG) const { 1000 // FIXME dl should come from the parent load or store, not the address 1001 DebugLoc dl = N.getDebugLoc(); 1002 // If this can be more profitably realized as r+r, fail. 
1003 if (SelectAddressRegReg(N, Disp, Base, DAG)) 1004 return false; 1005 1006 if (N.getOpcode() == ISD::ADD) { 1007 short imm = 0; 1008 if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { 1009 Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); 1010 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 1011 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1012 } else { 1013 Base = N.getOperand(0); 1014 } 1015 return true; // [r+i] 1016 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 1017 // Match LOAD (ADD (X, Lo(G))). 1018 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() 1019 && "Cannot handle constant offsets yet!"); 1020 Disp = N.getOperand(1).getOperand(0); // The global address. 1021 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 1022 Disp.getOpcode() == ISD::TargetConstantPool || 1023 Disp.getOpcode() == ISD::TargetJumpTable); 1024 Base = N.getOperand(0); 1025 return true; // [&g+r] 1026 } 1027 } else if (N.getOpcode() == ISD::OR) { 1028 short imm = 0; 1029 if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { 1030 // If this is an or of disjoint bitfields, we can codegen this as an add 1031 // (for better address arithmetic) if the LHS and RHS of the OR are 1032 // provably disjoint. 1033 APInt LHSKnownZero, LHSKnownOne; 1034 DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); 1035 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 1036 // If all of the bits are known zero on the LHS or RHS, the add won't 1037 // carry. 1038 Base = N.getOperand(0); 1039 Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); 1040 return true; 1041 } 1042 } 1043 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 1044 // Loading from a constant address. Verify low two bits are clear. 
1045 if ((CN->getZExtValue() & 3) == 0) { 1046 // If this address fits entirely in a 14-bit sext immediate field, codegen 1047 // this as "d, 0" 1048 short Imm; 1049 if (isIntS16Immediate(CN, Imm)) { 1050 Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); 1051 Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, 1052 CN->getValueType(0)); 1053 return true; 1054 } 1055 1056 // Fold the low-part of 32-bit absolute addresses into addr mode. 1057 if (CN->getValueType(0) == MVT::i32 || 1058 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) { 1059 int Addr = (int)CN->getZExtValue(); 1060 1061 // Otherwise, break this down into an LIS + disp. 1062 Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32); 1063 Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32); 1064 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; 1065 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0); 1066 return true; 1067 } 1068 } 1069 } 1070 1071 Disp = DAG.getTargetConstant(0, getPointerTy()); 1072 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) 1073 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 1074 else 1075 Base = N; 1076 return true; // [r+0] 1077 } 1078 1079 1080 /// getPreIndexedAddressParts - returns true by value, base pointer and 1081 /// offset pointer and addressing mode by reference if the node's address 1082 /// can be legally represented as pre-indexed load / store address. 1083 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 1084 SDValue &Offset, 1085 ISD::MemIndexedMode &AM, 1086 SelectionDAG &DAG) const { 1087 // Disabled by default for now. 
1088 if (!EnablePPCPreinc) return false; 1089 1090 SDValue Ptr; 1091 EVT VT; 1092 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1093 Ptr = LD->getBasePtr(); 1094 VT = LD->getMemoryVT(); 1095 1096 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 1097 Ptr = ST->getBasePtr(); 1098 VT = ST->getMemoryVT(); 1099 } else 1100 return false; 1101 1102 // PowerPC doesn't have preinc load/store instructions for vectors. 1103 if (VT.isVector()) 1104 return false; 1105 1106 // TODO: Check reg+reg first. 1107 1108 // LDU/STU use reg+imm*4, others use reg+imm. 1109 if (VT != MVT::i64) { 1110 // reg + imm 1111 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) 1112 return false; 1113 } else { 1114 // reg + imm * 4. 1115 if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) 1116 return false; 1117 } 1118 1119 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1120 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 1121 // sext i32 to i64 when addr mode is r+i. 1122 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && 1123 LD->getExtensionType() == ISD::SEXTLOAD && 1124 isa<ConstantSDNode>(Offset)) 1125 return false; 1126 } 1127 1128 AM = ISD::PRE_INC; 1129 return true; 1130 } 1131 1132 //===----------------------------------------------------------------------===// 1133 // LowerOperation implementation 1134 //===----------------------------------------------------------------------===// 1135 1136 /// GetLabelAccessInfo - Return true if we should reference labels using a 1137 /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. 1138 static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags, 1139 unsigned &LoOpFlags, const GlobalValue *GV = 0) { 1140 HiOpFlags = PPCII::MO_HA16; 1141 LoOpFlags = PPCII::MO_LO16; 1142 1143 // Don't use the pic base if not in PIC relocation model. Or if we are on a 1144 // non-darwin platform. We don't support PIC on other platforms yet. 
1145 bool isPIC = TM.getRelocationModel() == Reloc::PIC_ && 1146 TM.getSubtarget<PPCSubtarget>().isDarwin(); 1147 if (isPIC) { 1148 HiOpFlags |= PPCII::MO_PIC_FLAG; 1149 LoOpFlags |= PPCII::MO_PIC_FLAG; 1150 } 1151 1152 // If this is a reference to a global value that requires a non-lazy-ptr, make 1153 // sure that instruction lowering adds it. 1154 if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) { 1155 HiOpFlags |= PPCII::MO_NLP_FLAG; 1156 LoOpFlags |= PPCII::MO_NLP_FLAG; 1157 1158 if (GV->hasHiddenVisibility()) { 1159 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1160 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; 1161 } 1162 } 1163 1164 return isPIC; 1165 } 1166 1167 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, 1168 SelectionDAG &DAG) { 1169 EVT PtrVT = HiPart.getValueType(); 1170 SDValue Zero = DAG.getConstant(0, PtrVT); 1171 DebugLoc DL = HiPart.getDebugLoc(); 1172 1173 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); 1174 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); 1175 1176 // With PIC, the first instruction is actually "GR+hi(&G)". 1177 if (isPIC) 1178 Hi = DAG.getNode(ISD::ADD, DL, PtrVT, 1179 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); 1180 1181 // Generate non-pic code that has direct accesses to the constant pool. 1182 // The address of the global is just (hi(&g)+lo(&g)). 
1183 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); 1184 } 1185 1186 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, 1187 SelectionDAG &DAG) const { 1188 EVT PtrVT = Op.getValueType(); 1189 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1190 const Constant *C = CP->getConstVal(); 1191 1192 unsigned MOHiFlag, MOLoFlag; 1193 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1194 SDValue CPIHi = 1195 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); 1196 SDValue CPILo = 1197 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); 1198 return LowerLabelRef(CPIHi, CPILo, isPIC, DAG); 1199 } 1200 1201 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { 1202 EVT PtrVT = Op.getValueType(); 1203 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1204 1205 unsigned MOHiFlag, MOLoFlag; 1206 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1207 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 1208 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 1209 return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); 1210 } 1211 1212 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 1213 SelectionDAG &DAG) const { 1214 EVT PtrVT = Op.getValueType(); 1215 1216 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 1217 1218 unsigned MOHiFlag, MOLoFlag; 1219 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1220 SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag); 1221 SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag); 1222 return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); 1223 } 1224 1225 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 1226 SelectionDAG &DAG) const { 1227 EVT PtrVT = Op.getValueType(); 1228 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 1229 DebugLoc DL = GSDN->getDebugLoc(); 1230 const 
GlobalValue *GV = GSDN->getGlobal(); 1231 1232 // 64-bit SVR4 ABI code is always position-independent. 1233 // The actual address of the GlobalValue is stored in the TOC. 1234 if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { 1235 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); 1236 return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA, 1237 DAG.getRegister(PPC::X2, MVT::i64)); 1238 } 1239 1240 unsigned MOHiFlag, MOLoFlag; 1241 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV); 1242 1243 SDValue GAHi = 1244 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); 1245 SDValue GALo = 1246 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); 1247 1248 SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG); 1249 1250 // If the global reference is actually to a non-lazy-pointer, we have to do an 1251 // extra load to get the address of the global. 1252 if (MOHiFlag & PPCII::MO_NLP_FLAG) 1253 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), 1254 false, false, false, 0); 1255 return Ptr; 1256 } 1257 1258 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 1259 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 1260 DebugLoc dl = Op.getDebugLoc(); 1261 1262 // If we're comparing for equality to zero, expose the fact that this is 1263 // implented as a ctlz/srl pair on ppc, so that the dag combiner can 1264 // fold the new nodes. 
1265 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1266 if (C->isNullValue() && CC == ISD::SETEQ) { 1267 EVT VT = Op.getOperand(0).getValueType(); 1268 SDValue Zext = Op.getOperand(0); 1269 if (VT.bitsLT(MVT::i32)) { 1270 VT = MVT::i32; 1271 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); 1272 } 1273 unsigned Log2b = Log2_32(VT.getSizeInBits()); 1274 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); 1275 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, 1276 DAG.getConstant(Log2b, MVT::i32)); 1277 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); 1278 } 1279 // Leave comparisons against 0 and -1 alone for now, since they're usually 1280 // optimized. FIXME: revisit this when we can custom lower all setcc 1281 // optimizations. 1282 if (C->isAllOnesValue() || C->isNullValue()) 1283 return SDValue(); 1284 } 1285 1286 // If we have an integer seteq/setne, turn it into a compare against zero 1287 // by xor'ing the rhs with the lhs, which is faster than setting a 1288 // condition register, reading it back out, and masking the correct bit. The 1289 // normal approach here uses sub to do this instead of xor. Using xor exposes 1290 // the result to other bit-twiddling opportunities. 
1291 EVT LHSVT = Op.getOperand(0).getValueType(); 1292 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 1293 EVT VT = Op.getValueType(); 1294 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 1295 Op.getOperand(1)); 1296 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC); 1297 } 1298 return SDValue(); 1299 } 1300 1301 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, 1302 const PPCSubtarget &Subtarget) const { 1303 SDNode *Node = Op.getNode(); 1304 EVT VT = Node->getValueType(0); 1305 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1306 SDValue InChain = Node->getOperand(0); 1307 SDValue VAListPtr = Node->getOperand(1); 1308 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 1309 DebugLoc dl = Node->getDebugLoc(); 1310 1311 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 1312 1313 // gpr_index 1314 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1315 VAListPtr, MachinePointerInfo(SV), MVT::i8, 1316 false, false, 0); 1317 InChain = GprIndex.getValue(1); 1318 1319 if (VT == MVT::i64) { 1320 // Check if GprIndex is even 1321 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 1322 DAG.getConstant(1, MVT::i32)); 1323 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 1324 DAG.getConstant(0, MVT::i32), ISD::SETNE); 1325 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 1326 DAG.getConstant(1, MVT::i32)); 1327 // Align GprIndex to be even if it isn't 1328 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 1329 GprIndex); 1330 } 1331 1332 // fpr index is 1 byte after gpr 1333 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1334 DAG.getConstant(1, MVT::i32)); 1335 1336 // fpr 1337 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1338 FprPtr, MachinePointerInfo(SV), MVT::i8, 1339 false, false, 0); 1340 InChain = FprIndex.getValue(1); 1341 1342 SDValue 
RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1343 DAG.getConstant(8, MVT::i32)); 1344 1345 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1346 DAG.getConstant(4, MVT::i32)); 1347 1348 // areas 1349 SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, 1350 MachinePointerInfo(), false, false, 1351 false, 0); 1352 InChain = OverflowArea.getValue(1); 1353 1354 SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, 1355 MachinePointerInfo(), false, false, 1356 false, 0); 1357 InChain = RegSaveArea.getValue(1); 1358 1359 // select overflow_area if index > 8 1360 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 1361 DAG.getConstant(8, MVT::i32), ISD::SETLT); 1362 1363 // adjustment constant gpr_index * 4/8 1364 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 1365 VT.isInteger() ? GprIndex : FprIndex, 1366 DAG.getConstant(VT.isInteger() ? 4 : 8, 1367 MVT::i32)); 1368 1369 // OurReg = RegSaveArea + RegConstant 1370 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 1371 RegConstant); 1372 1373 // Floating types are 32 bytes into RegSaveArea 1374 if (VT.isFloatingPoint()) 1375 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 1376 DAG.getConstant(32, MVT::i32)); 1377 1378 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 1379 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 1380 VT.isInteger() ? GprIndex : FprIndex, 1381 DAG.getConstant(VT == MVT::i64 ? 2 : 1, 1382 MVT::i32)); 1383 1384 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 1385 VT.isInteger() ? 
VAListPtr : FprPtr, 1386 MachinePointerInfo(SV), 1387 MVT::i8, false, false, 0); 1388 1389 // determine if we should load from reg_save_area or overflow_area 1390 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 1391 1392 // increase overflow_area by 4/8 if gpr/fpr > 8 1393 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 1394 DAG.getConstant(VT.isInteger() ? 4 : 8, 1395 MVT::i32)); 1396 1397 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 1398 OverflowAreaPlusN); 1399 1400 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, 1401 OverflowAreaPtr, 1402 MachinePointerInfo(), 1403 MVT::i32, false, false, 0); 1404 1405 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), 1406 false, false, false, 0); 1407 } 1408 1409 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 1410 SelectionDAG &DAG) const { 1411 return Op.getOperand(0); 1412 } 1413 1414 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 1415 SelectionDAG &DAG) const { 1416 SDValue Chain = Op.getOperand(0); 1417 SDValue Trmp = Op.getOperand(1); // trampoline 1418 SDValue FPtr = Op.getOperand(2); // nested function 1419 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 1420 DebugLoc dl = Op.getDebugLoc(); 1421 1422 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1423 bool isPPC64 = (PtrVT == MVT::i64); 1424 Type *IntPtrTy = 1425 DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType( 1426 *DAG.getContext()); 1427 1428 TargetLowering::ArgListTy Args; 1429 TargetLowering::ArgListEntry Entry; 1430 1431 Entry.Ty = IntPtrTy; 1432 Entry.Node = Trmp; Args.push_back(Entry); 1433 1434 // TrampSize == (isPPC64 ? 48 : 40); 1435 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, 1436 isPPC64 ? 
MVT::i64 : MVT::i32); 1437 Args.push_back(Entry); 1438 1439 Entry.Node = FPtr; Args.push_back(Entry); 1440 Entry.Node = Nest; Args.push_back(Entry); 1441 1442 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 1443 std::pair<SDValue, SDValue> CallResult = 1444 LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), 1445 false, false, false, false, 0, CallingConv::C, 1446 /*isTailCall=*/false, 1447 /*doesNotRet=*/false, /*isReturnValueUsed=*/true, 1448 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 1449 Args, DAG, dl); 1450 1451 return CallResult.second; 1452 } 1453 1454 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, 1455 const PPCSubtarget &Subtarget) const { 1456 MachineFunction &MF = DAG.getMachineFunction(); 1457 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1458 1459 DebugLoc dl = Op.getDebugLoc(); 1460 1461 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 1462 // vastart just stores the address of the VarArgsFrameIndex slot into the 1463 // memory location argument. 1464 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1465 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 1466 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1467 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 1468 MachinePointerInfo(SV), 1469 false, false, 0); 1470 } 1471 1472 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 1473 // We suppose the given va_list is already allocated. 1474 // 1475 // typedef struct { 1476 // char gpr; /* index into the array of 8 GPRs 1477 // * stored in the register save area 1478 // * gpr=0 corresponds to r3, 1479 // * gpr=1 to r4, etc. 1480 // */ 1481 // char fpr; /* index into the array of 8 FPRs 1482 // * stored in the register save area 1483 // * fpr=0 corresponds to f1, 1484 // * fpr=1 to f2, etc. 
1485 // */ 1486 // char *overflow_arg_area; 1487 // /* location on stack that holds 1488 // * the next overflow argument 1489 // */ 1490 // char *reg_save_area; 1491 // /* where r3:r10 and f1:f8 (if saved) 1492 // * are stored 1493 // */ 1494 // } va_list[1]; 1495 1496 1497 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); 1498 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); 1499 1500 1501 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1502 1503 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 1504 PtrVT); 1505 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1506 PtrVT); 1507 1508 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 1509 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); 1510 1511 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 1512 SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT); 1513 1514 uint64_t FPROffset = 1; 1515 SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); 1516 1517 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1518 1519 // Store first byte : number of int regs 1520 SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, 1521 Op.getOperand(1), 1522 MachinePointerInfo(SV), 1523 MVT::i8, false, false, 0); 1524 uint64_t nextOffset = FPROffset; 1525 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 1526 ConstFPROffset); 1527 1528 // Store second byte : number of float regs 1529 SDValue secondStore = 1530 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 1531 MachinePointerInfo(SV, nextOffset), MVT::i8, 1532 false, false, 0); 1533 nextOffset += StackOffset; 1534 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 1535 1536 // Store second word : arguments given on stack 1537 SDValue thirdStore = 1538 DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 1539 MachinePointerInfo(SV, nextOffset), 1540 false, false, 0); 1541 
nextOffset += FrameOffset; 1542 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 1543 1544 // Store third word : arguments given in registers 1545 return DAG.getStore(thirdStore, dl, FR, nextPtr, 1546 MachinePointerInfo(SV, nextOffset), 1547 false, false, 0); 1548 1549 } 1550 1551 #include "PPCGenCallingConv.inc" 1552 1553 static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 1554 CCValAssign::LocInfo &LocInfo, 1555 ISD::ArgFlagsTy &ArgFlags, 1556 CCState &State) { 1557 return true; 1558 } 1559 1560 static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 1561 MVT &LocVT, 1562 CCValAssign::LocInfo &LocInfo, 1563 ISD::ArgFlagsTy &ArgFlags, 1564 CCState &State) { 1565 static const uint16_t ArgRegs[] = { 1566 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1567 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1568 }; 1569 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1570 1571 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1572 1573 // Skip one register if the first unallocated register has an even register 1574 // number and there are still argument registers available which have not been 1575 // allocated yet. RegNum is actually an index into ArgRegs, which means we 1576 // need to skip a register if RegNum is odd. 1577 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 1578 State.AllocateReg(ArgRegs[RegNum]); 1579 } 1580 1581 // Always return false here, as this function only makes sure that the first 1582 // unallocated register has an odd register number and does not actually 1583 // allocate a register for the current argument. 
1584 return false; 1585 } 1586 1587 static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 1588 MVT &LocVT, 1589 CCValAssign::LocInfo &LocInfo, 1590 ISD::ArgFlagsTy &ArgFlags, 1591 CCState &State) { 1592 static const uint16_t ArgRegs[] = { 1593 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1594 PPC::F8 1595 }; 1596 1597 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1598 1599 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1600 1601 // If there is only one Floating-point register left we need to put both f64 1602 // values of a split ppc_fp128 value on the stack. 1603 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 1604 State.AllocateReg(ArgRegs[RegNum]); 1605 } 1606 1607 // Always return false here, as this function only makes sure that the two f64 1608 // values a ppc_fp128 value is split into are both passed in registers or both 1609 // passed on the stack and does not actually allocate a register for the 1610 // current argument. 1611 return false; 1612 } 1613 1614 /// GetFPR - Get the set of FP registers that should be allocated for arguments, 1615 /// on Darwin. 1616 static const uint16_t *GetFPR() { 1617 static const uint16_t FPR[] = { 1618 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1619 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 1620 }; 1621 1622 return FPR; 1623 } 1624 1625 /// CalculateStackSlotSize - Calculates the size reserved for this argument on 1626 /// the stack. 
1627 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 1628 unsigned PtrByteSize) { 1629 unsigned ArgSize = ArgVT.getSizeInBits()/8; 1630 if (Flags.isByVal()) 1631 ArgSize = Flags.getByValSize(); 1632 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 1633 1634 return ArgSize; 1635 } 1636 1637 SDValue 1638 PPCTargetLowering::LowerFormalArguments(SDValue Chain, 1639 CallingConv::ID CallConv, bool isVarArg, 1640 const SmallVectorImpl<ISD::InputArg> 1641 &Ins, 1642 DebugLoc dl, SelectionDAG &DAG, 1643 SmallVectorImpl<SDValue> &InVals) 1644 const { 1645 if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { 1646 return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins, 1647 dl, DAG, InVals); 1648 } else { 1649 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 1650 dl, DAG, InVals); 1651 } 1652 } 1653 1654 SDValue 1655 PPCTargetLowering::LowerFormalArguments_SVR4( 1656 SDValue Chain, 1657 CallingConv::ID CallConv, bool isVarArg, 1658 const SmallVectorImpl<ISD::InputArg> 1659 &Ins, 1660 DebugLoc dl, SelectionDAG &DAG, 1661 SmallVectorImpl<SDValue> &InVals) const { 1662 1663 // 32-bit SVR4 ABI Stack Frame Layout: 1664 // +-----------------------------------+ 1665 // +--> | Back chain | 1666 // | +-----------------------------------+ 1667 // | | Floating-point register save area | 1668 // | +-----------------------------------+ 1669 // | | General register save area | 1670 // | +-----------------------------------+ 1671 // | | CR save word | 1672 // | +-----------------------------------+ 1673 // | | VRSAVE save word | 1674 // | +-----------------------------------+ 1675 // | | Alignment padding | 1676 // | +-----------------------------------+ 1677 // | | Vector register save area | 1678 // | +-----------------------------------+ 1679 // | | Local variable space | 1680 // | +-----------------------------------+ 1681 // | | Parameter list area | 1682 // | +-----------------------------------+ 1683 // | | LR 
save word | 1684 // | +-----------------------------------+ 1685 // SP--> +--- | Back chain | 1686 // +-----------------------------------+ 1687 // 1688 // Specifications: 1689 // System V Application Binary Interface PowerPC Processor Supplement 1690 // AltiVec Technology Programming Interface Manual 1691 1692 MachineFunction &MF = DAG.getMachineFunction(); 1693 MachineFrameInfo *MFI = MF.getFrameInfo(); 1694 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1695 1696 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1697 // Potential tail calls could cause overwriting of argument stack slots. 1698 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 1699 (CallConv == CallingConv::Fast)); 1700 unsigned PtrByteSize = 4; 1701 1702 // Assign locations to all of the incoming arguments. 1703 SmallVector<CCValAssign, 16> ArgLocs; 1704 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1705 getTargetMachine(), ArgLocs, *DAG.getContext()); 1706 1707 // Reserve space for the linkage area on the stack. 1708 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); 1709 1710 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4); 1711 1712 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1713 CCValAssign &VA = ArgLocs[i]; 1714 1715 // Arguments stored in registers. 
1716 if (VA.isRegLoc()) { 1717 const TargetRegisterClass *RC; 1718 EVT ValVT = VA.getValVT(); 1719 1720 switch (ValVT.getSimpleVT().SimpleTy) { 1721 default: 1722 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 1723 case MVT::i32: 1724 RC = PPC::GPRCRegisterClass; 1725 break; 1726 case MVT::f32: 1727 RC = PPC::F4RCRegisterClass; 1728 break; 1729 case MVT::f64: 1730 RC = PPC::F8RCRegisterClass; 1731 break; 1732 case MVT::v16i8: 1733 case MVT::v8i16: 1734 case MVT::v4i32: 1735 case MVT::v4f32: 1736 RC = PPC::VRRCRegisterClass; 1737 break; 1738 } 1739 1740 // Transform the arguments stored in physical registers into virtual ones. 1741 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 1742 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); 1743 1744 InVals.push_back(ArgValue); 1745 } else { 1746 // Argument stored in memory. 1747 assert(VA.isMemLoc()); 1748 1749 unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; 1750 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 1751 isImmutable); 1752 1753 // Create load nodes to retrieve arguments from the stack. 1754 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1755 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 1756 MachinePointerInfo(), 1757 false, false, false, 0)); 1758 } 1759 } 1760 1761 // Assign locations to all of the incoming aggregate by value arguments. 1762 // Aggregates passed by value are stored in the local variable space of the 1763 // caller's stack frame, right above the parameter list area. 1764 SmallVector<CCValAssign, 16> ByValArgLocs; 1765 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1766 getTargetMachine(), ByValArgLocs, *DAG.getContext()); 1767 1768 // Reserve stack space for the allocations in CCInfo. 1769 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 1770 1771 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal); 1772 1773 // Area that is at least reserved in the caller of this function. 
1774 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 1775 1776 // Set the size that is at least reserved in caller of this function. Tail 1777 // call optimized function's reserved stack space needs to be aligned so that 1778 // taking the difference between two stack areas will result in an aligned 1779 // stack. 1780 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1781 1782 MinReservedArea = 1783 std::max(MinReservedArea, 1784 PPCFrameLowering::getMinCallFrameSize(false, false)); 1785 1786 unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> 1787 getStackAlignment(); 1788 unsigned AlignMask = TargetAlign-1; 1789 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; 1790 1791 FI->setMinReservedArea(MinReservedArea); 1792 1793 SmallVector<SDValue, 8> MemOps; 1794 1795 // If the function takes variable number of arguments, make a frame index for 1796 // the start of the first vararg value... for expansion of llvm.va_start. 1797 if (isVarArg) { 1798 static const uint16_t GPArgRegs[] = { 1799 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1800 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1801 }; 1802 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 1803 1804 static const uint16_t FPArgRegs[] = { 1805 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1806 PPC::F8 1807 }; 1808 const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 1809 1810 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, 1811 NumGPArgRegs)); 1812 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs, 1813 NumFPArgRegs)); 1814 1815 // Make room for NumGPArgRegs and NumFPArgRegs. 
1816 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 1817 NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; 1818 1819 FuncInfo->setVarArgsStackOffset( 1820 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 1821 CCInfo.getNextStackOffset(), true)); 1822 1823 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); 1824 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 1825 1826 // The fixed integer arguments of a variadic function are stored to the 1827 // VarArgsFrameIndex on the stack so that they may be loaded by deferencing 1828 // the result of va_next. 1829 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 1830 // Get an existing live-in vreg, or add a new one. 1831 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 1832 if (!VReg) 1833 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 1834 1835 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 1836 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 1837 MachinePointerInfo(), false, false, 0); 1838 MemOps.push_back(Store); 1839 // Increment the address by four for the next argument to store 1840 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 1841 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 1842 } 1843 1844 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 1845 // is set. 1846 // The double arguments are stored to the VarArgsFrameIndex 1847 // on the stack. 1848 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 1849 // Get an existing live-in vreg, or add a new one. 
1850 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 1851 if (!VReg) 1852 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 1853 1854 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 1855 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 1856 MachinePointerInfo(), false, false, 0); 1857 MemOps.push_back(Store); 1858 // Increment the address by eight for the next argument to store 1859 SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, 1860 PtrVT); 1861 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 1862 } 1863 } 1864 1865 if (!MemOps.empty()) 1866 Chain = DAG.getNode(ISD::TokenFactor, dl, 1867 MVT::Other, &MemOps[0], MemOps.size()); 1868 1869 return Chain; 1870 } 1871 1872 SDValue 1873 PPCTargetLowering::LowerFormalArguments_Darwin( 1874 SDValue Chain, 1875 CallingConv::ID CallConv, bool isVarArg, 1876 const SmallVectorImpl<ISD::InputArg> 1877 &Ins, 1878 DebugLoc dl, SelectionDAG &DAG, 1879 SmallVectorImpl<SDValue> &InVals) const { 1880 // TODO: add description of PPC stack frame format, or at least some docs. 1881 // 1882 MachineFunction &MF = DAG.getMachineFunction(); 1883 MachineFrameInfo *MFI = MF.getFrameInfo(); 1884 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1885 1886 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1887 bool isPPC64 = PtrVT == MVT::i64; 1888 // Potential tail calls could cause overwriting of argument stack slots. 1889 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 1890 (CallConv == CallingConv::Fast)); 1891 unsigned PtrByteSize = isPPC64 ? 8 : 4; 1892 1893 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); 1894 // Area that is at least reserved in caller of this function. 1895 unsigned MinReservedArea = ArgOffset; 1896 1897 static const uint16_t GPR_32[] = { // 32-bit registers. 
1898 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1899 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1900 }; 1901 static const uint16_t GPR_64[] = { // 64-bit registers. 1902 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 1903 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 1904 }; 1905 1906 static const uint16_t *FPR = GetFPR(); 1907 1908 static const uint16_t VR[] = { 1909 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 1910 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 1911 }; 1912 1913 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 1914 const unsigned Num_FPR_Regs = 13; 1915 const unsigned Num_VR_Regs = array_lengthof( VR); 1916 1917 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 1918 1919 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; 1920 1921 // In 32-bit non-varargs functions, the stack space for vectors is after the 1922 // stack space for non-vectors. We do not use this space unless we have 1923 // too many vectors to fit in registers, something that only occurs in 1924 // constructed examples:), but we have to walk the arglist to figure 1925 // that out...for the pathological case, compute VecArgOffset as the 1926 // start of the vector parameter area. Computing VecArgOffset is the 1927 // entire point of the following loop. 1928 unsigned VecArgOffset = ArgOffset; 1929 if (!isVarArg && !isPPC64) { 1930 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 1931 ++ArgNo) { 1932 EVT ObjectVT = Ins[ArgNo].VT; 1933 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 1934 1935 if (Flags.isByVal()) { 1936 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 1937 unsigned ObjSize = Flags.getByValSize(); 1938 unsigned ArgSize = 1939 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 1940 VecArgOffset += ArgSize; 1941 continue; 1942 } 1943 1944 switch(ObjectVT.getSimpleVT().SimpleTy) { 1945 default: llvm_unreachable("Unhandled argument type!"); 1946 case MVT::i32: 1947 case MVT::f32: 1948 VecArgOffset += isPPC64 ? 
8 : 4; 1949 break; 1950 case MVT::i64: // PPC64 1951 case MVT::f64: 1952 VecArgOffset += 8; 1953 break; 1954 case MVT::v4f32: 1955 case MVT::v4i32: 1956 case MVT::v8i16: 1957 case MVT::v16i8: 1958 // Nothing to do, we're only looking at Nonvector args here. 1959 break; 1960 } 1961 } 1962 } 1963 // We've found where the vector parameter area in memory is. Skip the 1964 // first 12 parameters; these don't use that memory. 1965 VecArgOffset = ((VecArgOffset+15)/16)*16; 1966 VecArgOffset += 12*16; 1967 1968 // Add DAG nodes to load the arguments or copy them out of registers. On 1969 // entry to a function on PPC, the arguments start after the linkage area, 1970 // although the first ones are often in registers. 1971 1972 SmallVector<SDValue, 8> MemOps; 1973 unsigned nAltivecParamsAtEnd = 0; 1974 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 1975 SDValue ArgVal; 1976 bool needsLoad = false; 1977 EVT ObjectVT = Ins[ArgNo].VT; 1978 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 1979 unsigned ArgSize = ObjSize; 1980 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 1981 1982 unsigned CurArgOffset = ArgOffset; 1983 1984 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 1985 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 1986 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 1987 if (isVarArg || isPPC64) { 1988 MinReservedArea = ((MinReservedArea+15)/16)*16; 1989 MinReservedArea += CalculateStackSlotSize(ObjectVT, 1990 Flags, 1991 PtrByteSize); 1992 } else nAltivecParamsAtEnd++; 1993 } else 1994 // Calculate min reserved area. 1995 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 1996 Flags, 1997 PtrByteSize); 1998 1999 // FIXME the codegen can be much improved in some cases. 2000 // We do not have to keep everything in memory. 2001 if (Flags.isByVal()) { 2002 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 
2003 ObjSize = Flags.getByValSize(); 2004 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2005 // Objects of size 1 and 2 are right justified, everything else is 2006 // left justified. This means the memory address is adjusted forwards. 2007 if (ObjSize==1 || ObjSize==2) { 2008 CurArgOffset = CurArgOffset + (4 - ObjSize); 2009 } 2010 // The value of the object is its address. 2011 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); 2012 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2013 InVals.push_back(FIN); 2014 if (ObjSize==1 || ObjSize==2) { 2015 if (GPR_idx != Num_GPR_Regs) { 2016 unsigned VReg; 2017 if (isPPC64) 2018 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2019 else 2020 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2021 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2022 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 2023 MachinePointerInfo(), 2024 ObjSize==1 ? MVT::i8 : MVT::i16, 2025 false, false, 0); 2026 MemOps.push_back(Store); 2027 ++GPR_idx; 2028 } 2029 2030 ArgOffset += PtrByteSize; 2031 2032 continue; 2033 } 2034 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 2035 // Store whatever pieces of the object are in registers 2036 // to memory. ArgVal will be address of the beginning of 2037 // the object. 
2038 if (GPR_idx != Num_GPR_Regs) { 2039 unsigned VReg; 2040 if (isPPC64) 2041 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2042 else 2043 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2044 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2045 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2046 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2047 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2048 MachinePointerInfo(), 2049 false, false, 0); 2050 MemOps.push_back(Store); 2051 ++GPR_idx; 2052 ArgOffset += PtrByteSize; 2053 } else { 2054 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 2055 break; 2056 } 2057 } 2058 continue; 2059 } 2060 2061 switch (ObjectVT.getSimpleVT().SimpleTy) { 2062 default: llvm_unreachable("Unhandled argument type!"); 2063 case MVT::i32: 2064 if (!isPPC64) { 2065 if (GPR_idx != Num_GPR_Regs) { 2066 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2067 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2068 ++GPR_idx; 2069 } else { 2070 needsLoad = true; 2071 ArgSize = PtrByteSize; 2072 } 2073 // All int arguments reserve stack space in the Darwin ABI. 2074 ArgOffset += PtrByteSize; 2075 break; 2076 } 2077 // FALLTHROUGH 2078 case MVT::i64: // PPC64 2079 if (GPR_idx != Num_GPR_Regs) { 2080 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2081 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 2082 2083 if (ObjectVT == MVT::i32) { 2084 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2085 // value to MVT::i64 and then truncate to the correct register size. 
2086 if (Flags.isSExt()) 2087 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 2088 DAG.getValueType(ObjectVT)); 2089 else if (Flags.isZExt()) 2090 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 2091 DAG.getValueType(ObjectVT)); 2092 2093 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); 2094 } 2095 2096 ++GPR_idx; 2097 } else { 2098 needsLoad = true; 2099 ArgSize = PtrByteSize; 2100 } 2101 // All int arguments reserve stack space in the Darwin ABI. 2102 ArgOffset += 8; 2103 break; 2104 2105 case MVT::f32: 2106 case MVT::f64: 2107 // Every 4 bytes of argument space consumes one of the GPRs available for 2108 // argument passing. 2109 if (GPR_idx != Num_GPR_Regs) { 2110 ++GPR_idx; 2111 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 2112 ++GPR_idx; 2113 } 2114 if (FPR_idx != Num_FPR_Regs) { 2115 unsigned VReg; 2116 2117 if (ObjectVT == MVT::f32) 2118 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 2119 else 2120 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 2121 2122 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2123 ++FPR_idx; 2124 } else { 2125 needsLoad = true; 2126 } 2127 2128 // All FP arguments reserve stack space in the Darwin ABI. 2129 ArgOffset += isPPC64 ? 8 : ObjSize; 2130 break; 2131 case MVT::v4f32: 2132 case MVT::v4i32: 2133 case MVT::v8i16: 2134 case MVT::v16i8: 2135 // Note that vector arguments in registers don't reserve stack space, 2136 // except in varargs functions. 2137 if (VR_idx != Num_VR_Regs) { 2138 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 2139 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2140 if (isVarArg) { 2141 while ((ArgOffset % 16) != 0) { 2142 ArgOffset += PtrByteSize; 2143 if (GPR_idx != Num_GPR_Regs) 2144 GPR_idx++; 2145 } 2146 ArgOffset += 16; 2147 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 2148 } 2149 ++VR_idx; 2150 } else { 2151 if (!isVarArg && !isPPC64) { 2152 // Vectors go after all the nonvectors. 
2153 CurArgOffset = VecArgOffset; 2154 VecArgOffset += 16; 2155 } else { 2156 // Vectors are aligned. 2157 ArgOffset = ((ArgOffset+15)/16)*16; 2158 CurArgOffset = ArgOffset; 2159 ArgOffset += 16; 2160 } 2161 needsLoad = true; 2162 } 2163 break; 2164 } 2165 2166 // We need to load the argument to a virtual register if we determined above 2167 // that we ran out of physical registers of the appropriate type. 2168 if (needsLoad) { 2169 int FI = MFI->CreateFixedObject(ObjSize, 2170 CurArgOffset + (ArgSize - ObjSize), 2171 isImmutable); 2172 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2173 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 2174 false, false, false, 0); 2175 } 2176 2177 InVals.push_back(ArgVal); 2178 } 2179 2180 // Set the size that is at least reserved in caller of this function. Tail 2181 // call optimized function's reserved stack space needs to be aligned so that 2182 // taking the difference between two stack areas will result in an aligned 2183 // stack. 2184 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 2185 // Add the Altivec parameters at the end, if needed. 2186 if (nAltivecParamsAtEnd) { 2187 MinReservedArea = ((MinReservedArea+15)/16)*16; 2188 MinReservedArea += 16*nAltivecParamsAtEnd; 2189 } 2190 MinReservedArea = 2191 std::max(MinReservedArea, 2192 PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); 2193 unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> 2194 getStackAlignment(); 2195 unsigned AlignMask = TargetAlign-1; 2196 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; 2197 FI->setMinReservedArea(MinReservedArea); 2198 2199 // If the function takes variable number of arguments, make a frame index for 2200 // the start of the first vararg value... for expansion of llvm.va_start. 
2201 if (isVarArg) { 2202 int Depth = ArgOffset; 2203 2204 FuncInfo->setVarArgsFrameIndex( 2205 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2206 Depth, true)); 2207 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2208 2209 // If this function is vararg, store any remaining integer argument regs 2210 // to their spots on the stack so that they may be loaded by deferencing the 2211 // result of va_next. 2212 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 2213 unsigned VReg; 2214 2215 if (isPPC64) 2216 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2217 else 2218 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2219 2220 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2221 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2222 MachinePointerInfo(), false, false, 0); 2223 MemOps.push_back(Store); 2224 // Increment the address by four for the next argument to store 2225 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2226 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2227 } 2228 } 2229 2230 if (!MemOps.empty()) 2231 Chain = DAG.getNode(ISD::TokenFactor, dl, 2232 MVT::Other, &MemOps[0], MemOps.size()); 2233 2234 return Chain; 2235 } 2236 2237 /// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus 2238 /// linkage area for the Darwin ABI. 2239 static unsigned 2240 CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, 2241 bool isPPC64, 2242 bool isVarArg, 2243 unsigned CC, 2244 const SmallVectorImpl<ISD::OutputArg> 2245 &Outs, 2246 const SmallVectorImpl<SDValue> &OutVals, 2247 unsigned &nAltivecParamsAtEnd) { 2248 // Count how many bytes are to be pushed on the stack, including the linkage 2249 // area, and parameter passing area. We start with 24/48 bytes, which is 2250 // prereserved space for [SP][CR][LR][3 x unused]. 
2251 unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true); 2252 unsigned NumOps = Outs.size(); 2253 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2254 2255 // Add up all the space actually used. 2256 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 2257 // they all go in registers, but we must reserve stack space for them for 2258 // possible use by the caller. In varargs or 64-bit calls, parameters are 2259 // assigned stack space in order, with padding so Altivec parameters are 2260 // 16-byte aligned. 2261 nAltivecParamsAtEnd = 0; 2262 for (unsigned i = 0; i != NumOps; ++i) { 2263 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2264 EVT ArgVT = Outs[i].VT; 2265 // Varargs Altivec parameters are padded to a 16 byte boundary. 2266 if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || 2267 ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { 2268 if (!isVarArg && !isPPC64) { 2269 // Non-varargs Altivec parameters go after all the non-Altivec 2270 // parameters; handle those later so we know how much padding we need. 2271 nAltivecParamsAtEnd++; 2272 continue; 2273 } 2274 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 2275 NumBytes = ((NumBytes+15)/16)*16; 2276 } 2277 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2278 } 2279 2280 // Allow for Altivec parameters at the end, if needed. 2281 if (nAltivecParamsAtEnd) { 2282 NumBytes = ((NumBytes+15)/16)*16; 2283 NumBytes += 16*nAltivecParamsAtEnd; 2284 } 2285 2286 // The prolog code of the callee may store up to 8 GPR argument registers to 2287 // the stack, allowing va_start to index over them in memory if its varargs. 2288 // Because we cannot tell if this is needed on the caller side, we have to 2289 // conservatively assume that it is needed. As such, make sure we have at 2290 // least enough stack space for the caller to store the 8 GPRs. 
2291 NumBytes = std::max(NumBytes, 2292 PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); 2293 2294 // Tail call needs the stack to be aligned. 2295 if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){ 2296 unsigned TargetAlign = DAG.getMachineFunction().getTarget(). 2297 getFrameLowering()->getStackAlignment(); 2298 unsigned AlignMask = TargetAlign-1; 2299 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2300 } 2301 2302 return NumBytes; 2303 } 2304 2305 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 2306 /// adjusted to accommodate the arguments for the tailcall. 2307 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 2308 unsigned ParamSize) { 2309 2310 if (!isTailCall) return 0; 2311 2312 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 2313 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 2314 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 2315 // Remember only if the new adjustement is bigger. 2316 if (SPDiff < FI->getTailCallSPDelta()) 2317 FI->setTailCallSPDelta(SPDiff); 2318 2319 return SPDiff; 2320 } 2321 2322 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 2323 /// for tail call optimization. Targets which want to do tail call 2324 /// optimization should implement this function. 2325 bool 2326 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 2327 CallingConv::ID CalleeCC, 2328 bool isVarArg, 2329 const SmallVectorImpl<ISD::InputArg> &Ins, 2330 SelectionDAG& DAG) const { 2331 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 2332 return false; 2333 2334 // Variable argument functions are not supported. 
2335 if (isVarArg) 2336 return false; 2337 2338 MachineFunction &MF = DAG.getMachineFunction(); 2339 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 2340 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 2341 // Functions containing by val parameters are not supported. 2342 for (unsigned i = 0; i != Ins.size(); i++) { 2343 ISD::ArgFlagsTy Flags = Ins[i].Flags; 2344 if (Flags.isByVal()) return false; 2345 } 2346 2347 // Non PIC/GOT tail calls are supported. 2348 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 2349 return true; 2350 2351 // At the moment we can only do local tail calls (in same module, hidden 2352 // or protected) if we are generating PIC. 2353 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 2354 return G->getGlobal()->hasHiddenVisibility() 2355 || G->getGlobal()->hasProtectedVisibility(); 2356 } 2357 2358 return false; 2359 } 2360 2361 /// isCallCompatibleAddress - Return the immediate to use if the specified 2362 /// 32-bit value is representable in the immediate field of a BxA instruction. 2363 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 2364 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 2365 if (!C) return 0; 2366 2367 int Addr = C->getZExtValue(); 2368 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 2369 (Addr << 6 >> 6) != Addr) 2370 return 0; // Top 6 bits have to be sext of immediate. 2371 2372 return DAG.getConstant((int)C->getZExtValue() >> 2, 2373 DAG.getTargetLoweringInfo().getPointerTy()).getNode(); 2374 } 2375 2376 namespace { 2377 2378 struct TailCallArgumentInfo { 2379 SDValue Arg; 2380 SDValue FrameIdxOp; 2381 int FrameIdx; 2382 2383 TailCallArgumentInfo() : FrameIdx(0) {} 2384 }; 2385 2386 } 2387 2388 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 
2389 static void 2390 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, 2391 SDValue Chain, 2392 const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs, 2393 SmallVector<SDValue, 8> &MemOpChains, 2394 DebugLoc dl) { 2395 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 2396 SDValue Arg = TailCallArgs[i].Arg; 2397 SDValue FIN = TailCallArgs[i].FrameIdxOp; 2398 int FI = TailCallArgs[i].FrameIdx; 2399 // Store relative to framepointer. 2400 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, 2401 MachinePointerInfo::getFixedStack(FI), 2402 false, false, 0)); 2403 } 2404 } 2405 2406 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 2407 /// the appropriate stack slot for the tail call optimized function call. 2408 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, 2409 MachineFunction &MF, 2410 SDValue Chain, 2411 SDValue OldRetAddr, 2412 SDValue OldFP, 2413 int SPDiff, 2414 bool isPPC64, 2415 bool isDarwinABI, 2416 DebugLoc dl) { 2417 if (SPDiff) { 2418 // Calculate the new stack slot for the return address. 2419 int SlotSize = isPPC64 ? 8 : 4; 2420 int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64, 2421 isDarwinABI); 2422 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 2423 NewRetAddrLoc, true); 2424 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 2425 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 2426 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 2427 MachinePointerInfo::getFixedStack(NewRetAddr), 2428 false, false, 0); 2429 2430 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 2431 // slot as the FP is never overwritten. 
2432 if (isDarwinABI) { 2433 int NewFPLoc = 2434 SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); 2435 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, 2436 true); 2437 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 2438 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 2439 MachinePointerInfo::getFixedStack(NewFPIdx), 2440 false, false, 0); 2441 } 2442 } 2443 return Chain; 2444 } 2445 2446 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 2447 /// the position of the argument. 2448 static void 2449 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 2450 SDValue Arg, int SPDiff, unsigned ArgOffset, 2451 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) { 2452 int Offset = ArgOffset + SPDiff; 2453 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 2454 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 2455 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 2456 SDValue FIN = DAG.getFrameIndex(FI, VT); 2457 TailCallArgumentInfo Info; 2458 Info.Arg = Arg; 2459 Info.FrameIdxOp = FIN; 2460 Info.FrameIdx = FI; 2461 TailCallArguments.push_back(Info); 2462 } 2463 2464 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 2465 /// stack slot. Returns the chain as result and the loaded frame pointers in 2466 /// LROpOut/FPOpout. Used when tail calling. 2467 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 2468 int SPDiff, 2469 SDValue Chain, 2470 SDValue &LROpOut, 2471 SDValue &FPOpOut, 2472 bool isDarwinABI, 2473 DebugLoc dl) const { 2474 if (SPDiff) { 2475 // Load the LR and FP stack slot for later adjusting. 2476 EVT VT = PPCSubTarget.isPPC64() ? 
MVT::i64 : MVT::i32; 2477 LROpOut = getReturnAddrFrameIndex(DAG); 2478 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), 2479 false, false, false, 0); 2480 Chain = SDValue(LROpOut.getNode(), 1); 2481 2482 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 2483 // slot as the FP is never overwritten. 2484 if (isDarwinABI) { 2485 FPOpOut = getFramePointerFrameIndex(DAG); 2486 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), 2487 false, false, false, 0); 2488 Chain = SDValue(FPOpOut.getNode(), 1); 2489 } 2490 } 2491 return Chain; 2492 } 2493 2494 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 2495 /// by "Src" to address "Dst" of size "Size". Alignment information is 2496 /// specified by the specific parameter attribute. The copy will be passed as 2497 /// a byval function parameter. 2498 /// Sometimes what we are copying is the end of a larger object, the part that 2499 /// does not fit in registers. 2500 static SDValue 2501 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 2502 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 2503 DebugLoc dl) { 2504 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 2505 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 2506 false, false, MachinePointerInfo(0), 2507 MachinePointerInfo(0)); 2508 } 2509 2510 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 2511 /// tail calls. 
2512 static void 2513 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, 2514 SDValue Arg, SDValue PtrOff, int SPDiff, 2515 unsigned ArgOffset, bool isPPC64, bool isTailCall, 2516 bool isVector, SmallVector<SDValue, 8> &MemOpChains, 2517 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments, 2518 DebugLoc dl) { 2519 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2520 if (!isTailCall) { 2521 if (isVector) { 2522 SDValue StackPtr; 2523 if (isPPC64) 2524 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 2525 else 2526 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 2527 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 2528 DAG.getConstant(ArgOffset, PtrVT)); 2529 } 2530 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 2531 MachinePointerInfo(), false, false, 0)); 2532 // Calculate and remember argument location. 2533 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 2534 TailCallArguments); 2535 } 2536 2537 static 2538 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 2539 DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, 2540 SDValue LROp, SDValue FPOp, bool isDarwinABI, 2541 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) { 2542 MachineFunction &MF = DAG.getMachineFunction(); 2543 2544 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 2545 // might overwrite each other in case of tail call optimization. 2546 SmallVector<SDValue, 8> MemOpChains2; 2547 // Do not flag preceding copytoreg stuff together with the following stuff. 2548 InFlag = SDValue(); 2549 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 2550 MemOpChains2, dl); 2551 if (!MemOpChains2.empty()) 2552 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2553 &MemOpChains2[0], MemOpChains2.size()); 2554 2555 // Store the return address to the appropriate stack slot. 
2556 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, 2557 isPPC64, isDarwinABI, dl); 2558 2559 // Emit callseq_end just before tailcall node. 2560 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 2561 DAG.getIntPtrConstant(0, true), InFlag); 2562 InFlag = Chain.getValue(1); 2563 } 2564 2565 static 2566 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, 2567 SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall, 2568 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, 2569 SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys, 2570 const PPCSubtarget &PPCSubTarget) { 2571 2572 bool isPPC64 = PPCSubTarget.isPPC64(); 2573 bool isSVR4ABI = PPCSubTarget.isSVR4ABI(); 2574 2575 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2576 NodeTys.push_back(MVT::Other); // Returns a chain 2577 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 2578 2579 unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; 2580 2581 bool needIndirectCall = true; 2582 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 2583 // If this is an absolute destination address, use the munged value. 2584 Callee = SDValue(Dest, 0); 2585 needIndirectCall = false; 2586 } 2587 2588 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 2589 // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 2590 // Use indirect calls for ALL functions calls in JIT mode, since the 2591 // far-call stubs may be outside relocation limits for a BL instruction. 
2592 if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { 2593 unsigned OpFlags = 0; 2594 if (DAG.getTarget().getRelocationModel() != Reloc::Static && 2595 (PPCSubTarget.getTargetTriple().isMacOSX() && 2596 PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && 2597 (G->getGlobal()->isDeclaration() || 2598 G->getGlobal()->isWeakForLinker())) { 2599 // PC-relative references to external symbols should go through $stub, 2600 // unless we're building with the leopard linker or later, which 2601 // automatically synthesizes these stubs. 2602 OpFlags = PPCII::MO_DARWIN_STUB; 2603 } 2604 2605 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 2606 // every direct call is) turn it into a TargetGlobalAddress / 2607 // TargetExternalSymbol node so that legalize doesn't hack it. 2608 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 2609 Callee.getValueType(), 2610 0, OpFlags); 2611 needIndirectCall = false; 2612 } 2613 } 2614 2615 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 2616 unsigned char OpFlags = 0; 2617 2618 if (DAG.getTarget().getRelocationModel() != Reloc::Static && 2619 (PPCSubTarget.getTargetTriple().isMacOSX() && 2620 PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { 2621 // PC-relative references to external symbols should go through $stub, 2622 // unless we're building with the leopard linker or later, which 2623 // automatically synthesizes these stubs. 2624 OpFlags = PPCII::MO_DARWIN_STUB; 2625 } 2626 2627 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 2628 OpFlags); 2629 needIndirectCall = false; 2630 } 2631 2632 if (needIndirectCall) { 2633 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 2634 // to do the call, we can't use PPCISD::CALL. 
2635 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 2636 2637 if (isSVR4ABI && isPPC64) { 2638 // Function pointers in the 64-bit SVR4 ABI do not point to the function 2639 // entry point, but to the function descriptor (the function entry point 2640 // address is part of the function descriptor though). 2641 // The function descriptor is a three doubleword structure with the 2642 // following fields: function entry point, TOC base address and 2643 // environment pointer. 2644 // Thus for a call through a function pointer, the following actions need 2645 // to be performed: 2646 // 1. Save the TOC of the caller in the TOC save area of its stack 2647 // frame (this is done in LowerCall_Darwin()). 2648 // 2. Load the address of the function entry point from the function 2649 // descriptor. 2650 // 3. Load the TOC of the callee from the function descriptor into r2. 2651 // 4. Load the environment pointer from the function descriptor into 2652 // r11. 2653 // 5. Branch to the function entry point address. 2654 // 6. On return of the callee, the TOC of the caller needs to be 2655 // restored (this is done in FinishCall()). 2656 // 2657 // All those operations are flagged together to ensure that no other 2658 // operations can be scheduled in between. E.g. without flagging the 2659 // operations together, a TOC access in the caller could be scheduled 2660 // between the load of the callee TOC and the branch to the callee, which 2661 // results in the TOC access going through the TOC of the callee instead 2662 // of going through the TOC of the caller, which leads to incorrect code. 2663 2664 // Load the address of the function entry point from the function 2665 // descriptor. 2666 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); 2667 SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps, 2668 InFlag.getNode() ? 3 : 2); 2669 Chain = LoadFuncPtr.getValue(1); 2670 InFlag = LoadFuncPtr.getValue(2); 2671 2672 // Load environment pointer into r11. 
2673 // Offset of the environment pointer within the function descriptor. 2674 SDValue PtrOff = DAG.getIntPtrConstant(16); 2675 2676 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 2677 SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr, 2678 InFlag); 2679 Chain = LoadEnvPtr.getValue(1); 2680 InFlag = LoadEnvPtr.getValue(2); 2681 2682 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 2683 InFlag); 2684 Chain = EnvVal.getValue(0); 2685 InFlag = EnvVal.getValue(1); 2686 2687 // Load TOC of the callee into r2. We are using a target-specific load 2688 // with r2 hard coded, because the result of a target-independent load 2689 // would never go directly into r2, since r2 is a reserved register (which 2690 // prevents the register allocator from allocating it), resulting in an 2691 // additional register being allocated and an unnecessary move instruction 2692 // being generated. 2693 VTs = DAG.getVTList(MVT::Other, MVT::Glue); 2694 SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, 2695 Callee, InFlag); 2696 Chain = LoadTOCPtr.getValue(0); 2697 InFlag = LoadTOCPtr.getValue(1); 2698 2699 MTCTROps[0] = Chain; 2700 MTCTROps[1] = LoadFuncPtr; 2701 MTCTROps[2] = InFlag; 2702 } 2703 2704 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, 2705 2 + (InFlag.getNode() != 0)); 2706 InFlag = Chain.getValue(1); 2707 2708 NodeTys.clear(); 2709 NodeTys.push_back(MVT::Other); 2710 NodeTys.push_back(MVT::Glue); 2711 Ops.push_back(Chain); 2712 CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin; 2713 Callee.setNode(0); 2714 // Add CTR register as callee so a bctr can be emitted later. 2715 if (isTailCall) 2716 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 2717 } 2718 2719 // If this is a direct call, pass the chain and the callee. 
2720 if (Callee.getNode()) { 2721 Ops.push_back(Chain); 2722 Ops.push_back(Callee); 2723 } 2724 // If this is a tail call add stack pointer delta. 2725 if (isTailCall) 2726 Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); 2727 2728 // Add argument registers to the end of the list so that they are known live 2729 // into the call. 2730 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 2731 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 2732 RegsToPass[i].second.getValueType())); 2733 2734 return CallOpc; 2735 } 2736 2737 SDValue 2738 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 2739 CallingConv::ID CallConv, bool isVarArg, 2740 const SmallVectorImpl<ISD::InputArg> &Ins, 2741 DebugLoc dl, SelectionDAG &DAG, 2742 SmallVectorImpl<SDValue> &InVals) const { 2743 2744 SmallVector<CCValAssign, 16> RVLocs; 2745 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2746 getTargetMachine(), RVLocs, *DAG.getContext()); 2747 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 2748 2749 // Copy all of the result registers out of their specified physreg. 
2750 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 2751 CCValAssign &VA = RVLocs[i]; 2752 EVT VT = VA.getValVT(); 2753 assert(VA.isRegLoc() && "Can only return in registers!"); 2754 Chain = DAG.getCopyFromReg(Chain, dl, 2755 VA.getLocReg(), VT, InFlag).getValue(1); 2756 InVals.push_back(Chain.getValue(0)); 2757 InFlag = Chain.getValue(2); 2758 } 2759 2760 return Chain; 2761 } 2762 2763 SDValue 2764 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, 2765 bool isTailCall, bool isVarArg, 2766 SelectionDAG &DAG, 2767 SmallVector<std::pair<unsigned, SDValue>, 8> 2768 &RegsToPass, 2769 SDValue InFlag, SDValue Chain, 2770 SDValue &Callee, 2771 int SPDiff, unsigned NumBytes, 2772 const SmallVectorImpl<ISD::InputArg> &Ins, 2773 SmallVectorImpl<SDValue> &InVals) const { 2774 std::vector<EVT> NodeTys; 2775 SmallVector<SDValue, 8> Ops; 2776 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, 2777 isTailCall, RegsToPass, Ops, NodeTys, 2778 PPCSubTarget); 2779 2780 // When performing tail call optimization the callee pops its arguments off 2781 // the stack. Account for this here so these bytes can be pushed back on in 2782 // PPCRegisterInfo::eliminateCallFramePseudoInstr. 2783 int BytesCalleePops = 2784 (CallConv == CallingConv::Fast && 2785 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 2786 2787 // Add a register mask operand representing the call-preserved registers. 2788 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 2789 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 2790 assert(Mask && "Missing call preserved mask for calling convention"); 2791 Ops.push_back(DAG.getRegisterMask(Mask)); 2792 2793 if (InFlag.getNode()) 2794 Ops.push_back(InFlag); 2795 2796 // Emit tail call. 2797 if (isTailCall) { 2798 // If this is the first return lowered for this function, add the regs 2799 // to the liveout set for the function. 
2800 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 2801 SmallVector<CCValAssign, 16> RVLocs; 2802 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2803 getTargetMachine(), RVLocs, *DAG.getContext()); 2804 CCInfo.AnalyzeCallResult(Ins, RetCC_PPC); 2805 for (unsigned i = 0; i != RVLocs.size(); ++i) 2806 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 2807 } 2808 2809 assert(((Callee.getOpcode() == ISD::Register && 2810 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || 2811 Callee.getOpcode() == ISD::TargetExternalSymbol || 2812 Callee.getOpcode() == ISD::TargetGlobalAddress || 2813 isa<ConstantSDNode>(Callee)) && 2814 "Expecting an global address, external symbol, absolute value or register"); 2815 2816 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); 2817 } 2818 2819 // Add a NOP immediately after the branch instruction when using the 64-bit 2820 // SVR4 ABI. At link time, if caller and callee are in a different module and 2821 // thus have a different TOC, the call will be replaced with a call to a stub 2822 // function which saves the current TOC, loads the TOC of the callee and 2823 // branches to the callee. The NOP will be replaced with a load instruction 2824 // which restores the TOC of the caller from the TOC save slot of the current 2825 // stack frame. If caller and callee belong to the same module (and have the 2826 // same TOC), the NOP will remain unchanged. 2827 2828 bool needsTOCRestore = false; 2829 if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { 2830 if (CallOpc == PPCISD::BCTRL_SVR4) { 2831 // This is a call through a function pointer. 2832 // Restore the caller TOC from the save area into R2. 2833 // See PrepareCall() for more information about calls through function 2834 // pointers in the 64-bit SVR4 ABI. 
2835 // We are using a target-specific load with r2 hard coded, because the 2836 // result of a target-independent load would never go directly into r2, 2837 // since r2 is a reserved register (which prevents the register allocator 2838 // from allocating it), resulting in an additional register being 2839 // allocated and an unnecessary move instruction being generated. 2840 needsTOCRestore = true; 2841 } else if (CallOpc == PPCISD::CALL_SVR4) { 2842 // Otherwise insert NOP. 2843 CallOpc = PPCISD::CALL_NOP_SVR4; 2844 } 2845 } 2846 2847 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 2848 InFlag = Chain.getValue(1); 2849 2850 if (needsTOCRestore) { 2851 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 2852 Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); 2853 InFlag = Chain.getValue(1); 2854 } 2855 2856 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 2857 DAG.getIntPtrConstant(BytesCalleePops, true), 2858 InFlag); 2859 if (!Ins.empty()) 2860 InFlag = Chain.getValue(1); 2861 2862 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 2863 Ins, dl, DAG, InVals); 2864 } 2865 2866 SDValue 2867 PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, 2868 CallingConv::ID CallConv, bool isVarArg, 2869 bool doesNotRet, bool &isTailCall, 2870 const SmallVectorImpl<ISD::OutputArg> &Outs, 2871 const SmallVectorImpl<SDValue> &OutVals, 2872 const SmallVectorImpl<ISD::InputArg> &Ins, 2873 DebugLoc dl, SelectionDAG &DAG, 2874 SmallVectorImpl<SDValue> &InVals) const { 2875 if (isTailCall) 2876 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 2877 Ins, DAG); 2878 2879 if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) 2880 return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, 2881 isTailCall, Outs, OutVals, Ins, 2882 dl, DAG, InVals); 2883 2884 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 2885 isTailCall, Outs, OutVals, Ins, 2886 dl, DAG, InVals); 2887 } 2888 2889 
/// LowerCall_SVR4 - Lower a call using the CC_PPC_SVR4 calling convention
/// tables with 4-byte stack slots (LowerCall() dispatches here for the 32-bit
/// SVR4 ABI). Arguments are assigned to registers or stack slots, byval
/// aggregates are copied into the caller's frame ahead of the call sequence,
/// and the call itself is emitted through FinishCall().
SDValue
PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
                                  CallingConv::ID CallConv, bool isVarArg,
                                  bool isTailCall,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  const SmallVectorImpl<SDValue> &OutVals,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  DebugLoc dl, SelectionDAG &DAG,
                                  SmallVectorImpl<SDValue> &InVals) const {
  // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, MF,
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false),
                       PtrByteSize);

  if (!isVarArg) {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
  } else {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    for (unsigned ArgNo = 0, ArgCount = Outs.size(); ArgNo != ArgCount;
         ++ArgNo) {
      MVT ArgVT = Outs[ArgNo].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[ArgNo].Flags;

      // The assignment functions return true when they cannot handle the
      // argument.
      const bool Unhandled =
        Outs[ArgNo].IsFixed
          ? CC_PPC_SVR4(ArgNo, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                        CCInfo)
          : CC_PPC_SVR4_VarArg(ArgNo, ArgVT, ArgVT, CCValAssign::Full,
                               ArgFlags, CCInfo);

      if (Unhandled) {
#ifndef NDEBUG
        errs() << "Call operand #" << ArgNo << " has unhandled type "
             << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(0);
      }
    }
  }

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, MF,
                      getTargetMachine(), ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
                                       dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads. ByValIdx
  // advances through ByValArgLocs only for byval arguments.
  for (unsigned ArgNo = 0, ByValIdx = 0, ArgCount = ArgLocs.size();
       ArgNo != ArgCount; ++ArgNo) {
    CCValAssign &VA = ArgLocs[ArgNo];
    SDValue Arg = OutVals[ArgNo];
    ISD::ArgFlagsTy Flags = Outs[ArgNo].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((ByValIdx < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[ByValIdx++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      SDValue CopyOffset = DAG.getIntPtrConstant(ByValVA.getLocMemOffset());
      SDValue CopyAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
                                     CopyOffset);

      // Create a copy of the argument in the local area of the current
      // stack frame. The copy is chained to the input chain of the current
      // CALLSEQ_START.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, CopyAddr,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END, so rebuild the
      // CALLSEQ_START on top of the memcpy and redirect all users to it.
      SDValue NewCallSeqStart =
        DAG.getCALLSEQ_START(MemcpyCall,
                             CallSeqStart.getNode()->getOperand(1));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      CallSeqStart = NewCallSeqStart;
      Chain = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = CopyAddr;
    }

    if (VA.isRegLoc()) {
      if (VA.getLocVT().isFloatingPoint())
        seenFloatArg = true;
      // Put argument in a physical register.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    // Put argument in the parameter list area of the current stack frame.
    assert(VA.isMemLoc());
    unsigned SlotOffset = VA.getLocMemOffset();

    if (isTailCall) {
      // Calculate and remember argument location.
      CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, SlotOffset,
                               TailCallArguments);
    } else {
      SDValue SlotOffsetNode = DAG.getIntPtrConstant(SlotOffset);
      SDValue SlotAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
                                     SlotOffsetNode);

      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, SlotAddr,
                                         MachinePointerInfo(),
                                         false, false, 0));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Set CR6 to true if this is a vararg call with floating args passed in
  // registers.
  if (isVarArg) {
    const unsigned CROpc = seenFloatArg ? PPC::CRSET : PPC::CRUNSET;
    SDValue SetCR(DAG.getMachineNode(CROpc, dl, MVT::i32), 0);
    RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned Idx = 0, End = RegsToPass.size(); Idx != End; ++Idx) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[Idx].first,
                             RegsToPass[Idx].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
                    false, TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
                    Ins, InVals);
}

SDValue
PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {

  unsigned NumOps = Outs.size();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
3121 if (getTargetMachine().Options.GuaranteedTailCallOpt && 3122 CallConv == CallingConv::Fast) 3123 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 3124 3125 unsigned nAltivecParamsAtEnd = 0; 3126 3127 // Count how many bytes are to be pushed on the stack, including the linkage 3128 // area, and parameter passing area. We start with 24/48 bytes, which is 3129 // prereserved space for [SP][CR][LR][3 x unused]. 3130 unsigned NumBytes = 3131 CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv, 3132 Outs, OutVals, 3133 nAltivecParamsAtEnd); 3134 3135 // Calculate by how many bytes the stack has to be adjusted in case of tail 3136 // call optimization. 3137 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 3138 3139 // To protect arguments on the stack from being clobbered in a tail call, 3140 // force all the loads to happen before doing any other lowering. 3141 if (isTailCall) 3142 Chain = DAG.getStackArgumentTokenFactor(Chain); 3143 3144 // Adjust the stack pointer for the new arguments... 3145 // These operations are automatically eliminated by the prolog/epilog pass 3146 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 3147 SDValue CallSeqStart = Chain; 3148 3149 // Load the return address and frame pointer so it can be move somewhere else 3150 // later. 3151 SDValue LROp, FPOp; 3152 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 3153 dl); 3154 3155 // Set up a copy of the stack pointer for use loading and storing any 3156 // arguments that may not fit in the registers available for argument 3157 // passing. 3158 SDValue StackPtr; 3159 if (isPPC64) 3160 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 3161 else 3162 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3163 3164 // Figure out which arguments are going to go in registers, and which in 3165 // memory. 
Also, if this is a vararg function, floating point operations 3166 // must be stored to our stack, and loaded into integer regs as well, if 3167 // any integer regs are available for argument passing. 3168 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); 3169 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3170 3171 static const uint16_t GPR_32[] = { // 32-bit registers. 3172 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 3173 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 3174 }; 3175 static const uint16_t GPR_64[] = { // 64-bit registers. 3176 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3177 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3178 }; 3179 static const uint16_t *FPR = GetFPR(); 3180 3181 static const uint16_t VR[] = { 3182 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3183 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3184 }; 3185 const unsigned NumGPRs = array_lengthof(GPR_32); 3186 const unsigned NumFPRs = 13; 3187 const unsigned NumVRs = array_lengthof(VR); 3188 3189 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; 3190 3191 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 3192 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 3193 3194 SmallVector<SDValue, 8> MemOpChains; 3195 for (unsigned i = 0; i != NumOps; ++i) { 3196 SDValue Arg = OutVals[i]; 3197 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3198 3199 // PtrOff will be used to store the current argument to the stack if a 3200 // register cannot be found for it. 3201 SDValue PtrOff; 3202 3203 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 3204 3205 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 3206 3207 // On PPC64, promote integers to 64-bit values. 3208 if (isPPC64 && Arg.getValueType() == MVT::i32) { 3209 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 3210 unsigned ExtOp = Flags.isSExt() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 3211 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 3212 } 3213 3214 // FIXME memcpy is used way more than necessary. Correctness first. 3215 if (Flags.isByVal()) { 3216 unsigned Size = Flags.getByValSize(); 3217 if (Size==1 || Size==2) { 3218 // Very small objects are passed right-justified. 3219 // Everything else is passed left-justified. 3220 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 3221 if (GPR_idx != NumGPRs) { 3222 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 3223 MachinePointerInfo(), VT, 3224 false, false, 0); 3225 MemOpChains.push_back(Load.getValue(1)); 3226 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3227 3228 ArgOffset += PtrByteSize; 3229 } else { 3230 SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType()); 3231 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 3232 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr, 3233 CallSeqStart.getNode()->getOperand(0), 3234 Flags, DAG, dl); 3235 // This must go outside the CALLSEQ_START..END. 3236 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 3237 CallSeqStart.getNode()->getOperand(1)); 3238 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 3239 NewCallSeqStart.getNode()); 3240 Chain = CallSeqStart = NewCallSeqStart; 3241 ArgOffset += PtrByteSize; 3242 } 3243 continue; 3244 } 3245 // Copy entire object into memory. There are cases where gcc-generated 3246 // code assumes it is there, even if it could be put entirely into 3247 // registers. (This is not what the doc says.) 3248 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 3249 CallSeqStart.getNode()->getOperand(0), 3250 Flags, DAG, dl); 3251 // This must go outside the CALLSEQ_START..END. 
3252 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 3253 CallSeqStart.getNode()->getOperand(1)); 3254 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); 3255 Chain = CallSeqStart = NewCallSeqStart; 3256 // And copy the pieces of it that fit into registers. 3257 for (unsigned j=0; j<Size; j+=PtrByteSize) { 3258 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 3259 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 3260 if (GPR_idx != NumGPRs) { 3261 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 3262 MachinePointerInfo(), 3263 false, false, false, 0); 3264 MemOpChains.push_back(Load.getValue(1)); 3265 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3266 ArgOffset += PtrByteSize; 3267 } else { 3268 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 3269 break; 3270 } 3271 } 3272 continue; 3273 } 3274 3275 switch (Arg.getValueType().getSimpleVT().SimpleTy) { 3276 default: llvm_unreachable("Unexpected ValueType for argument!"); 3277 case MVT::i32: 3278 case MVT::i64: 3279 if (GPR_idx != NumGPRs) { 3280 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 3281 } else { 3282 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 3283 isPPC64, isTailCall, false, MemOpChains, 3284 TailCallArguments, dl); 3285 } 3286 ArgOffset += PtrByteSize; 3287 break; 3288 case MVT::f32: 3289 case MVT::f64: 3290 if (FPR_idx != NumFPRs) { 3291 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 3292 3293 if (isVarArg) { 3294 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 3295 MachinePointerInfo(), false, false, 0); 3296 MemOpChains.push_back(Store); 3297 3298 // Float varargs are always shadowed in available integer registers 3299 if (GPR_idx != NumGPRs) { 3300 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 3301 MachinePointerInfo(), false, false, 3302 false, 0); 3303 MemOpChains.push_back(Load.getValue(1)); 3304 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], 
Load)); 3305 } 3306 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 3307 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 3308 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 3309 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 3310 MachinePointerInfo(), 3311 false, false, false, 0); 3312 MemOpChains.push_back(Load.getValue(1)); 3313 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3314 } 3315 } else { 3316 // If we have any FPRs remaining, we may also have GPRs remaining. 3317 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 3318 // GPRs. 3319 if (GPR_idx != NumGPRs) 3320 ++GPR_idx; 3321 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 3322 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 3323 ++GPR_idx; 3324 } 3325 } else { 3326 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 3327 isPPC64, isTailCall, false, MemOpChains, 3328 TailCallArguments, dl); 3329 } 3330 if (isPPC64) 3331 ArgOffset += 8; 3332 else 3333 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 3334 break; 3335 case MVT::v4f32: 3336 case MVT::v4i32: 3337 case MVT::v8i16: 3338 case MVT::v16i8: 3339 if (isVarArg) { 3340 // These go aligned on the stack, or in the corresponding R registers 3341 // when within range. The Darwin PPC ABI doc claims they also go in 3342 // V registers; in fact gcc does this only for arguments that are 3343 // prototyped, not for those that match the ... We do it for all 3344 // arguments, seems to work. 3345 while (ArgOffset % 16 !=0) { 3346 ArgOffset += PtrByteSize; 3347 if (GPR_idx != NumGPRs) 3348 GPR_idx++; 3349 } 3350 // We could elide this store in the case where the object fits 3351 // entirely in R registers. Maybe later. 
3352 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 3353 DAG.getConstant(ArgOffset, PtrVT)); 3354 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 3355 MachinePointerInfo(), false, false, 0); 3356 MemOpChains.push_back(Store); 3357 if (VR_idx != NumVRs) { 3358 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 3359 MachinePointerInfo(), 3360 false, false, false, 0); 3361 MemOpChains.push_back(Load.getValue(1)); 3362 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 3363 } 3364 ArgOffset += 16; 3365 for (unsigned i=0; i<16; i+=PtrByteSize) { 3366 if (GPR_idx == NumGPRs) 3367 break; 3368 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 3369 DAG.getConstant(i, PtrVT)); 3370 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 3371 false, false, false, 0); 3372 MemOpChains.push_back(Load.getValue(1)); 3373 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3374 } 3375 break; 3376 } 3377 3378 // Non-varargs Altivec params generally go in registers, but have 3379 // stack space allocated at the end. 3380 if (VR_idx != NumVRs) { 3381 // Doesn't have GPR space allocated. 3382 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 3383 } else if (nAltivecParamsAtEnd==0) { 3384 // We are emitting Altivec params in order. 3385 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 3386 isPPC64, isTailCall, true, MemOpChains, 3387 TailCallArguments, dl); 3388 ArgOffset += 16; 3389 } 3390 break; 3391 } 3392 } 3393 // If all Altivec parameters fit in registers, as they usually do, 3394 // they get stack space following the non-Altivec parameters. We 3395 // don't track this here because nobody below needs it. 3396 // If there are more Altivec parameters than fit in registers emit 3397 // the stores here. 3398 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 3399 unsigned j = 0; 3400 // Offset is aligned; skip 1st 12 params which go in V registers. 
3401 ArgOffset = ((ArgOffset+15)/16)*16; 3402 ArgOffset += 12*16; 3403 for (unsigned i = 0; i != NumOps; ++i) { 3404 SDValue Arg = OutVals[i]; 3405 EVT ArgType = Outs[i].VT; 3406 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 3407 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 3408 if (++j > NumVRs) { 3409 SDValue PtrOff; 3410 // We are emitting Altivec params in order. 3411 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 3412 isPPC64, isTailCall, true, MemOpChains, 3413 TailCallArguments, dl); 3414 ArgOffset += 16; 3415 } 3416 } 3417 } 3418 } 3419 3420 if (!MemOpChains.empty()) 3421 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 3422 &MemOpChains[0], MemOpChains.size()); 3423 3424 // Check if this is an indirect call (MTCTR/BCTRL). 3425 // See PrepareCall() for more information about calls through function 3426 // pointers in the 64-bit SVR4 ABI. 3427 if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() && 3428 !dyn_cast<GlobalAddressSDNode>(Callee) && 3429 !dyn_cast<ExternalSymbolSDNode>(Callee) && 3430 !isBLACompatibleAddress(Callee, DAG)) { 3431 // Load r2 into a virtual register and store it to the TOC save area. 3432 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 3433 // TOC save area offset. 3434 SDValue PtrOff = DAG.getIntPtrConstant(40); 3435 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 3436 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), 3437 false, false, 0); 3438 } 3439 3440 // On Darwin, R12 must contain the address of an indirect callee. This does 3441 // not mean the MTCTR instruction must use R12; it's easier to model this as 3442 // an extra parameter, so do that. 3443 if (!isTailCall && 3444 !dyn_cast<GlobalAddressSDNode>(Callee) && 3445 !dyn_cast<ExternalSymbolSDNode>(Callee) && 3446 !isBLACompatibleAddress(Callee, DAG)) 3447 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? 
PPC::X12 : 3448 PPC::R12), Callee)); 3449 3450 // Build a sequence of copy-to-reg nodes chained together with token chain 3451 // and flag operands which copy the outgoing args into the appropriate regs. 3452 SDValue InFlag; 3453 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 3454 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 3455 RegsToPass[i].second, InFlag); 3456 InFlag = Chain.getValue(1); 3457 } 3458 3459 if (isTailCall) 3460 PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, 3461 FPOp, true, TailCallArguments); 3462 3463 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 3464 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 3465 Ins, InVals); 3466 } 3467 3468 bool 3469 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 3470 MachineFunction &MF, bool isVarArg, 3471 const SmallVectorImpl<ISD::OutputArg> &Outs, 3472 LLVMContext &Context) const { 3473 SmallVector<CCValAssign, 16> RVLocs; 3474 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), 3475 RVLocs, Context); 3476 return CCInfo.CheckReturn(Outs, RetCC_PPC); 3477 } 3478 3479 SDValue 3480 PPCTargetLowering::LowerReturn(SDValue Chain, 3481 CallingConv::ID CallConv, bool isVarArg, 3482 const SmallVectorImpl<ISD::OutputArg> &Outs, 3483 const SmallVectorImpl<SDValue> &OutVals, 3484 DebugLoc dl, SelectionDAG &DAG) const { 3485 3486 SmallVector<CCValAssign, 16> RVLocs; 3487 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3488 getTargetMachine(), RVLocs, *DAG.getContext()); 3489 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 3490 3491 // If this is the first return lowered for this function, add the regs to the 3492 // liveout set for the function. 3493 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 3494 for (unsigned i = 0; i != RVLocs.size(); ++i) 3495 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 3496 } 3497 3498 SDValue Flag; 3499 3500 // Copy the result values into the output registers. 
3501 for (unsigned i = 0; i != RVLocs.size(); ++i) { 3502 CCValAssign &VA = RVLocs[i]; 3503 assert(VA.isRegLoc() && "Can only return in registers!"); 3504 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 3505 OutVals[i], Flag); 3506 Flag = Chain.getValue(1); 3507 } 3508 3509 if (Flag.getNode()) 3510 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 3511 else 3512 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain); 3513 } 3514 3515 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, 3516 const PPCSubtarget &Subtarget) const { 3517 // When we pop the dynamic allocation we need to restore the SP link. 3518 DebugLoc dl = Op.getDebugLoc(); 3519 3520 // Get the corect type for pointers. 3521 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3522 3523 // Construct the stack pointer operand. 3524 bool isPPC64 = Subtarget.isPPC64(); 3525 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 3526 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 3527 3528 // Get the operands for the STACKRESTORE. 3529 SDValue Chain = Op.getOperand(0); 3530 SDValue SaveSP = Op.getOperand(1); 3531 3532 // Load the old link SP. 3533 SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, 3534 MachinePointerInfo(), 3535 false, false, false, 0); 3536 3537 // Restore the stack pointer. 3538 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); 3539 3540 // Store the old link SP. 3541 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(), 3542 false, false, 0); 3543 } 3544 3545 3546 3547 SDValue 3548 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { 3549 MachineFunction &MF = DAG.getMachineFunction(); 3550 bool isPPC64 = PPCSubTarget.isPPC64(); 3551 bool isDarwinABI = PPCSubTarget.isDarwinABI(); 3552 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3553 3554 // Get current frame pointer save index. The users of this index will be 3555 // primarily DYNALLOC instructions. 
3556 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 3557 int RASI = FI->getReturnAddrSaveIndex(); 3558 3559 // If the frame pointer save index hasn't been defined yet. 3560 if (!RASI) { 3561 // Find out what the fix offset of the frame pointer save area. 3562 int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); 3563 // Allocate the frame index for frame pointer save area. 3564 RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true); 3565 // Save the result. 3566 FI->setReturnAddrSaveIndex(RASI); 3567 } 3568 return DAG.getFrameIndex(RASI, PtrVT); 3569 } 3570 3571 SDValue 3572 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { 3573 MachineFunction &MF = DAG.getMachineFunction(); 3574 bool isPPC64 = PPCSubTarget.isPPC64(); 3575 bool isDarwinABI = PPCSubTarget.isDarwinABI(); 3576 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3577 3578 // Get current frame pointer save index. The users of this index will be 3579 // primarily DYNALLOC instructions. 3580 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 3581 int FPSI = FI->getFramePointerSaveIndex(); 3582 3583 // If the frame pointer save index hasn't been defined yet. 3584 if (!FPSI) { 3585 // Find out what the fix offset of the frame pointer save area. 3586 int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, 3587 isDarwinABI); 3588 3589 // Allocate the frame index for frame pointer save area. 3590 FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 3591 // Save the result. 3592 FI->setFramePointerSaveIndex(FPSI); 3593 } 3594 return DAG.getFrameIndex(FPSI, PtrVT); 3595 } 3596 3597 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 3598 SelectionDAG &DAG, 3599 const PPCSubtarget &Subtarget) const { 3600 // Get the inputs. 3601 SDValue Chain = Op.getOperand(0); 3602 SDValue Size = Op.getOperand(1); 3603 DebugLoc dl = Op.getDebugLoc(); 3604 3605 // Get the corect type for pointers. 
3606 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3607 // Negate the size. 3608 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, 3609 DAG.getConstant(0, PtrVT), Size); 3610 // Construct a node for the frame pointer save index. 3611 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 3612 // Build a DYNALLOC node. 3613 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 3614 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 3615 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); 3616 } 3617 3618 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 3619 /// possible. 3620 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 3621 // Not FP? Not a fsel. 3622 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 3623 !Op.getOperand(2).getValueType().isFloatingPoint()) 3624 return Op; 3625 3626 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 3627 3628 // Cannot handle SETEQ/SETNE. 3629 if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op; 3630 3631 EVT ResVT = Op.getValueType(); 3632 EVT CmpVT = Op.getOperand(0).getValueType(); 3633 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 3634 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 3635 DebugLoc dl = Op.getDebugLoc(); 3636 3637 // If the RHS of the comparison is a 0.0, we don't need to do the 3638 // subtraction at all. 3639 if (isFloatingPointZero(RHS)) 3640 switch (CC) { 3641 default: break; // SETUO etc aren't handled by fsel. 
3642 case ISD::SETULT: 3643 case ISD::SETLT: 3644 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 3645 case ISD::SETOGE: 3646 case ISD::SETGE: 3647 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 3648 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 3649 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 3650 case ISD::SETUGT: 3651 case ISD::SETGT: 3652 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 3653 case ISD::SETOLE: 3654 case ISD::SETLE: 3655 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 3656 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 3657 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 3658 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 3659 } 3660 3661 SDValue Cmp; 3662 switch (CC) { 3663 default: break; // SETUO etc aren't handled by fsel. 3664 case ISD::SETULT: 3665 case ISD::SETLT: 3666 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 3667 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 3668 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 3669 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 3670 case ISD::SETOGE: 3671 case ISD::SETGE: 3672 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 3673 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 3674 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 3675 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 3676 case ISD::SETUGT: 3677 case ISD::SETGT: 3678 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 3679 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 3680 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 3681 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 3682 case ISD::SETOLE: 3683 case ISD::SETLE: 3684 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 3685 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 3686 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 3687 
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 3688 } 3689 return Op; 3690 } 3691 3692 // FIXME: Split this code up when LegalizeDAGTypes lands. 3693 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 3694 DebugLoc dl) const { 3695 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 3696 SDValue Src = Op.getOperand(0); 3697 if (Src.getValueType() == MVT::f32) 3698 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 3699 3700 SDValue Tmp; 3701 switch (Op.getValueType().getSimpleVT().SimpleTy) { 3702 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 3703 case MVT::i32: 3704 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : 3705 PPCISD::FCTIDZ, 3706 dl, MVT::f64, Src); 3707 break; 3708 case MVT::i64: 3709 Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src); 3710 break; 3711 } 3712 3713 // Convert the FP value to an int value through memory. 3714 SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); 3715 3716 // Emit a store to the stack slot. 3717 SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, 3718 MachinePointerInfo(), false, false, 0); 3719 3720 // Result is a load from the stack slot. If loading 4 bytes, make sure to 3721 // add in a bias. 3722 if (Op.getValueType() == MVT::i32) 3723 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 3724 DAG.getConstant(4, FIPtr.getValueType())); 3725 return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(), 3726 false, false, false, 0); 3727 } 3728 3729 SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, 3730 SelectionDAG &DAG) const { 3731 DebugLoc dl = Op.getDebugLoc(); 3732 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 
3733 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 3734 return SDValue(); 3735 3736 if (Op.getOperand(0).getValueType() == MVT::i64) { 3737 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0)); 3738 SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); 3739 if (Op.getValueType() == MVT::f32) 3740 FP = DAG.getNode(ISD::FP_ROUND, dl, 3741 MVT::f32, FP, DAG.getIntPtrConstant(0)); 3742 return FP; 3743 } 3744 3745 assert(Op.getOperand(0).getValueType() == MVT::i32 && 3746 "Unhandled SINT_TO_FP type in custom expander!"); 3747 // Since we only generate this in 64-bit mode, we can take advantage of 3748 // 64-bit registers. In particular, sign extend the input value into the 3749 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 3750 // then lfd it and fcfid it. 3751 MachineFunction &MF = DAG.getMachineFunction(); 3752 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 3753 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); 3754 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3755 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 3756 3757 SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32, 3758 Op.getOperand(0)); 3759 3760 // STD the extended value into the stack slot. 3761 MachineMemOperand *MMO = 3762 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), 3763 MachineMemOperand::MOStore, 8, 8); 3764 SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx }; 3765 SDValue Store = 3766 DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), 3767 Ops, 4, MVT::i64, MMO); 3768 // Load the value as a double. 3769 SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(), 3770 false, false, false, 0); 3771 3772 // FCFID it and return it. 
3773 SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); 3774 if (Op.getValueType() == MVT::f32) 3775 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); 3776 return FP; 3777 } 3778 3779 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 3780 SelectionDAG &DAG) const { 3781 DebugLoc dl = Op.getDebugLoc(); 3782 /* 3783 The rounding mode is in bits 30:31 of FPSR, and has the following 3784 settings: 3785 00 Round to nearest 3786 01 Round to 0 3787 10 Round to +inf 3788 11 Round to -inf 3789 3790 FLT_ROUNDS, on the other hand, expects the following: 3791 -1 Undefined 3792 0 Round to 0 3793 1 Round to nearest 3794 2 Round to +inf 3795 3 Round to -inf 3796 3797 To perform the conversion, we do: 3798 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 3799 */ 3800 3801 MachineFunction &MF = DAG.getMachineFunction(); 3802 EVT VT = Op.getValueType(); 3803 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3804 std::vector<EVT> NodeTys; 3805 SDValue MFFSreg, InFlag; 3806 3807 // Save FP Control Word to register 3808 NodeTys.push_back(MVT::f64); // return register 3809 NodeTys.push_back(MVT::Glue); // unused in this context 3810 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); 3811 3812 // Save FP register to stack slot 3813 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); 3814 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 3815 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, 3816 StackSlot, MachinePointerInfo(), false, false,0); 3817 3818 // Load FP Control Word from low 32 bits of stack slot. 
3819 SDValue Four = DAG.getConstant(4, PtrVT); 3820 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 3821 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), 3822 false, false, false, 0); 3823 3824 // Transform as necessary 3825 SDValue CWD1 = 3826 DAG.getNode(ISD::AND, dl, MVT::i32, 3827 CWD, DAG.getConstant(3, MVT::i32)); 3828 SDValue CWD2 = 3829 DAG.getNode(ISD::SRL, dl, MVT::i32, 3830 DAG.getNode(ISD::AND, dl, MVT::i32, 3831 DAG.getNode(ISD::XOR, dl, MVT::i32, 3832 CWD, DAG.getConstant(3, MVT::i32)), 3833 DAG.getConstant(3, MVT::i32)), 3834 DAG.getConstant(1, MVT::i32)); 3835 3836 SDValue RetVal = 3837 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 3838 3839 return DAG.getNode((VT.getSizeInBits() < 16 ? 3840 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 3841 } 3842 3843 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 3844 EVT VT = Op.getValueType(); 3845 unsigned BitWidth = VT.getSizeInBits(); 3846 DebugLoc dl = Op.getDebugLoc(); 3847 assert(Op.getNumOperands() == 3 && 3848 VT == Op.getOperand(1).getValueType() && 3849 "Unexpected SHL!"); 3850 3851 // Expand into a bunch of logical ops. Note that these ops 3852 // depend on the PPC behavior for oversized shift amounts. 
3853 SDValue Lo = Op.getOperand(0); 3854 SDValue Hi = Op.getOperand(1); 3855 SDValue Amt = Op.getOperand(2); 3856 EVT AmtVT = Amt.getValueType(); 3857 3858 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 3859 DAG.getConstant(BitWidth, AmtVT), Amt); 3860 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 3861 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 3862 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 3863 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 3864 DAG.getConstant(-BitWidth, AmtVT)); 3865 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 3866 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 3867 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 3868 SDValue OutOps[] = { OutLo, OutHi }; 3869 return DAG.getMergeValues(OutOps, 2, dl); 3870 } 3871 3872 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 3873 EVT VT = Op.getValueType(); 3874 DebugLoc dl = Op.getDebugLoc(); 3875 unsigned BitWidth = VT.getSizeInBits(); 3876 assert(Op.getNumOperands() == 3 && 3877 VT == Op.getOperand(1).getValueType() && 3878 "Unexpected SRL!"); 3879 3880 // Expand into a bunch of logical ops. Note that these ops 3881 // depend on the PPC behavior for oversized shift amounts. 
3882 SDValue Lo = Op.getOperand(0); 3883 SDValue Hi = Op.getOperand(1); 3884 SDValue Amt = Op.getOperand(2); 3885 EVT AmtVT = Amt.getValueType(); 3886 3887 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 3888 DAG.getConstant(BitWidth, AmtVT), Amt); 3889 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 3890 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 3891 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 3892 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 3893 DAG.getConstant(-BitWidth, AmtVT)); 3894 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 3895 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 3896 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 3897 SDValue OutOps[] = { OutLo, OutHi }; 3898 return DAG.getMergeValues(OutOps, 2, dl); 3899 } 3900 3901 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 3902 DebugLoc dl = Op.getDebugLoc(); 3903 EVT VT = Op.getValueType(); 3904 unsigned BitWidth = VT.getSizeInBits(); 3905 assert(Op.getNumOperands() == 3 && 3906 VT == Op.getOperand(1).getValueType() && 3907 "Unexpected SRA!"); 3908 3909 // Expand into a bunch of logical ops, followed by a select_cc. 
3910 SDValue Lo = Op.getOperand(0); 3911 SDValue Hi = Op.getOperand(1); 3912 SDValue Amt = Op.getOperand(2); 3913 EVT AmtVT = Amt.getValueType(); 3914 3915 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 3916 DAG.getConstant(BitWidth, AmtVT), Amt); 3917 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 3918 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 3919 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 3920 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 3921 DAG.getConstant(-BitWidth, AmtVT)); 3922 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 3923 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 3924 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), 3925 Tmp4, Tmp6, ISD::SETLE); 3926 SDValue OutOps[] = { OutLo, OutHi }; 3927 return DAG.getMergeValues(OutOps, 2, dl); 3928 } 3929 3930 //===----------------------------------------------------------------------===// 3931 // Vector related lowering. 3932 // 3933 3934 /// BuildSplatI - Build a canonical splati of Val with an element size of 3935 /// SplatSize. Cast the result to VT. 3936 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 3937 SelectionDAG &DAG, DebugLoc dl) { 3938 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 3939 3940 static const EVT VTys[] = { // canonical VT to use for each size. 3941 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 3942 }; 3943 3944 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 3945 3946 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 3947 if (Val == -1) 3948 SplatSize = 1; 3949 3950 EVT CanonicalVT = VTys[SplatSize-1]; 3951 3952 // Build a canonical splat for this value. 
3953 SDValue Elt = DAG.getConstant(Val, MVT::i32); 3954 SmallVector<SDValue, 8> Ops; 3955 Ops.assign(CanonicalVT.getVectorNumElements(), Elt); 3956 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, 3957 &Ops[0], Ops.size()); 3958 return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); 3959 } 3960 3961 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 3962 /// specified intrinsic ID. 3963 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 3964 SelectionDAG &DAG, DebugLoc dl, 3965 EVT DestVT = MVT::Other) { 3966 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 3967 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 3968 DAG.getConstant(IID, MVT::i32), LHS, RHS); 3969 } 3970 3971 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 3972 /// specified intrinsic ID. 3973 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 3974 SDValue Op2, SelectionDAG &DAG, 3975 DebugLoc dl, EVT DestVT = MVT::Other) { 3976 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 3977 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 3978 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 3979 } 3980 3981 3982 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 3983 /// amount. The result has the specified value type. 3984 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, 3985 EVT VT, SelectionDAG &DAG, DebugLoc dl) { 3986 // Force LHS/RHS to be the right type. 3987 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 3988 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 3989 3990 int Ops[16]; 3991 for (unsigned i = 0; i != 16; ++i) 3992 Ops[i] = i + Amt; 3993 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 3994 return DAG.getNode(ISD::BITCAST, dl, VT, T); 3995 } 3996 3997 // If this is a case we can't handle, return null and let the default 3998 // expansion code take care of it. 
If we CAN select this case, and if it 3999 // selects to a single instruction, return Op. Otherwise, if we can codegen 4000 // this case more efficiently than a constant pool load, lower it to the 4001 // sequence of ops that should be used. 4002 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 4003 SelectionDAG &DAG) const { 4004 DebugLoc dl = Op.getDebugLoc(); 4005 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 4006 assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 4007 4008 // Check if this is a splat of a constant value. 4009 APInt APSplatBits, APSplatUndef; 4010 unsigned SplatBitSize; 4011 bool HasAnyUndefs; 4012 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 4013 HasAnyUndefs, 0, true) || SplatBitSize > 32) 4014 return SDValue(); 4015 4016 unsigned SplatBits = APSplatBits.getZExtValue(); 4017 unsigned SplatUndef = APSplatUndef.getZExtValue(); 4018 unsigned SplatSize = SplatBitSize / 8; 4019 4020 // First, handle single instruction cases. 4021 4022 // All zeros? 4023 if (SplatBits == 0) { 4024 // Canonicalize all zero vectors to be v4i32. 4025 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 4026 SDValue Z = DAG.getConstant(0, MVT::i32); 4027 Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); 4028 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 4029 } 4030 return Op; 4031 } 4032 4033 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 4034 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 4035 (32-SplatBitSize)); 4036 if (SextVal >= -16 && SextVal <= 15) 4037 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 4038 4039 4040 // Two instruction sequences. 
4041 4042 // If this value is in the range [-32,30] and is even, use: 4043 // tmp = VSPLTI[bhw], result = add tmp, tmp 4044 if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { 4045 SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl); 4046 Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res); 4047 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 4048 } 4049 4050 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 4051 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 4052 // for fneg/fabs. 4053 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 4054 // Make -1 and vspltisw -1: 4055 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 4056 4057 // Make the VSLW intrinsic, computing 0x8000_0000. 4058 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 4059 OnesV, DAG, dl); 4060 4061 // xor by OnesV to invert it. 4062 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 4063 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 4064 } 4065 4066 // Check to see if this is a wide variety of vsplti*, binop self cases. 4067 static const signed char SplatCsts[] = { 4068 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 4069 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 4070 }; 4071 4072 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 4073 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 4074 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 4075 int i = SplatCsts[idx]; 4076 4077 // Figure out what shift amount will be used by altivec if shifted by i in 4078 // this splat size. 4079 unsigned TypeShiftAmt = i & (SplatBitSize-1); 4080 4081 // vsplti + shl self. 4082 if (SextVal == (i << (int)TypeShiftAmt)) { 4083 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 4084 static const unsigned IIDs[] = { // Intrinsic to use for each size. 
4085 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 4086 Intrinsic::ppc_altivec_vslw 4087 }; 4088 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 4089 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 4090 } 4091 4092 // vsplti + srl self. 4093 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 4094 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 4095 static const unsigned IIDs[] = { // Intrinsic to use for each size. 4096 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 4097 Intrinsic::ppc_altivec_vsrw 4098 }; 4099 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 4100 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 4101 } 4102 4103 // vsplti + sra self. 4104 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 4105 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 4106 static const unsigned IIDs[] = { // Intrinsic to use for each size. 4107 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 4108 Intrinsic::ppc_altivec_vsraw 4109 }; 4110 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 4111 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 4112 } 4113 4114 // vsplti + rol self. 4115 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 4116 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 4117 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 4118 static const unsigned IIDs[] = { // Intrinsic to use for each size. 4119 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 4120 Intrinsic::ppc_altivec_vrlw 4121 }; 4122 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 4123 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 4124 } 4125 4126 // t = vsplti c, result = vsldoi t, t, 1 4127 if (SextVal == ((i << 8) | (i < 0 ? 
0xFF : 0))) { 4128 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 4129 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); 4130 } 4131 // t = vsplti c, result = vsldoi t, t, 2 4132 if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) { 4133 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 4134 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); 4135 } 4136 // t = vsplti c, result = vsldoi t, t, 3 4137 if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 4138 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 4139 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); 4140 } 4141 } 4142 4143 // Three instruction sequences. 4144 4145 // Odd, in range [17,31]: (vsplti C)-(vsplti -16). 4146 if (SextVal >= 0 && SextVal <= 31) { 4147 SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl); 4148 SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); 4149 LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS); 4150 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); 4151 } 4152 // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). 4153 if (SextVal >= -31 && SextVal <= 0) { 4154 SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl); 4155 SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); 4156 LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS); 4157 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); 4158 } 4159 4160 return SDValue(); 4161 } 4162 4163 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 4164 /// the specified operations to build the shuffle. 
4165 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 4166 SDValue RHS, SelectionDAG &DAG, 4167 DebugLoc dl) { 4168 unsigned OpNum = (PFEntry >> 26) & 0x0F; 4169 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 4170 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 4171 4172 enum { 4173 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 4174 OP_VMRGHW, 4175 OP_VMRGLW, 4176 OP_VSPLTISW0, 4177 OP_VSPLTISW1, 4178 OP_VSPLTISW2, 4179 OP_VSPLTISW3, 4180 OP_VSLDOI4, 4181 OP_VSLDOI8, 4182 OP_VSLDOI12 4183 }; 4184 4185 if (OpNum == OP_COPY) { 4186 if (LHSID == (1*9+2)*9+3) return LHS; 4187 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 4188 return RHS; 4189 } 4190 4191 SDValue OpLHS, OpRHS; 4192 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 4193 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 4194 4195 int ShufIdxs[16]; 4196 switch (OpNum) { 4197 default: llvm_unreachable("Unknown i32 permute!"); 4198 case OP_VMRGHW: 4199 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 4200 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 4201 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 4202 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 4203 break; 4204 case OP_VMRGLW: 4205 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 4206 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 4207 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 4208 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 4209 break; 4210 case OP_VSPLTISW0: 4211 for (unsigned i = 0; i != 16; ++i) 4212 ShufIdxs[i] = (i&3)+0; 4213 break; 4214 case OP_VSPLTISW1: 4215 for (unsigned i = 0; i != 16; ++i) 4216 ShufIdxs[i] = (i&3)+4; 4217 break; 4218 case OP_VSPLTISW2: 4219 for (unsigned i = 0; i != 16; ++i) 
4220 ShufIdxs[i] = (i&3)+8; 4221 break; 4222 case OP_VSPLTISW3: 4223 for (unsigned i = 0; i != 16; ++i) 4224 ShufIdxs[i] = (i&3)+12; 4225 break; 4226 case OP_VSLDOI4: 4227 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 4228 case OP_VSLDOI8: 4229 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 4230 case OP_VSLDOI12: 4231 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 4232 } 4233 EVT VT = OpLHS.getValueType(); 4234 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 4235 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 4236 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 4237 return DAG.getNode(ISD::BITCAST, dl, VT, T); 4238 } 4239 4240 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 4241 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 4242 /// return the code it can be lowered into. Worst case, it can always be 4243 /// lowered into a vperm. 4244 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 4245 SelectionDAG &DAG) const { 4246 DebugLoc dl = Op.getDebugLoc(); 4247 SDValue V1 = Op.getOperand(0); 4248 SDValue V2 = Op.getOperand(1); 4249 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 4250 EVT VT = Op.getValueType(); 4251 4252 // Cases that are handled by instructions that take permute immediates 4253 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 4254 // selected by the instruction selector. 
4255 if (V2.getOpcode() == ISD::UNDEF) { 4256 if (PPC::isSplatShuffleMask(SVOp, 1) || 4257 PPC::isSplatShuffleMask(SVOp, 2) || 4258 PPC::isSplatShuffleMask(SVOp, 4) || 4259 PPC::isVPKUWUMShuffleMask(SVOp, true) || 4260 PPC::isVPKUHUMShuffleMask(SVOp, true) || 4261 PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || 4262 PPC::isVMRGLShuffleMask(SVOp, 1, true) || 4263 PPC::isVMRGLShuffleMask(SVOp, 2, true) || 4264 PPC::isVMRGLShuffleMask(SVOp, 4, true) || 4265 PPC::isVMRGHShuffleMask(SVOp, 1, true) || 4266 PPC::isVMRGHShuffleMask(SVOp, 2, true) || 4267 PPC::isVMRGHShuffleMask(SVOp, 4, true)) { 4268 return Op; 4269 } 4270 } 4271 4272 // Altivec has a variety of "shuffle immediates" that take two vector inputs 4273 // and produce a fixed permutation. If any of these match, do not lower to 4274 // VPERM. 4275 if (PPC::isVPKUWUMShuffleMask(SVOp, false) || 4276 PPC::isVPKUHUMShuffleMask(SVOp, false) || 4277 PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || 4278 PPC::isVMRGLShuffleMask(SVOp, 1, false) || 4279 PPC::isVMRGLShuffleMask(SVOp, 2, false) || 4280 PPC::isVMRGLShuffleMask(SVOp, 4, false) || 4281 PPC::isVMRGHShuffleMask(SVOp, 1, false) || 4282 PPC::isVMRGHShuffleMask(SVOp, 2, false) || 4283 PPC::isVMRGHShuffleMask(SVOp, 4, false)) 4284 return Op; 4285 4286 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 4287 // perfect shuffle table to emit an optimal matching sequence. 4288 ArrayRef<int> PermMask = SVOp->getMask(); 4289 4290 unsigned PFIndexes[4]; 4291 bool isFourElementShuffle = true; 4292 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 4293 unsigned EltNo = 8; // Start out undef. 4294 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 4295 if (PermMask[i*4+j] < 0) 4296 continue; // Undef, ignore it. 
4297 4298 unsigned ByteSource = PermMask[i*4+j]; 4299 if ((ByteSource & 3) != j) { 4300 isFourElementShuffle = false; 4301 break; 4302 } 4303 4304 if (EltNo == 8) { 4305 EltNo = ByteSource/4; 4306 } else if (EltNo != ByteSource/4) { 4307 isFourElementShuffle = false; 4308 break; 4309 } 4310 } 4311 PFIndexes[i] = EltNo; 4312 } 4313 4314 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 4315 // perfect shuffle vector to determine if it is cost effective to do this as 4316 // discrete instructions, or whether we should use a vperm. 4317 if (isFourElementShuffle) { 4318 // Compute the index in the perfect shuffle table. 4319 unsigned PFTableIndex = 4320 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4321 4322 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4323 unsigned Cost = (PFEntry >> 30); 4324 4325 // Determining when to avoid vperm is tricky. Many things affect the cost 4326 // of vperm, particularly how many times the perm mask needs to be computed. 4327 // For example, if the perm mask can be hoisted out of a loop or is already 4328 // used (perhaps because there are multiple permutes with the same shuffle 4329 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 4330 // the loop requires an extra register. 4331 // 4332 // As a compromise, we only emit discrete instructions if the shuffle can be 4333 // generated in 3 or fewer operations. When we have loop information 4334 // available, if this block is within a loop, we should avoid using vperm 4335 // for 3-operation perms and use a constant pool load instead. 4336 if (Cost < 3) 4337 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 4338 } 4339 4340 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 4341 // vector that will get spilled to the constant pool. 
4342 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 4343 4344 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 4345 // that it is in input element units, not in bytes. Convert now. 4346 EVT EltVT = V1.getValueType().getVectorElementType(); 4347 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 4348 4349 SmallVector<SDValue, 16> ResultMask; 4350 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 4351 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 4352 4353 for (unsigned j = 0; j != BytesPerElement; ++j) 4354 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 4355 MVT::i32)); 4356 } 4357 4358 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 4359 &ResultMask[0], ResultMask.size()); 4360 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); 4361 } 4362 4363 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 4364 /// altivec comparison. If it is, return true and fill in Opc/isDot with 4365 /// information about the intrinsic. 4366 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, 4367 bool &isDot) { 4368 unsigned IntrinsicID = 4369 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 4370 CompareOpc = -1; 4371 isDot = false; 4372 switch (IntrinsicID) { 4373 default: return false; 4374 // Comparison predicates. 
4375 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 4376 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 4377 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 4378 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 4379 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 4380 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 4381 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 4382 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 4383 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 4384 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 4385 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 4386 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 4387 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 4388 4389 // Normal Comparisons. 
4390 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 4391 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 4392 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 4393 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 4394 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 4395 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 4396 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 4397 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 4398 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 4399 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 4400 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 4401 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 4402 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 4403 } 4404 return true; 4405 } 4406 4407 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 4408 /// lower, do it, otherwise return null. 4409 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 4410 SelectionDAG &DAG) const { 4411 // If this is a lowered altivec predicate compare, CompareOpc is set to the 4412 // opcode number of the comparison. 4413 DebugLoc dl = Op.getDebugLoc(); 4414 int CompareOpc; 4415 bool isDot; 4416 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 4417 return SDValue(); // Don't custom lower most intrinsics. 4418 4419 // If this is a non-dot comparison, make the VCMP node and we are done. 
4420 if (!isDot) { 4421 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 4422 Op.getOperand(1), Op.getOperand(2), 4423 DAG.getConstant(CompareOpc, MVT::i32)); 4424 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 4425 } 4426 4427 // Create the PPCISD altivec 'dot' comparison node. 4428 SDValue Ops[] = { 4429 Op.getOperand(2), // LHS 4430 Op.getOperand(3), // RHS 4431 DAG.getConstant(CompareOpc, MVT::i32) 4432 }; 4433 std::vector<EVT> VTs; 4434 VTs.push_back(Op.getOperand(2).getValueType()); 4435 VTs.push_back(MVT::Glue); 4436 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); 4437 4438 // Now that we have the comparison, emit a copy from the CR to a GPR. 4439 // This is flagged to the above dot comparison. 4440 SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32, 4441 DAG.getRegister(PPC::CR6, MVT::i32), 4442 CompNode.getValue(1)); 4443 4444 // Unpack the result based on how the target uses it. 4445 unsigned BitNo; // Bit # of CR6. 4446 bool InvertBit; // Invert result? 4447 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 4448 default: // Can't happen, don't crash on invalid number though. 4449 case 0: // Return the value of the EQ bit of CR6. 4450 BitNo = 0; InvertBit = false; 4451 break; 4452 case 1: // Return the inverted value of the EQ bit of CR6. 4453 BitNo = 0; InvertBit = true; 4454 break; 4455 case 2: // Return the value of the LT bit of CR6. 4456 BitNo = 2; InvertBit = false; 4457 break; 4458 case 3: // Return the inverted value of the LT bit of CR6. 4459 BitNo = 2; InvertBit = true; 4460 break; 4461 } 4462 4463 // Shift the bit into the low position. 4464 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 4465 DAG.getConstant(8-(3-BitNo), MVT::i32)); 4466 // Isolate the bit. 4467 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 4468 DAG.getConstant(1, MVT::i32)); 4469 4470 // If we are supposed to, toggle the bit. 
4471 if (InvertBit) 4472 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 4473 DAG.getConstant(1, MVT::i32)); 4474 return Flags; 4475 } 4476 4477 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 4478 SelectionDAG &DAG) const { 4479 DebugLoc dl = Op.getDebugLoc(); 4480 // Create a stack slot that is 16-byte aligned. 4481 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 4482 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 4483 EVT PtrVT = getPointerTy(); 4484 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 4485 4486 // Store the input value into Value#0 of the stack slot. 4487 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 4488 Op.getOperand(0), FIdx, MachinePointerInfo(), 4489 false, false, 0); 4490 // Load it out. 4491 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), 4492 false, false, false, 0); 4493 } 4494 4495 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 4496 DebugLoc dl = Op.getDebugLoc(); 4497 if (Op.getValueType() == MVT::v4i32) { 4498 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 4499 4500 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 4501 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 4502 4503 SDValue RHSSwap = // = vrlw RHS, 16 4504 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 4505 4506 // Shrinkify inputs to v8i16. 4507 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 4508 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 4509 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 4510 4511 // Low parts multiplied together, generating 32-bit results (we ignore the 4512 // top parts). 4513 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 4514 LHS, RHS, DAG, dl, MVT::v4i32); 4515 4516 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 4517 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 4518 // Shift the high parts up 16 bits. 
4519 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 4520 Neg16, DAG, dl); 4521 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 4522 } else if (Op.getValueType() == MVT::v8i16) { 4523 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 4524 4525 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 4526 4527 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 4528 LHS, RHS, Zero, DAG, dl); 4529 } else if (Op.getValueType() == MVT::v16i8) { 4530 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 4531 4532 // Multiply the even 8-bit parts, producing 16-bit sums. 4533 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 4534 LHS, RHS, DAG, dl, MVT::v8i16); 4535 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 4536 4537 // Multiply the odd 8-bit parts, producing 16-bit sums. 4538 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 4539 LHS, RHS, DAG, dl, MVT::v8i16); 4540 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 4541 4542 // Merge the results together. 4543 int Ops[16]; 4544 for (unsigned i = 0; i != 8; ++i) { 4545 Ops[i*2 ] = 2*i+1; 4546 Ops[i*2+1] = 2*i+1+16; 4547 } 4548 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 4549 } else { 4550 llvm_unreachable("Unknown mul to lower!"); 4551 } 4552 } 4553 4554 /// LowerOperation - Provide custom lowering hooks for some operations. 
4555 /// 4556 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 4557 switch (Op.getOpcode()) { 4558 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 4559 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4560 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 4561 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 4562 case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC"); 4563 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 4564 case ISD::SETCC: return LowerSETCC(Op, DAG); 4565 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 4566 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 4567 case ISD::VASTART: 4568 return LowerVASTART(Op, DAG, PPCSubTarget); 4569 4570 case ISD::VAARG: 4571 return LowerVAARG(Op, DAG, PPCSubTarget); 4572 4573 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); 4574 case ISD::DYNAMIC_STACKALLOC: 4575 return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); 4576 4577 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 4578 case ISD::FP_TO_UINT: 4579 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 4580 Op.getDebugLoc()); 4581 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 4582 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 4583 4584 // Lower 64-bit shifts. 4585 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 4586 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 4587 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 4588 4589 // Vector-related lowering. 4590 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 4591 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4592 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 4593 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 4594 case ISD::MUL: return LowerMUL(Op, DAG); 4595 4596 // Frame & Return address. 
4597 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4598 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4599 } 4600 } 4601 4602 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 4603 SmallVectorImpl<SDValue>&Results, 4604 SelectionDAG &DAG) const { 4605 const TargetMachine &TM = getTargetMachine(); 4606 DebugLoc dl = N->getDebugLoc(); 4607 switch (N->getOpcode()) { 4608 default: 4609 llvm_unreachable("Do not know how to custom type legalize this operation!"); 4610 case ISD::VAARG: { 4611 if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() 4612 || TM.getSubtarget<PPCSubtarget>().isPPC64()) 4613 return; 4614 4615 EVT VT = N->getValueType(0); 4616 4617 if (VT == MVT::i64) { 4618 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget); 4619 4620 Results.push_back(NewNode); 4621 Results.push_back(NewNode.getValue(1)); 4622 } 4623 return; 4624 } 4625 case ISD::FP_ROUND_INREG: { 4626 assert(N->getValueType(0) == MVT::ppcf128); 4627 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 4628 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 4629 MVT::f64, N->getOperand(0), 4630 DAG.getIntPtrConstant(0)); 4631 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 4632 MVT::f64, N->getOperand(0), 4633 DAG.getIntPtrConstant(1)); 4634 4635 // This sequence changes FPSCR to do round-to-zero, adds the two halves 4636 // of the long double, and puts FPSCR back the way it was. We do not 4637 // actually model FPSCR. 
4638 std::vector<EVT> NodeTys; 4639 SDValue Ops[4], Result, MFFSreg, InFlag, FPreg; 4640 4641 NodeTys.push_back(MVT::f64); // Return register 4642 NodeTys.push_back(MVT::Glue); // Returns a flag for later insns 4643 Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); 4644 MFFSreg = Result.getValue(0); 4645 InFlag = Result.getValue(1); 4646 4647 NodeTys.clear(); 4648 NodeTys.push_back(MVT::Glue); // Returns a flag 4649 Ops[0] = DAG.getConstant(31, MVT::i32); 4650 Ops[1] = InFlag; 4651 Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2); 4652 InFlag = Result.getValue(0); 4653 4654 NodeTys.clear(); 4655 NodeTys.push_back(MVT::Glue); // Returns a flag 4656 Ops[0] = DAG.getConstant(30, MVT::i32); 4657 Ops[1] = InFlag; 4658 Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2); 4659 InFlag = Result.getValue(0); 4660 4661 NodeTys.clear(); 4662 NodeTys.push_back(MVT::f64); // result of add 4663 NodeTys.push_back(MVT::Glue); // Returns a flag 4664 Ops[0] = Lo; 4665 Ops[1] = Hi; 4666 Ops[2] = InFlag; 4667 Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3); 4668 FPreg = Result.getValue(0); 4669 InFlag = Result.getValue(1); 4670 4671 NodeTys.clear(); 4672 NodeTys.push_back(MVT::f64); 4673 Ops[0] = DAG.getConstant(1, MVT::i32); 4674 Ops[1] = MFFSreg; 4675 Ops[2] = FPreg; 4676 Ops[3] = InFlag; 4677 Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4); 4678 FPreg = Result.getValue(0); 4679 4680 // We know the low half is about to be thrown away, so just use something 4681 // convenient. 
4682 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 4683 FPreg, FPreg)); 4684 return; 4685 } 4686 case ISD::FP_TO_SINT: 4687 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); 4688 return; 4689 } 4690 } 4691 4692 4693 //===----------------------------------------------------------------------===// 4694 // Other Lowering Code 4695 //===----------------------------------------------------------------------===// 4696 4697 MachineBasicBlock * 4698 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 4699 bool is64bit, unsigned BinOpcode) const { 4700 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 4701 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4702 4703 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4704 MachineFunction *F = BB->getParent(); 4705 MachineFunction::iterator It = BB; 4706 ++It; 4707 4708 unsigned dest = MI->getOperand(0).getReg(); 4709 unsigned ptrA = MI->getOperand(1).getReg(); 4710 unsigned ptrB = MI->getOperand(2).getReg(); 4711 unsigned incr = MI->getOperand(3).getReg(); 4712 DebugLoc dl = MI->getDebugLoc(); 4713 4714 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 4715 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 4716 F->insert(It, loopMBB); 4717 F->insert(It, exitMBB); 4718 exitMBB->splice(exitMBB->begin(), BB, 4719 llvm::next(MachineBasicBlock::iterator(MI)), 4720 BB->end()); 4721 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 4722 4723 MachineRegisterInfo &RegInfo = F->getRegInfo(); 4724 unsigned TmpReg = (!BinOpcode) ? incr : 4725 RegInfo.createVirtualRegister( 4726 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 4727 (const TargetRegisterClass *) &PPC::GPRCRegClass); 4728 4729 // thisMBB: 4730 // ... 4731 // fallthrough --> loopMBB 4732 BB->addSuccessor(loopMBB); 4733 4734 // loopMBB: 4735 // l[wd]arx dest, ptr 4736 // add r0, dest, incr 4737 // st[wd]cx. 
r0, ptr 4738 // bne- loopMBB 4739 // fallthrough --> exitMBB 4740 BB = loopMBB; 4741 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 4742 .addReg(ptrA).addReg(ptrB); 4743 if (BinOpcode) 4744 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 4745 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 4746 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 4747 BuildMI(BB, dl, TII->get(PPC::BCC)) 4748 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 4749 BB->addSuccessor(loopMBB); 4750 BB->addSuccessor(exitMBB); 4751 4752 // exitMBB: 4753 // ... 4754 BB = exitMBB; 4755 return BB; 4756 } 4757 4758 MachineBasicBlock * 4759 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, 4760 MachineBasicBlock *BB, 4761 bool is8bit, // operation 4762 unsigned BinOpcode) const { 4763 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 4764 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4765 // In 64 bit mode we have to use 64 bits for addresses, even though the 4766 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 4767 // registers without caring whether they're 32 or 64, but here we're 4768 // doing actual arithmetic on the addresses. 4769 bool is64bit = PPCSubTarget.isPPC64(); 4770 unsigned ZeroReg = is64bit ? 
PPC::X0 : PPC::R0; 4771 4772 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4773 MachineFunction *F = BB->getParent(); 4774 MachineFunction::iterator It = BB; 4775 ++It; 4776 4777 unsigned dest = MI->getOperand(0).getReg(); 4778 unsigned ptrA = MI->getOperand(1).getReg(); 4779 unsigned ptrB = MI->getOperand(2).getReg(); 4780 unsigned incr = MI->getOperand(3).getReg(); 4781 DebugLoc dl = MI->getDebugLoc(); 4782 4783 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 4784 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 4785 F->insert(It, loopMBB); 4786 F->insert(It, exitMBB); 4787 exitMBB->splice(exitMBB->begin(), BB, 4788 llvm::next(MachineBasicBlock::iterator(MI)), 4789 BB->end()); 4790 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 4791 4792 MachineRegisterInfo &RegInfo = F->getRegInfo(); 4793 const TargetRegisterClass *RC = 4794 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 4795 (const TargetRegisterClass *) &PPC::GPRCRegClass; 4796 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 4797 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 4798 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 4799 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 4800 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 4801 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 4802 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 4803 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 4804 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 4805 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 4806 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 4807 unsigned Ptr1Reg; 4808 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 4809 4810 // thisMBB: 4811 // ... 4812 // fallthrough --> loopMBB 4813 BB->addSuccessor(loopMBB); 4814 4815 // The 4-byte load must be aligned, while a char or short may be 4816 // anywhere in the word. Hence all this nasty bookkeeping code. 
4817 // add ptr1, ptrA, ptrB [copy if ptrA==0] 4818 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 4819 // xori shift, shift1, 24 [16] 4820 // rlwinm ptr, ptr1, 0, 0, 29 4821 // slw incr2, incr, shift 4822 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 4823 // slw mask, mask2, shift 4824 // loopMBB: 4825 // lwarx tmpDest, ptr 4826 // add tmp, tmpDest, incr2 4827 // andc tmp2, tmpDest, mask 4828 // and tmp3, tmp, mask 4829 // or tmp4, tmp3, tmp2 4830 // stwcx. tmp4, ptr 4831 // bne- loopMBB 4832 // fallthrough --> exitMBB 4833 // srw dest, tmpDest, shift 4834 if (ptrA != ZeroReg) { 4835 Ptr1Reg = RegInfo.createVirtualRegister(RC); 4836 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 4837 .addReg(ptrA).addReg(ptrB); 4838 } else { 4839 Ptr1Reg = ptrB; 4840 } 4841 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 4842 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 4843 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 4844 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 4845 if (is64bit) 4846 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 4847 .addReg(Ptr1Reg).addImm(0).addImm(61); 4848 else 4849 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 4850 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 4851 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 4852 .addReg(incr).addReg(ShiftReg); 4853 if (is8bit) 4854 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 4855 else { 4856 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 4857 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 4858 } 4859 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 4860 .addReg(Mask2Reg).addReg(ShiftReg); 4861 4862 BB = loopMBB; 4863 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 4864 .addReg(ZeroReg).addReg(PtrReg); 4865 if (BinOpcode) 4866 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 4867 .addReg(Incr2Reg).addReg(TmpDestReg); 4868 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 4869 .addReg(TmpDestReg).addReg(MaskReg); 4870 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 4871 .addReg(TmpReg).addReg(MaskReg); 4872 BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) 4873 .addReg(Tmp3Reg).addReg(Tmp2Reg); 4874 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 4875 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 4876 BuildMI(BB, dl, TII->get(PPC::BCC)) 4877 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 4878 BB->addSuccessor(loopMBB); 4879 BB->addSuccessor(exitMBB); 4880 4881 // exitMBB: 4882 // ... 4883 BB = exitMBB; 4884 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 4885 .addReg(ShiftReg); 4886 return BB; 4887 } 4888 4889 MachineBasicBlock * 4890 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 4891 MachineBasicBlock *BB) const { 4892 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4893 4894 // To "insert" these instructions we actually have to insert their 4895 // control-flow patterns. 4896 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4897 MachineFunction::iterator It = BB; 4898 ++It; 4899 4900 MachineFunction *F = BB->getParent(); 4901 4902 if (MI->getOpcode() == PPC::SELECT_CC_I4 || 4903 MI->getOpcode() == PPC::SELECT_CC_I8 || 4904 MI->getOpcode() == PPC::SELECT_CC_F4 || 4905 MI->getOpcode() == PPC::SELECT_CC_F8 || 4906 MI->getOpcode() == PPC::SELECT_CC_VRRC) { 4907 4908 // The incoming instruction knows the destination vreg to set, the 4909 // condition code register to branch on, the true/false values to 4910 // select between, and a branch opcode to use. 4911 4912 // thisMBB: 4913 // ... 4914 // TrueVal = ... 
4915 // cmpTY ccX, r1, r2 4916 // bCC copy1MBB 4917 // fallthrough --> copy0MBB 4918 MachineBasicBlock *thisMBB = BB; 4919 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 4920 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 4921 unsigned SelectPred = MI->getOperand(4).getImm(); 4922 DebugLoc dl = MI->getDebugLoc(); 4923 F->insert(It, copy0MBB); 4924 F->insert(It, sinkMBB); 4925 4926 // Transfer the remainder of BB and its successor edges to sinkMBB. 4927 sinkMBB->splice(sinkMBB->begin(), BB, 4928 llvm::next(MachineBasicBlock::iterator(MI)), 4929 BB->end()); 4930 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 4931 4932 // Next, add the true and fallthrough blocks as its successors. 4933 BB->addSuccessor(copy0MBB); 4934 BB->addSuccessor(sinkMBB); 4935 4936 BuildMI(BB, dl, TII->get(PPC::BCC)) 4937 .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 4938 4939 // copy0MBB: 4940 // %FalseValue = ... 4941 // # fallthrough to sinkMBB 4942 BB = copy0MBB; 4943 4944 // Update machine-CFG edges 4945 BB->addSuccessor(sinkMBB); 4946 4947 // sinkMBB: 4948 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4949 // ... 
4950 BB = sinkMBB; 4951 BuildMI(*BB, BB->begin(), dl, 4952 TII->get(PPC::PHI), MI->getOperand(0).getReg()) 4953 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 4954 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4955 } 4956 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 4957 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 4958 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 4959 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 4960 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 4961 BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); 4962 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 4963 BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); 4964 4965 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 4966 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 4967 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 4968 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 4969 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 4970 BB = EmitAtomicBinary(MI, BB, false, PPC::AND); 4971 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 4972 BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); 4973 4974 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 4975 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 4976 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 4977 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 4978 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 4979 BB = EmitAtomicBinary(MI, BB, false, PPC::OR); 4980 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 4981 BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); 4982 4983 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 4984 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 4985 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 4986 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 4987 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 4988 BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); 4989 else if 
(MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 4990 BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); 4991 4992 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 4993 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC); 4994 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 4995 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC); 4996 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 4997 BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC); 4998 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 4999 BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8); 5000 5001 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 5002 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 5003 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 5004 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 5005 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 5006 BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); 5007 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 5008 BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); 5009 5010 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) 5011 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 5012 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) 5013 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 5014 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) 5015 BB = EmitAtomicBinary(MI, BB, false, 0); 5016 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) 5017 BB = EmitAtomicBinary(MI, BB, true, 0); 5018 5019 else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 5020 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { 5021 bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 5022 5023 unsigned dest = MI->getOperand(0).getReg(); 5024 unsigned ptrA = MI->getOperand(1).getReg(); 5025 unsigned ptrB = MI->getOperand(2).getReg(); 5026 unsigned oldval = MI->getOperand(3).getReg(); 5027 unsigned newval = MI->getOperand(4).getReg(); 5028 DebugLoc dl = MI->getDebugLoc(); 5029 5030 MachineBasicBlock *loop1MBB = 
F->CreateMachineBasicBlock(LLVM_BB); 5031 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 5032 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 5033 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 5034 F->insert(It, loop1MBB); 5035 F->insert(It, loop2MBB); 5036 F->insert(It, midMBB); 5037 F->insert(It, exitMBB); 5038 exitMBB->splice(exitMBB->begin(), BB, 5039 llvm::next(MachineBasicBlock::iterator(MI)), 5040 BB->end()); 5041 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5042 5043 // thisMBB: 5044 // ... 5045 // fallthrough --> loopMBB 5046 BB->addSuccessor(loop1MBB); 5047 5048 // loop1MBB: 5049 // l[wd]arx dest, ptr 5050 // cmp[wd] dest, oldval 5051 // bne- midMBB 5052 // loop2MBB: 5053 // st[wd]cx. newval, ptr 5054 // bne- loopMBB 5055 // b exitBB 5056 // midMBB: 5057 // st[wd]cx. dest, ptr 5058 // exitBB: 5059 BB = loop1MBB; 5060 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 5061 .addReg(ptrA).addReg(ptrB); 5062 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) 5063 .addReg(oldval).addReg(dest); 5064 BuildMI(BB, dl, TII->get(PPC::BCC)) 5065 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 5066 BB->addSuccessor(loop2MBB); 5067 BB->addSuccessor(midMBB); 5068 5069 BB = loop2MBB; 5070 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 5071 .addReg(newval).addReg(ptrA).addReg(ptrB); 5072 BuildMI(BB, dl, TII->get(PPC::BCC)) 5073 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 5074 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 5075 BB->addSuccessor(loop1MBB); 5076 BB->addSuccessor(exitMBB); 5077 5078 BB = midMBB; 5079 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 5080 .addReg(dest).addReg(ptrA).addReg(ptrB); 5081 BB->addSuccessor(exitMBB); 5082 5083 // exitMBB: 5084 // ... 
5085 BB = exitMBB; 5086 } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 5087 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 5088 // We must use 64-bit registers for addresses when targeting 64-bit, 5089 // since we're actually doing arithmetic on them. Other registers 5090 // can be 32-bit. 5091 bool is64bit = PPCSubTarget.isPPC64(); 5092 bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 5093 5094 unsigned dest = MI->getOperand(0).getReg(); 5095 unsigned ptrA = MI->getOperand(1).getReg(); 5096 unsigned ptrB = MI->getOperand(2).getReg(); 5097 unsigned oldval = MI->getOperand(3).getReg(); 5098 unsigned newval = MI->getOperand(4).getReg(); 5099 DebugLoc dl = MI->getDebugLoc(); 5100 5101 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 5102 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 5103 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 5104 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 5105 F->insert(It, loop1MBB); 5106 F->insert(It, loop2MBB); 5107 F->insert(It, midMBB); 5108 F->insert(It, exitMBB); 5109 exitMBB->splice(exitMBB->begin(), BB, 5110 llvm::next(MachineBasicBlock::iterator(MI)), 5111 BB->end()); 5112 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5113 5114 MachineRegisterInfo &RegInfo = F->getRegInfo(); 5115 const TargetRegisterClass *RC = 5116 is64bit ? 
(const TargetRegisterClass *) &PPC::G8RCRegClass : 5117 (const TargetRegisterClass *) &PPC::GPRCRegClass; 5118 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 5119 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 5120 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 5121 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 5122 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 5123 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 5124 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 5125 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 5126 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 5127 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 5128 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 5129 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 5130 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 5131 unsigned Ptr1Reg; 5132 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 5133 unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; 5134 // thisMBB: 5135 // ... 5136 // fallthrough --> loopMBB 5137 BB->addSuccessor(loop1MBB); 5138 5139 // The 4-byte load must be aligned, while a char or short may be 5140 // anywhere in the word. Hence all this nasty bookkeeping code. 5141 // add ptr1, ptrA, ptrB [copy if ptrA==0] 5142 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 5143 // xori shift, shift1, 24 [16] 5144 // rlwinm ptr, ptr1, 0, 0, 29 5145 // slw newval2, newval, shift 5146 // slw oldval2, oldval,shift 5147 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 5148 // slw mask, mask2, shift 5149 // and newval3, newval2, mask 5150 // and oldval3, oldval2, mask 5151 // loop1MBB: 5152 // lwarx tmpDest, ptr 5153 // and tmp, tmpDest, mask 5154 // cmpw tmp, oldval3 5155 // bne- midMBB 5156 // loop2MBB: 5157 // andc tmp2, tmpDest, mask 5158 // or tmp4, tmp2, newval3 5159 // stwcx. tmp4, ptr 5160 // bne- loop1MBB 5161 // b exitBB 5162 // midMBB: 5163 // stwcx. 
tmpDest, ptr 5164 // exitBB: 5165 // srw dest, tmpDest, shift 5166 if (ptrA != ZeroReg) { 5167 Ptr1Reg = RegInfo.createVirtualRegister(RC); 5168 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 5169 .addReg(ptrA).addReg(ptrB); 5170 } else { 5171 Ptr1Reg = ptrB; 5172 } 5173 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 5174 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 5175 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 5176 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 5177 if (is64bit) 5178 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 5179 .addReg(Ptr1Reg).addImm(0).addImm(61); 5180 else 5181 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 5182 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 5183 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 5184 .addReg(newval).addReg(ShiftReg); 5185 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 5186 .addReg(oldval).addReg(ShiftReg); 5187 if (is8bit) 5188 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 5189 else { 5190 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 5191 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 5192 .addReg(Mask3Reg).addImm(65535); 5193 } 5194 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 5195 .addReg(Mask2Reg).addReg(ShiftReg); 5196 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 5197 .addReg(NewVal2Reg).addReg(MaskReg); 5198 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 5199 .addReg(OldVal2Reg).addReg(MaskReg); 5200 5201 BB = loop1MBB; 5202 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 5203 .addReg(ZeroReg).addReg(PtrReg); 5204 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 5205 .addReg(TmpDestReg).addReg(MaskReg); 5206 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 5207 .addReg(TmpReg).addReg(OldVal3Reg); 5208 BuildMI(BB, dl, TII->get(PPC::BCC)) 5209 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 5210 BB->addSuccessor(loop2MBB); 5211 BB->addSuccessor(midMBB); 5212 5213 BB = loop2MBB; 5214 BuildMI(BB, dl, 
TII->get(PPC::ANDC),Tmp2Reg) 5215 .addReg(TmpDestReg).addReg(MaskReg); 5216 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 5217 .addReg(Tmp2Reg).addReg(NewVal3Reg); 5218 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 5219 .addReg(ZeroReg).addReg(PtrReg); 5220 BuildMI(BB, dl, TII->get(PPC::BCC)) 5221 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 5222 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 5223 BB->addSuccessor(loop1MBB); 5224 BB->addSuccessor(exitMBB); 5225 5226 BB = midMBB; 5227 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 5228 .addReg(ZeroReg).addReg(PtrReg); 5229 BB->addSuccessor(exitMBB); 5230 5231 // exitMBB: 5232 // ... 5233 BB = exitMBB; 5234 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 5235 .addReg(ShiftReg); 5236 } else { 5237 llvm_unreachable("Unexpected instr type to insert"); 5238 } 5239 5240 MI->eraseFromParent(); // The pseudo instruction is gone now. 5241 return BB; 5242 } 5243 5244 //===----------------------------------------------------------------------===// 5245 // Target Optimization Hooks 5246 //===----------------------------------------------------------------------===// 5247 5248 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 5249 DAGCombinerInfo &DCI) const { 5250 const TargetMachine &TM = getTargetMachine(); 5251 SelectionDAG &DAG = DCI.DAG; 5252 DebugLoc dl = N->getDebugLoc(); 5253 switch (N->getOpcode()) { 5254 default: break; 5255 case PPCISD::SHL: 5256 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 5257 if (C->isNullValue()) // 0 << V -> 0. 5258 return N->getOperand(0); 5259 } 5260 break; 5261 case PPCISD::SRL: 5262 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 5263 if (C->isNullValue()) // 0 >>u V -> 0. 5264 return N->getOperand(0); 5265 } 5266 break; 5267 case PPCISD::SRA: 5268 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 5269 if (C->isNullValue() || // 0 >>s V -> 0. 
5270 C->isAllOnesValue()) // -1 >>s V -> -1. 5271 return N->getOperand(0); 5272 } 5273 break; 5274 5275 case ISD::SINT_TO_FP: 5276 if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { 5277 if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { 5278 // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. 5279 // We allow the src/dst to be either f32/f64, but the intermediate 5280 // type must be i64. 5281 if (N->getOperand(0).getValueType() == MVT::i64 && 5282 N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) { 5283 SDValue Val = N->getOperand(0).getOperand(0); 5284 if (Val.getValueType() == MVT::f32) { 5285 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 5286 DCI.AddToWorklist(Val.getNode()); 5287 } 5288 5289 Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val); 5290 DCI.AddToWorklist(Val.getNode()); 5291 Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val); 5292 DCI.AddToWorklist(Val.getNode()); 5293 if (N->getValueType(0) == MVT::f32) { 5294 Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val, 5295 DAG.getIntPtrConstant(0)); 5296 DCI.AddToWorklist(Val.getNode()); 5297 } 5298 return Val; 5299 } else if (N->getOperand(0).getValueType() == MVT::i32) { 5300 // If the intermediate type is i32, we can avoid the load/store here 5301 // too. 5302 } 5303 } 5304 } 5305 break; 5306 case ISD::STORE: 5307 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 
5308 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && 5309 !cast<StoreSDNode>(N)->isTruncatingStore() && 5310 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 5311 N->getOperand(1).getValueType() == MVT::i32 && 5312 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 5313 SDValue Val = N->getOperand(1).getOperand(0); 5314 if (Val.getValueType() == MVT::f32) { 5315 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 5316 DCI.AddToWorklist(Val.getNode()); 5317 } 5318 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); 5319 DCI.AddToWorklist(Val.getNode()); 5320 5321 Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val, 5322 N->getOperand(2), N->getOperand(3)); 5323 DCI.AddToWorklist(Val.getNode()); 5324 return Val; 5325 } 5326 5327 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 5328 if (cast<StoreSDNode>(N)->isUnindexed() && 5329 N->getOperand(1).getOpcode() == ISD::BSWAP && 5330 N->getOperand(1).getNode()->hasOneUse() && 5331 (N->getOperand(1).getValueType() == MVT::i32 || 5332 N->getOperand(1).getValueType() == MVT::i16)) { 5333 SDValue BSwapOp = N->getOperand(1).getOperand(0); 5334 // Do an any-extend to 32-bits if this is a half-word input. 5335 if (BSwapOp.getValueType() == MVT::i16) 5336 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); 5337 5338 SDValue Ops[] = { 5339 N->getOperand(0), BSwapOp, N->getOperand(2), 5340 DAG.getValueType(N->getOperand(1).getValueType()) 5341 }; 5342 return 5343 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), 5344 Ops, array_lengthof(Ops), 5345 cast<StoreSDNode>(N)->getMemoryVT(), 5346 cast<StoreSDNode>(N)->getMemOperand()); 5347 } 5348 break; 5349 case ISD::BSWAP: 5350 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 
5351 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 5352 N->getOperand(0).hasOneUse() && 5353 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) { 5354 SDValue Load = N->getOperand(0); 5355 LoadSDNode *LD = cast<LoadSDNode>(Load); 5356 // Create the byte-swapping load. 5357 SDValue Ops[] = { 5358 LD->getChain(), // Chain 5359 LD->getBasePtr(), // Ptr 5360 DAG.getValueType(N->getValueType(0)) // VT 5361 }; 5362 SDValue BSLoad = 5363 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 5364 DAG.getVTList(MVT::i32, MVT::Other), Ops, 3, 5365 LD->getMemoryVT(), LD->getMemOperand()); 5366 5367 // If this is an i16 load, insert the truncate. 5368 SDValue ResVal = BSLoad; 5369 if (N->getValueType(0) == MVT::i16) 5370 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 5371 5372 // First, combine the bswap away. This makes the value produced by the 5373 // load dead. 5374 DCI.CombineTo(N, ResVal); 5375 5376 // Next, combine the load away, we give it a bogus result value but a real 5377 // chain result. The result value is dead because the bswap is dead. 5378 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 5379 5380 // Return N so it doesn't get rechecked! 5381 return SDValue(N, 0); 5382 } 5383 5384 break; 5385 case PPCISD::VCMP: { 5386 // If a VCMPo node already exists with exactly the same operands as this 5387 // node, use its result instead of this node (VCMPo computes both a CR6 and 5388 // a normal output). 5389 // 5390 if (!N->getOperand(0).hasOneUse() && 5391 !N->getOperand(1).hasOneUse() && 5392 !N->getOperand(2).hasOneUse()) { 5393 5394 // Scan all of the users of the LHS, looking for VCMPo's that match. 
5395 SDNode *VCMPoNode = 0; 5396 5397 SDNode *LHSN = N->getOperand(0).getNode(); 5398 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 5399 UI != E; ++UI) 5400 if (UI->getOpcode() == PPCISD::VCMPo && 5401 UI->getOperand(1) == N->getOperand(1) && 5402 UI->getOperand(2) == N->getOperand(2) && 5403 UI->getOperand(0) == N->getOperand(0)) { 5404 VCMPoNode = *UI; 5405 break; 5406 } 5407 5408 // If there is no VCMPo node, or if the flag value has a single use, don't 5409 // transform this. 5410 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 5411 break; 5412 5413 // Look at the (necessarily single) use of the flag value. If it has a 5414 // chain, this transformation is more complex. Note that multiple things 5415 // could use the value result, which we should ignore. 5416 SDNode *FlagUser = 0; 5417 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 5418 FlagUser == 0; ++UI) { 5419 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 5420 SDNode *User = *UI; 5421 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 5422 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 5423 FlagUser = User; 5424 break; 5425 } 5426 } 5427 } 5428 5429 // If the user is a MFCR instruction, we know this is safe. Otherwise we 5430 // give up for right now. 5431 if (FlagUser->getOpcode() == PPCISD::MFCR) 5432 return SDValue(VCMPoNode, 0); 5433 } 5434 break; 5435 } 5436 case ISD::BR_CC: { 5437 // If this is a branch on an altivec predicate comparison, lower this so 5438 // that we don't have to do a MFCR: instead, branch directly on CR6. This 5439 // lowering is done pre-legalize, because the legalizer lowers the predicate 5440 // compare down to code that is difficult to reassemble. 
5441 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 5442 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 5443 int CompareOpc; 5444 bool isDot; 5445 5446 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 5447 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 5448 getAltivecCompareInfo(LHS, CompareOpc, isDot)) { 5449 assert(isDot && "Can't compare against a vector result!"); 5450 5451 // If this is a comparison against something other than 0/1, then we know 5452 // that the condition is never/always true. 5453 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 5454 if (Val != 0 && Val != 1) { 5455 if (CC == ISD::SETEQ) // Cond never true, remove branch. 5456 return N->getOperand(0); 5457 // Always !=, turn it into an unconditional branch. 5458 return DAG.getNode(ISD::BR, dl, MVT::Other, 5459 N->getOperand(0), N->getOperand(4)); 5460 } 5461 5462 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 5463 5464 // Create the PPCISD altivec 'dot' comparison node. 5465 std::vector<EVT> VTs; 5466 SDValue Ops[] = { 5467 LHS.getOperand(2), // LHS of compare 5468 LHS.getOperand(3), // RHS of compare 5469 DAG.getConstant(CompareOpc, MVT::i32) 5470 }; 5471 VTs.push_back(LHS.getOperand(2).getValueType()); 5472 VTs.push_back(MVT::Glue); 5473 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); 5474 5475 // Unpack the result based on how the target uses it. 5476 PPC::Predicate CompOpc; 5477 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 5478 default: // Can't happen, don't crash on invalid number though. 5479 case 0: // Branch on the value of the EQ bit of CR6. 5480 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 5481 break; 5482 case 1: // Branch on the inverted value of the EQ bit of CR6. 5483 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 5484 break; 5485 case 2: // Branch on the value of the LT bit of CR6. 5486 CompOpc = BranchOnWhenPredTrue ? 
PPC::PRED_LT : PPC::PRED_GE; 5487 break; 5488 case 3: // Branch on the inverted value of the LT bit of CR6. 5489 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; 5490 break; 5491 } 5492 5493 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), 5494 DAG.getConstant(CompOpc, MVT::i32), 5495 DAG.getRegister(PPC::CR6, MVT::i32), 5496 N->getOperand(4), CompNode.getValue(1)); 5497 } 5498 break; 5499 } 5500 } 5501 5502 return SDValue(); 5503 } 5504 5505 //===----------------------------------------------------------------------===// 5506 // Inline Assembly Support 5507 //===----------------------------------------------------------------------===// 5508 5509 void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 5510 APInt &KnownZero, 5511 APInt &KnownOne, 5512 const SelectionDAG &DAG, 5513 unsigned Depth) const { 5514 KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); 5515 switch (Op.getOpcode()) { 5516 default: break; 5517 case PPCISD::LBRX: { 5518 // lhbrx is known to have the top bits cleared out. 5519 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 5520 KnownZero = 0xFFFF0000; 5521 break; 5522 } 5523 case ISD::INTRINSIC_WO_CHAIN: { 5524 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 5525 default: break; 5526 case Intrinsic::ppc_altivec_vcmpbfp_p: 5527 case Intrinsic::ppc_altivec_vcmpeqfp_p: 5528 case Intrinsic::ppc_altivec_vcmpequb_p: 5529 case Intrinsic::ppc_altivec_vcmpequh_p: 5530 case Intrinsic::ppc_altivec_vcmpequw_p: 5531 case Intrinsic::ppc_altivec_vcmpgefp_p: 5532 case Intrinsic::ppc_altivec_vcmpgtfp_p: 5533 case Intrinsic::ppc_altivec_vcmpgtsb_p: 5534 case Intrinsic::ppc_altivec_vcmpgtsh_p: 5535 case Intrinsic::ppc_altivec_vcmpgtsw_p: 5536 case Intrinsic::ppc_altivec_vcmpgtub_p: 5537 case Intrinsic::ppc_altivec_vcmpgtuh_p: 5538 case Intrinsic::ppc_altivec_vcmpgtuw_p: 5539 KnownZero = ~1U; // All bits but the low one are known to be zero. 
5540 break; 5541 } 5542 } 5543 } 5544 } 5545 5546 5547 /// getConstraintType - Given a constraint, return the type of 5548 /// constraint it is for this target. 5549 PPCTargetLowering::ConstraintType 5550 PPCTargetLowering::getConstraintType(const std::string &Constraint) const { 5551 if (Constraint.size() == 1) { 5552 switch (Constraint[0]) { 5553 default: break; 5554 case 'b': 5555 case 'r': 5556 case 'f': 5557 case 'v': 5558 case 'y': 5559 return C_RegisterClass; 5560 } 5561 } 5562 return TargetLowering::getConstraintType(Constraint); 5563 } 5564 5565 /// Examine constraint type and operand type and determine a weight value. 5566 /// This object must already have been set up with the operand type 5567 /// and the current alternative constraint selected. 5568 TargetLowering::ConstraintWeight 5569 PPCTargetLowering::getSingleConstraintMatchWeight( 5570 AsmOperandInfo &info, const char *constraint) const { 5571 ConstraintWeight weight = CW_Invalid; 5572 Value *CallOperandVal = info.CallOperandVal; 5573 // If we don't have a value, we can't do a match, 5574 // but allow it at the lowest weight. 5575 if (CallOperandVal == NULL) 5576 return CW_Default; 5577 Type *type = CallOperandVal->getType(); 5578 // Look at the constraint type. 
5579 switch (*constraint) { 5580 default: 5581 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 5582 break; 5583 case 'b': 5584 if (type->isIntegerTy()) 5585 weight = CW_Register; 5586 break; 5587 case 'f': 5588 if (type->isFloatTy()) 5589 weight = CW_Register; 5590 break; 5591 case 'd': 5592 if (type->isDoubleTy()) 5593 weight = CW_Register; 5594 break; 5595 case 'v': 5596 if (type->isVectorTy()) 5597 weight = CW_Register; 5598 break; 5599 case 'y': 5600 weight = CW_Register; 5601 break; 5602 } 5603 return weight; 5604 } 5605 5606 std::pair<unsigned, const TargetRegisterClass*> 5607 PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 5608 EVT VT) const { 5609 if (Constraint.size() == 1) { 5610 // GCC RS6000 Constraint Letters 5611 switch (Constraint[0]) { 5612 case 'b': // R1-R31 5613 case 'r': // R0-R31 5614 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) 5615 return std::make_pair(0U, PPC::G8RCRegisterClass); 5616 return std::make_pair(0U, PPC::GPRCRegisterClass); 5617 case 'f': 5618 if (VT == MVT::f32) 5619 return std::make_pair(0U, PPC::F4RCRegisterClass); 5620 else if (VT == MVT::f64) 5621 return std::make_pair(0U, PPC::F8RCRegisterClass); 5622 break; 5623 case 'v': 5624 return std::make_pair(0U, PPC::VRRCRegisterClass); 5625 case 'y': // crrc 5626 return std::make_pair(0U, PPC::CRRCRegisterClass); 5627 } 5628 } 5629 5630 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 5631 } 5632 5633 5634 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 5635 /// vector. If it is invalid, don't add anything to Ops. 5636 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 5637 std::string &Constraint, 5638 std::vector<SDValue>&Ops, 5639 SelectionDAG &DAG) const { 5640 SDValue Result(0,0); 5641 5642 // Only support length 1 constraints. 
5643 if (Constraint.length() > 1) return; 5644 5645 char Letter = Constraint[0]; 5646 switch (Letter) { 5647 default: break; 5648 case 'I': 5649 case 'J': 5650 case 'K': 5651 case 'L': 5652 case 'M': 5653 case 'N': 5654 case 'O': 5655 case 'P': { 5656 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 5657 if (!CST) return; // Must be an immediate to match. 5658 unsigned Value = CST->getZExtValue(); 5659 switch (Letter) { 5660 default: llvm_unreachable("Unknown constraint letter!"); 5661 case 'I': // "I" is a signed 16-bit constant. 5662 if ((short)Value == (int)Value) 5663 Result = DAG.getTargetConstant(Value, Op.getValueType()); 5664 break; 5665 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 5666 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 5667 if ((short)Value == 0) 5668 Result = DAG.getTargetConstant(Value, Op.getValueType()); 5669 break; 5670 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 5671 if ((Value >> 16) == 0) 5672 Result = DAG.getTargetConstant(Value, Op.getValueType()); 5673 break; 5674 case 'M': // "M" is a constant that is greater than 31. 5675 if (Value > 31) 5676 Result = DAG.getTargetConstant(Value, Op.getValueType()); 5677 break; 5678 case 'N': // "N" is a positive constant that is an exact power of two. 5679 if ((int)Value > 0 && isPowerOf2_32(Value)) 5680 Result = DAG.getTargetConstant(Value, Op.getValueType()); 5681 break; 5682 case 'O': // "O" is the constant zero. 5683 if (Value == 0) 5684 Result = DAG.getTargetConstant(Value, Op.getValueType()); 5685 break; 5686 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 5687 if ((short)-Value == (int)-Value) 5688 Result = DAG.getTargetConstant(Value, Op.getValueType()); 5689 break; 5690 } 5691 break; 5692 } 5693 } 5694 5695 if (Result.getNode()) { 5696 Ops.push_back(Result); 5697 return; 5698 } 5699 5700 // Handle standard constraint letters. 
5701 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 5702 } 5703 5704 // isLegalAddressingMode - Return true if the addressing mode represented 5705 // by AM is legal for this target, for a load/store of the specified type. 5706 bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, 5707 Type *Ty) const { 5708 // FIXME: PPC does not allow r+i addressing modes for vectors! 5709 5710 // PPC allows a sign-extended 16-bit immediate field. 5711 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 5712 return false; 5713 5714 // No global is ever allowed as a base. 5715 if (AM.BaseGV) 5716 return false; 5717 5718 // PPC only support r+r, 5719 switch (AM.Scale) { 5720 case 0: // "r+i" or just "i", depending on HasBaseReg. 5721 break; 5722 case 1: 5723 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 5724 return false; 5725 // Otherwise we have r+r or r+i. 5726 break; 5727 case 2: 5728 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 5729 return false; 5730 // Allow 2*r as r+r. 5731 break; 5732 default: 5733 // No other scales are supported. 5734 return false; 5735 } 5736 5737 return true; 5738 } 5739 5740 /// isLegalAddressImmediate - Return true if the integer value can be used 5741 /// as the offset of the target addressing mode for load / store of the 5742 /// given type. 5743 bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{ 5744 // PPC allows a sign-extended 16-bit immediate field. 
5745 return (V > -(1 << 16) && V < (1 << 16)-1); 5746 } 5747 5748 bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const { 5749 return false; 5750 } 5751 5752 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, 5753 SelectionDAG &DAG) const { 5754 MachineFunction &MF = DAG.getMachineFunction(); 5755 MachineFrameInfo *MFI = MF.getFrameInfo(); 5756 MFI->setReturnAddressIsTaken(true); 5757 5758 DebugLoc dl = Op.getDebugLoc(); 5759 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 5760 5761 // Make sure the function does not optimize away the store of the RA to 5762 // the stack. 5763 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 5764 FuncInfo->setLRStoreRequired(); 5765 bool isPPC64 = PPCSubTarget.isPPC64(); 5766 bool isDarwinABI = PPCSubTarget.isDarwinABI(); 5767 5768 if (Depth > 0) { 5769 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 5770 SDValue Offset = 5771 5772 DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI), 5773 isPPC64? MVT::i64 : MVT::i32); 5774 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 5775 DAG.getNode(ISD::ADD, dl, getPointerTy(), 5776 FrameAddr, Offset), 5777 MachinePointerInfo(), false, false, false, 0); 5778 } 5779 5780 // Just load the return address off the stack. 
5781 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); 5782 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 5783 RetAddrFI, MachinePointerInfo(), false, false, false, 0); 5784 } 5785 5786 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, 5787 SelectionDAG &DAG) const { 5788 DebugLoc dl = Op.getDebugLoc(); 5789 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 5790 5791 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5792 bool isPPC64 = PtrVT == MVT::i64; 5793 5794 MachineFunction &MF = DAG.getMachineFunction(); 5795 MachineFrameInfo *MFI = MF.getFrameInfo(); 5796 MFI->setFrameAddressIsTaken(true); 5797 bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || 5798 MFI->hasVarSizedObjects()) && 5799 MFI->getStackSize() && 5800 !MF.getFunction()->hasFnAttr(Attribute::Naked); 5801 unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : 5802 (is31 ? PPC::R31 : PPC::R1); 5803 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, 5804 PtrVT); 5805 while (Depth--) 5806 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), 5807 FrameAddr, MachinePointerInfo(), false, false, 5808 false, 0); 5809 return FrameAddr; 5810 } 5811 5812 bool 5813 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 5814 // The PowerPC target isn't yet aware of offsets. 5815 return false; 5816 } 5817 5818 /// getOptimalMemOpType - Returns the target specific optimal type for load 5819 /// and store operations as a result of memset, memcpy, and memmove 5820 /// lowering. If DstAlign is zero that means it's safe to destination 5821 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 5822 /// means there isn't a need to check it against alignment requirement, 5823 /// probably because the source does not need to be loaded. If 5824 /// 'IsZeroVal' is true, that means it's safe to return a 5825 /// non-scalar-integer type, e.g. 
empty string source, constant, or loaded 5826 /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is 5827 /// constant so it does not need to be loaded. 5828 /// It returns EVT::Other if the type should be determined using generic 5829 /// target-independent logic. 5830 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, 5831 unsigned DstAlign, unsigned SrcAlign, 5832 bool IsZeroVal, 5833 bool MemcpyStrSrc, 5834 MachineFunction &MF) const { 5835 if (this->PPCSubTarget.isPPC64()) { 5836 return MVT::i64; 5837 } else { 5838 return MVT::i32; 5839 } 5840 } 5841 5842 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { 5843 unsigned Directive = PPCSubTarget.getDarwinDirective(); 5844 if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) 5845 return Sched::ILP; 5846 5847 return TargetLowering::getSchedulingPreference(N); 5848 } 5849 5850