Home | History | Annotate | Download | only in PowerPC
      1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the PPCISelLowering class.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "PPCISelLowering.h"
     15 #include "PPCMachineFunctionInfo.h"
     16 #include "PPCPerfectShuffle.h"
     17 #include "PPCPredicates.h"
     18 #include "PPCTargetMachine.h"
     19 #include "llvm/ADT/STLExtras.h"
     20 #include "llvm/ADT/VectorExtras.h"
     21 #include "llvm/CodeGen/CallingConvLower.h"
     22 #include "llvm/CodeGen/MachineFrameInfo.h"
     23 #include "llvm/CodeGen/MachineFunction.h"
     24 #include "llvm/CodeGen/MachineInstrBuilder.h"
     25 #include "llvm/CodeGen/MachineRegisterInfo.h"
     26 #include "llvm/CodeGen/PseudoSourceValue.h"
     27 #include "llvm/CodeGen/SelectionDAG.h"
     28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
     29 #include "llvm/CallingConv.h"
     30 #include "llvm/Constants.h"
     31 #include "llvm/Function.h"
     32 #include "llvm/Intrinsics.h"
     33 #include "llvm/Support/MathExtras.h"
     34 #include "llvm/Target/TargetOptions.h"
     35 #include "llvm/Support/CommandLine.h"
     36 #include "llvm/Support/ErrorHandling.h"
     37 #include "llvm/Support/raw_ostream.h"
     38 #include "llvm/DerivedTypes.h"
     39 using namespace llvm;
     40 
     41 static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
     42                                      CCValAssign::LocInfo &LocInfo,
     43                                      ISD::ArgFlagsTy &ArgFlags,
     44                                      CCState &State);
     45 static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
     46                                             MVT &LocVT,
     47                                             CCValAssign::LocInfo &LocInfo,
     48                                             ISD::ArgFlagsTy &ArgFlags,
     49                                             CCState &State);
     50 static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
     51                                               MVT &LocVT,
     52                                               CCValAssign::LocInfo &LocInfo,
     53                                               ISD::ArgFlagsTy &ArgFlags,
     54                                               CCState &State);
     55 
     56 static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
     57 cl::desc("enable preincrement load/store generation on PPC (experimental)"),
     58                                      cl::Hidden);
     59 
     60 static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
     61   if (TM.getSubtargetImpl()->isDarwin())
     62     return new TargetLoweringObjectFileMachO();
     63 
     64   return new TargetLoweringObjectFileELF();
     65 }
     66 
     67 PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     68   : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
     69 
     70   setPow2DivIsCheap();
     71 
     72   // Use _setjmp/_longjmp instead of setjmp/longjmp.
     73   setUseUnderscoreSetJmp(true);
     74   setUseUnderscoreLongJmp(true);
     75 
     76   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
     77   // arguments are at least 4/8 bytes aligned.
     78   setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
     79 
     80   // Set up the register classes.
     81   addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
     82   addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
     83   addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
     84 
     85   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
     86   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
     87   setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
     88 
     89   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
     90 
     91   // PowerPC has pre-inc load and store's.
     92   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
     93   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
     94   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
     95   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
     96   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
     97   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
     98   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
     99   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
    100   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
    101   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
    102 
    103   // This is used in the ppcf128->int sequence.  Note it has different semantics
    104   // from FP_ROUND:  that rounds to nearest, this rounds to zero.
    105   setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
    106 
    107   // PowerPC has no SREM/UREM instructions
    108   setOperationAction(ISD::SREM, MVT::i32, Expand);
    109   setOperationAction(ISD::UREM, MVT::i32, Expand);
    110   setOperationAction(ISD::SREM, MVT::i64, Expand);
    111   setOperationAction(ISD::UREM, MVT::i64, Expand);
    112 
    113   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
    114   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    115   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    116   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
    117   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
    118   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    119   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    120   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
    121   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
    122 
    123   // We don't support sin/cos/sqrt/fmod/pow
    124   setOperationAction(ISD::FSIN , MVT::f64, Expand);
    125   setOperationAction(ISD::FCOS , MVT::f64, Expand);
    126   setOperationAction(ISD::FREM , MVT::f64, Expand);
    127   setOperationAction(ISD::FPOW , MVT::f64, Expand);
    128   setOperationAction(ISD::FMA  , MVT::f64, Expand);
    129   setOperationAction(ISD::FSIN , MVT::f32, Expand);
    130   setOperationAction(ISD::FCOS , MVT::f32, Expand);
    131   setOperationAction(ISD::FREM , MVT::f32, Expand);
    132   setOperationAction(ISD::FPOW , MVT::f32, Expand);
    133   setOperationAction(ISD::FMA  , MVT::f32, Expand);
    134 
    135   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
    136 
    137   // If we're enabling GP optimizations, use hardware square root
    138   if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    139     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    140     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
    141   }
    142 
    143   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    144   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
    145 
    146   // PowerPC does not have BSWAP, CTPOP or CTTZ
    147   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
    148   setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    149   setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
    150   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
    151   setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
    152   setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
    153 
    154   // PowerPC does not have ROTR
    155   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
    156   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
    157 
    158   // PowerPC does not have Select
    159   setOperationAction(ISD::SELECT, MVT::i32, Expand);
    160   setOperationAction(ISD::SELECT, MVT::i64, Expand);
    161   setOperationAction(ISD::SELECT, MVT::f32, Expand);
    162   setOperationAction(ISD::SELECT, MVT::f64, Expand);
    163 
    164   // PowerPC wants to turn select_cc of FP into fsel when possible.
    165   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
    166   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
    167 
    168   // PowerPC wants to optimize integer setcc a bit
    169   setOperationAction(ISD::SETCC, MVT::i32, Custom);
    170 
    171   // PowerPC does not have BRCOND which requires SetCC
    172   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
    173 
    174   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
    175 
    176   // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    177   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    178 
    179   // PowerPC does not have [U|S]INT_TO_FP
    180   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    181   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
    182 
    183   setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    184   setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    185   setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    186   setOperationAction(ISD::BITCAST, MVT::f64, Expand);
    187 
    188   // We cannot sextinreg(i1).  Expand to shifts.
    189   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
    190 
    191   setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
    192   setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
    193   setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
    194   setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
    195 
    196 
    197   // We want to legalize GlobalAddress and ConstantPool nodes into the
    198   // appropriate instructions to materialize the address.
    199   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
    200   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
    201   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
    202   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
    203   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
    204   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
    205   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
    206   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
    207   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
    208   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
    209 
    210   // TRAP is legal.
    211   setOperationAction(ISD::TRAP, MVT::Other, Legal);
    212 
    213   // TRAMPOLINE is custom lowered.
    214   setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
    215 
    216   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
    217   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
    218 
    219   // VAARG is custom lowered with the 32-bit SVR4 ABI.
    220   if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
    221       && !TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    222     setOperationAction(ISD::VAARG, MVT::Other, Custom);
    223     setOperationAction(ISD::VAARG, MVT::i64, Custom);
    224   } else
    225     setOperationAction(ISD::VAARG, MVT::Other, Expand);
    226 
    227   // Use the default implementation.
    228   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
    229   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
    230   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
    231   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
    232   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
    233   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
    234 
    235   // We want to custom lower some of our intrinsics.
    236   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
    237 
    238   // Comparisons that require checking two conditions.
    239   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
    240   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
    241   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
    242   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
    243   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
    244   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
    245   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
    246   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
    247   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
    248   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
    249   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
    250   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
    251 
    252   if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    253     // They also have instructions for converting between i64 and fp.
    254     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    255     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    256     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    257     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    258     // This is just the low 32 bits of a (signed) fp->i64 conversion.
    259     // We cannot do this with Promote because i64 is not a legal type.
    260     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    261 
    262     // FIXME: disable this lowered code.  This generates 64-bit register values,
    263     // and we don't model the fact that the top part is clobbered by calls.  We
    264     // need to flag these together so that the value isn't live across a call.
    265     //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    266   } else {
    267     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    268     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    269   }
    270 
    271   if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    272     // 64-bit PowerPC implementations can support i64 types directly
    273     addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    274     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    275     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    276     // 64-bit PowerPC wants to expand i128 shifts itself.
    277     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    278     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    279     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
    280   } else {
    281     // 32-bit PowerPC wants to expand i64 shifts itself.
    282     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    283     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    284     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
    285   }
    286 
    287   if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    288     // First set operation action for all vector types to expand. Then we
    289     // will selectively turn on ones that can be effectively codegen'd.
    290     for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
    291          i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    292       MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
    293 
    294       // add/sub are legal for all supported vector VT's.
    295       setOperationAction(ISD::ADD , VT, Legal);
    296       setOperationAction(ISD::SUB , VT, Legal);
    297 
    298       // We promote all shuffles to v16i8.
    299       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
    300       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
    301 
    302       // We promote all non-typed operations to v4i32.
    303       setOperationAction(ISD::AND   , VT, Promote);
    304       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
    305       setOperationAction(ISD::OR    , VT, Promote);
    306       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
    307       setOperationAction(ISD::XOR   , VT, Promote);
    308       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
    309       setOperationAction(ISD::LOAD  , VT, Promote);
    310       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
    311       setOperationAction(ISD::SELECT, VT, Promote);
    312       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
    313       setOperationAction(ISD::STORE, VT, Promote);
    314       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
    315 
    316       // No other operations are legal.
    317       setOperationAction(ISD::MUL , VT, Expand);
    318       setOperationAction(ISD::SDIV, VT, Expand);
    319       setOperationAction(ISD::SREM, VT, Expand);
    320       setOperationAction(ISD::UDIV, VT, Expand);
    321       setOperationAction(ISD::UREM, VT, Expand);
    322       setOperationAction(ISD::FDIV, VT, Expand);
    323       setOperationAction(ISD::FNEG, VT, Expand);
    324       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
    325       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
    326       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
    327       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    328       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    329       setOperationAction(ISD::UDIVREM, VT, Expand);
    330       setOperationAction(ISD::SDIVREM, VT, Expand);
    331       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
    332       setOperationAction(ISD::FPOW, VT, Expand);
    333       setOperationAction(ISD::CTPOP, VT, Expand);
    334       setOperationAction(ISD::CTLZ, VT, Expand);
    335       setOperationAction(ISD::CTTZ, VT, Expand);
    336     }
    337 
    338     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    339     // with merges, splats, etc.
    340     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
    341 
    342     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    343     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    344     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    345     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    346     setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    347     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    348 
    349     addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    350     addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    351     addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    352     addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
    353 
    354     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    355     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    356     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    357     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
    358 
    359     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    360     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
    361 
    362     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    363     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    364     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    365     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    366   }
    367 
    368   setBooleanContents(ZeroOrOneBooleanContent);
    369 
    370   if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    371     setStackPointerRegisterToSaveRestore(PPC::X1);
    372     setExceptionPointerRegister(PPC::X3);
    373     setExceptionSelectorRegister(PPC::X4);
    374   } else {
    375     setStackPointerRegisterToSaveRestore(PPC::R1);
    376     setExceptionPointerRegister(PPC::R3);
    377     setExceptionSelectorRegister(PPC::R4);
    378   }
    379 
    380   // We have target-specific dag combine patterns for the following nodes:
    381   setTargetDAGCombine(ISD::SINT_TO_FP);
    382   setTargetDAGCombine(ISD::STORE);
    383   setTargetDAGCombine(ISD::BR_CC);
    384   setTargetDAGCombine(ISD::BSWAP);
    385 
    386   // Darwin long double math library functions have $LDBL128 appended.
    387   if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    388     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    389     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    390     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    391     setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    392     setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    393     setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    394     setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    395     setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    396     setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    397     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
    398   }
    399 
    400   setMinFunctionAlignment(2);
    401   if (PPCSubTarget.isDarwin())
    402     setPrefFunctionAlignment(4);
    403 
    404   computeRegisterProperties();
    405 }
    406 
    407 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
    408 /// function arguments in the caller parameter area.
    409 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
    410   const TargetMachine &TM = getTargetMachine();
    411   // Darwin passes everything on 4 byte boundary.
    412   if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    413     return 4;
    414   // FIXME SVR4 TBD
    415   return 4;
    416 }
    417 
    418 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    419   switch (Opcode) {
    420   default: return 0;
    421   case PPCISD::FSEL:            return "PPCISD::FSEL";
    422   case PPCISD::FCFID:           return "PPCISD::FCFID";
    423   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
    424   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
    425   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
    426   case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
    427   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
    428   case PPCISD::VPERM:           return "PPCISD::VPERM";
    429   case PPCISD::Hi:              return "PPCISD::Hi";
    430   case PPCISD::Lo:              return "PPCISD::Lo";
    431   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
    432   case PPCISD::TOC_RESTORE:     return "PPCISD::TOC_RESTORE";
    433   case PPCISD::LOAD:            return "PPCISD::LOAD";
    434   case PPCISD::LOAD_TOC:        return "PPCISD::LOAD_TOC";
    435   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
    436   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
    437   case PPCISD::SRL:             return "PPCISD::SRL";
    438   case PPCISD::SRA:             return "PPCISD::SRA";
    439   case PPCISD::SHL:             return "PPCISD::SHL";
    440   case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
    441   case PPCISD::STD_32:          return "PPCISD::STD_32";
    442   case PPCISD::CALL_SVR4:       return "PPCISD::CALL_SVR4";
    443   case PPCISD::CALL_Darwin:     return "PPCISD::CALL_Darwin";
    444   case PPCISD::NOP:             return "PPCISD::NOP";
    445   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
    446   case PPCISD::BCTRL_Darwin:    return "PPCISD::BCTRL_Darwin";
    447   case PPCISD::BCTRL_SVR4:      return "PPCISD::BCTRL_SVR4";
    448   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
    449   case PPCISD::MFCR:            return "PPCISD::MFCR";
    450   case PPCISD::VCMP:            return "PPCISD::VCMP";
    451   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
    452   case PPCISD::LBRX:            return "PPCISD::LBRX";
    453   case PPCISD::STBRX:           return "PPCISD::STBRX";
    454   case PPCISD::LARX:            return "PPCISD::LARX";
    455   case PPCISD::STCX:            return "PPCISD::STCX";
    456   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
    457   case PPCISD::MFFS:            return "PPCISD::MFFS";
    458   case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
    459   case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
    460   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
    461   case PPCISD::MTFSF:           return "PPCISD::MTFSF";
    462   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
    463   }
    464 }
    465 
    466 MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const {
    467   return MVT::i32;
    468 }
    469 
    470 //===----------------------------------------------------------------------===//
    471 // Node matching predicates, for use by the tblgen matching code.
    472 //===----------------------------------------------------------------------===//
    473 
    474 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
    475 static bool isFloatingPointZero(SDValue Op) {
    476   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    477     return CFP->getValueAPF().isZero();
    478   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    479     // Maybe this has already been legalized into the constant pool?
    480     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
    481       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
    482         return CFP->getValueAPF().isZero();
    483   }
    484   return false;
    485 }
    486 
    487 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
    488 /// true if Op is undef or if it matches the specified value.
    489 static bool isConstantOrUndef(int Op, int Val) {
    490   return Op < 0 || Op == Val;
    491 }
    492 
    493 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
    494 /// VPKUHUM instruction.
    495 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
    496   if (!isUnary) {
    497     for (unsigned i = 0; i != 16; ++i)
    498       if (!isConstantOrUndef(N->getMaskElt(i),  i*2+1))
    499         return false;
    500   } else {
    501     for (unsigned i = 0; i != 8; ++i)
    502       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+1) ||
    503           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+1))
    504         return false;
    505   }
    506   return true;
    507 }
    508 
    509 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
    510 /// VPKUWUM instruction.
    511 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
    512   if (!isUnary) {
    513     for (unsigned i = 0; i != 16; i += 2)
    514       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
    515           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
    516         return false;
    517   } else {
    518     for (unsigned i = 0; i != 8; i += 2)
    519       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
    520           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3) ||
    521           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+2) ||
    522           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+3))
    523         return false;
    524   }
    525   return true;
    526 }
    527 
    528 /// isVMerge - Common function, used to match vmrg* shuffles.
    529 ///
    530 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
    531                      unsigned LHSStart, unsigned RHSStart) {
    532   assert(N->getValueType(0) == MVT::v16i8 &&
    533          "PPC only supports shuffles by bytes!");
    534   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
    535          "Unsupported merge size!");
    536 
    537   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    538     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
    539       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
    540                              LHSStart+j+i*UnitSize) ||
    541           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
    542                              RHSStart+j+i*UnitSize))
    543         return false;
    544     }
    545   return true;
    546 }
    547 
    548 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
    549 /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
    550 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
    551                              bool isUnary) {
    552   if (!isUnary)
    553     return isVMerge(N, UnitSize, 8, 24);
    554   return isVMerge(N, UnitSize, 8, 8);
    555 }
    556 
    557 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
    558 /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
    559 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
    560                              bool isUnary) {
    561   if (!isUnary)
    562     return isVMerge(N, UnitSize, 0, 16);
    563   return isVMerge(N, UnitSize, 0, 0);
    564 }
    565 
    566 
    567 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
    568 /// amount, otherwise return -1.
    569 int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
    570   assert(N->getValueType(0) == MVT::v16i8 &&
    571          "PPC only supports shuffles by bytes!");
    572 
    573   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
    574 
    575   // Find the first non-undef value in the shuffle mask.
    576   unsigned i;
    577   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    578     /*search*/;
    579 
    580   if (i == 16) return -1;  // all undef.
    581 
    582   // Otherwise, check to see if the rest of the elements are consecutively
    583   // numbered from this value.
    584   unsigned ShiftAmt = SVOp->getMaskElt(i);
    585   if (ShiftAmt < i) return -1;
    586   ShiftAmt -= i;
    587 
    588   if (!isUnary) {
    589     // Check the rest of the elements to see if they are consecutive.
    590     for (++i; i != 16; ++i)
    591       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
    592         return -1;
    593   } else {
    594     // Check the rest of the elements to see if they are consecutive.
    595     for (++i; i != 16; ++i)
    596       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
    597         return -1;
    598   }
    599   return ShiftAmt;
    600 }
    601 
    602 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
    603 /// specifies a splat of a single element that is suitable for input to
    604 /// VSPLTB/VSPLTH/VSPLTW.
    605 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
    606   assert(N->getValueType(0) == MVT::v16i8 &&
    607          (EltSize == 1 || EltSize == 2 || EltSize == 4));
    608 
    609   // This is a splat operation if each element of the permute is the same, and
    610   // if the value doesn't reference the second vector.
    611   unsigned ElementBase = N->getMaskElt(0);
    612 
    613   // FIXME: Handle UNDEF elements too!
    614   if (ElementBase >= 16)
    615     return false;
    616 
    617   // Check that the indices are consecutive, in the case of a multi-byte element
    618   // splatted with a v16i8 mask.
    619   for (unsigned i = 1; i != EltSize; ++i)
    620     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
    621       return false;
    622 
    623   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    624     if (N->getMaskElt(i) < 0) continue;
    625     for (unsigned j = 0; j != EltSize; ++j)
    626       if (N->getMaskElt(i+j) != N->getMaskElt(j))
    627         return false;
    628   }
    629   return true;
    630 }
    631 
    632 /// isAllNegativeZeroVector - Returns true if all elements of build_vector
    633 /// are -0.0.
    634 bool PPC::isAllNegativeZeroVector(SDNode *N) {
    635   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
    636 
    637   APInt APVal, APUndef;
    638   unsigned BitSize;
    639   bool HasAnyUndefs;
    640 
    641   if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    642     if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
    643       return CFP->getValueAPF().isNegZero();
    644 
    645   return false;
    646 }
    647 
    648 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
    649 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
    650 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
    651   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
    652   assert(isSplatShuffleMask(SVOp, EltSize));
    653   return SVOp->getMaskElt(0) / EltSize;
    654 }
    655 
    656 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
    657 /// by using a vspltis[bhw] instruction of the specified element size, return
    658 /// the constant being splatted.  The ByteSize field indicates the number of
    659 /// bytes of each element [124] -> [bhw].
    660 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
    661   SDValue OpVal(0, 0);
    662 
    663   // If ByteSize of the splat is bigger than the element size of the
    664   // build_vector, then we have a case where we are checking for a splat where
    665   // multiple elements of the buildvector are folded together into a single
    666   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
    667   unsigned EltSize = 16/N->getNumOperands();
    668   if (EltSize < ByteSize) {
    669     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    670     SDValue UniquedVals[4];
    671     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
    672 
    673     // See if all of the elements in the buildvector agree across.
    674     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    675       if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    676       // If the element isn't a constant, bail fully out.
    677       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
    678 
    679 
    680       if (UniquedVals[i&(Multiple-1)].getNode() == 0)
    681         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
    682       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
    683         return SDValue();  // no match.
    684     }
    685 
    686     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    687     // either constant or undef values that are identical for each chunk.  See
    688     // if these chunks can form into a larger vspltis*.
    689 
    690     // Check to see if all of the leading entries are either 0 or -1.  If
    691     // neither, then this won't fit into the immediate field.
    692     bool LeadingZero = true;
    693     bool LeadingOnes = true;
    694     for (unsigned i = 0; i != Multiple-1; ++i) {
    695       if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.
    696 
    697       LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
    698       LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    699     }
    700     // Finally, check the least significant entry.
    701     if (LeadingZero) {
    702       if (UniquedVals[Multiple-1].getNode() == 0)
    703         return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
    704       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
    705       if (Val < 16)
    706         return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    707     }
    708     if (LeadingOnes) {
    709       if (UniquedVals[Multiple-1].getNode() == 0)
    710         return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
    711       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
    712       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
    713         return DAG.getTargetConstant(Val, MVT::i32);
    714     }
    715 
    716     return SDValue();
    717   }
    718 
    719   // Check to see if this buildvec has a single non-undef value in its elements.
    720   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    721     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    722     if (OpVal.getNode() == 0)
    723       OpVal = N->getOperand(i);
    724     else if (OpVal != N->getOperand(i))
    725       return SDValue();
    726   }
    727 
    728   if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.
    729 
    730   unsigned ValSizeInBytes = EltSize;
    731   uint64_t Value = 0;
    732   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    733     Value = CN->getZExtValue();
    734   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    735     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    736     Value = FloatToBits(CN->getValueAPF().convertToFloat());
    737   }
    738 
    739   // If the splat value is larger than the element value, then we can never do
    740   // this splat.  The only case that we could fit the replicated bits into our
    741   // immediate field for would be zero, and we prefer to use vxor for it.
    742   if (ValSizeInBytes < ByteSize) return SDValue();
    743 
    744   // If the element value is larger than the splat value, cut it in half and
    745   // check to see if the two halves are equal.  Continue doing this until we
    746   // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
    747   while (ValSizeInBytes > ByteSize) {
    748     ValSizeInBytes >>= 1;
    749 
    750     // If the top half equals the bottom half, we're still ok.
    751     if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
    752          (Value                        & ((1 << (8*ValSizeInBytes))-1)))
    753       return SDValue();
    754   }
    755 
    756   // Properly sign extend the value.
    757   int ShAmt = (4-ByteSize)*8;
    758   int MaskVal = ((int)Value << ShAmt) >> ShAmt;
    759 
    760   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
    761   if (MaskVal == 0) return SDValue();
    762 
    763   // Finally, if this value fits in a 5 bit sext field, return it
    764   if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    765     return DAG.getTargetConstant(MaskVal, MVT::i32);
    766   return SDValue();
    767 }
    768 
    769 //===----------------------------------------------------------------------===//
    770 //  Addressing Mode Selection
    771 //===----------------------------------------------------------------------===//
    772 
    773 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
    774 /// or 64-bit immediate, and if the value can be accurately represented as a
    775 /// sign extension from a 16-bit value.  If so, this returns true and the
    776 /// immediate.
    777 static bool isIntS16Immediate(SDNode *N, short &Imm) {
    778   if (N->getOpcode() != ISD::Constant)
    779     return false;
    780 
    781   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
    782   if (N->getValueType(0) == MVT::i32)
    783     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
    784   else
    785     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
    786 }
    787 static bool isIntS16Immediate(SDValue Op, short &Imm) {
    788   return isIntS16Immediate(Op.getNode(), Imm);
    789 }
    790 
    791 
    792 /// SelectAddressRegReg - Given the specified addressed, check to see if it
    793 /// can be represented as an indexed [r+r] operation.  Returns false if it
    794 /// can be more efficiently represented with [r+imm].
    795 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
    796                                             SDValue &Index,
    797                                             SelectionDAG &DAG) const {
    798   short imm = 0;
    799   if (N.getOpcode() == ISD::ADD) {
    800     if (isIntS16Immediate(N.getOperand(1), imm))
    801       return false;    // r+i
    802     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
    803       return false;    // r+i
    804 
    805     Base = N.getOperand(0);
    806     Index = N.getOperand(1);
    807     return true;
    808   } else if (N.getOpcode() == ISD::OR) {
    809     if (isIntS16Immediate(N.getOperand(1), imm))
    810       return false;    // r+i can fold it if we can.
    811 
    812     // If this is an or of disjoint bitfields, we can codegen this as an add
    813     // (for better address arithmetic) if the LHS and RHS of the OR are provably
    814     // disjoint.
    815     APInt LHSKnownZero, LHSKnownOne;
    816     APInt RHSKnownZero, RHSKnownOne;
    817     DAG.ComputeMaskedBits(N.getOperand(0),
    818                           APInt::getAllOnesValue(N.getOperand(0)
    819                             .getValueSizeInBits()),
    820                           LHSKnownZero, LHSKnownOne);
    821 
    822     if (LHSKnownZero.getBoolValue()) {
    823       DAG.ComputeMaskedBits(N.getOperand(1),
    824                             APInt::getAllOnesValue(N.getOperand(1)
    825                               .getValueSizeInBits()),
    826                             RHSKnownZero, RHSKnownOne);
    827       // If all of the bits are known zero on the LHS or RHS, the add won't
    828       // carry.
    829       if (~(LHSKnownZero | RHSKnownZero) == 0) {
    830         Base = N.getOperand(0);
    831         Index = N.getOperand(1);
    832         return true;
    833       }
    834     }
    835   }
    836 
    837   return false;
    838 }
    839 
    840 /// Returns true if the address N can be represented by a base register plus
    841 /// a signed 16-bit displacement [r+imm], and if it is not better
    842 /// represented as reg+reg.
    843 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
    844                                             SDValue &Base,
    845                                             SelectionDAG &DAG) const {
    846   // FIXME dl should come from parent load or store, not from address
    847   DebugLoc dl = N.getDebugLoc();
    848   // If this can be more profitably realized as r+r, fail.
    849   if (SelectAddressRegReg(N, Disp, Base, DAG))
    850     return false;
    851 
    852   if (N.getOpcode() == ISD::ADD) {
    853     short imm = 0;
    854     if (isIntS16Immediate(N.getOperand(1), imm)) {
    855       Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
    856       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
    857         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    858       } else {
    859         Base = N.getOperand(0);
    860       }
    861       return true; // [r+i]
    862     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
    863       // Match LOAD (ADD (X, Lo(G))).
    864      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
    865              && "Cannot handle constant offsets yet!");
    866       Disp = N.getOperand(1).getOperand(0);  // The global address.
    867       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
    868              Disp.getOpcode() == ISD::TargetConstantPool ||
    869              Disp.getOpcode() == ISD::TargetJumpTable);
    870       Base = N.getOperand(0);
    871       return true;  // [&g+r]
    872     }
    873   } else if (N.getOpcode() == ISD::OR) {
    874     short imm = 0;
    875     if (isIntS16Immediate(N.getOperand(1), imm)) {
    876       // If this is an or of disjoint bitfields, we can codegen this as an add
    877       // (for better address arithmetic) if the LHS and RHS of the OR are
    878       // provably disjoint.
    879       APInt LHSKnownZero, LHSKnownOne;
    880       DAG.ComputeMaskedBits(N.getOperand(0),
    881                             APInt::getAllOnesValue(N.getOperand(0)
    882                                                    .getValueSizeInBits()),
    883                             LHSKnownZero, LHSKnownOne);
    884 
    885       if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
    886         // If all of the bits are known zero on the LHS or RHS, the add won't
    887         // carry.
    888         Base = N.getOperand(0);
    889         Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
    890         return true;
    891       }
    892     }
    893   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    894     // Loading from a constant address.
    895 
    896     // If this address fits entirely in a 16-bit sext immediate field, codegen
    897     // this as "d, 0"
    898     short Imm;
    899     if (isIntS16Immediate(CN, Imm)) {
    900       Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
    901       Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
    902                              CN->getValueType(0));
    903       return true;
    904     }
    905 
    906     // Handle 32-bit sext immediates with LIS + addr mode.
    907     if (CN->getValueType(0) == MVT::i32 ||
    908         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
    909       int Addr = (int)CN->getZExtValue();
    910 
    911       // Otherwise, break this down into an LIS + disp.
    912       Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
    913 
    914       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
    915       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
    916       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
    917       return true;
    918     }
    919   }
    920 
    921   Disp = DAG.getTargetConstant(0, getPointerTy());
    922   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    923     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    924   else
    925     Base = N;
    926   return true;      // [r+0]
    927 }
    928 
    929 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
    930 /// represented as an indexed [r+r] operation.
    931 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
    932                                                 SDValue &Index,
    933                                                 SelectionDAG &DAG) const {
    934   // Check to see if we can easily represent this as an [r+r] address.  This
    935   // will fail if it thinks that the address is more profitably represented as
    936   // reg+imm, e.g. where imm = 0.
    937   if (SelectAddressRegReg(N, Base, Index, DAG))
    938     return true;
    939 
    940   // If the operand is an addition, always emit this as [r+r], since this is
    941   // better (for code size, and execution, as the memop does the add for free)
    942   // than emitting an explicit add.
    943   if (N.getOpcode() == ISD::ADD) {
    944     Base = N.getOperand(0);
    945     Index = N.getOperand(1);
    946     return true;
    947   }
    948 
    949   // Otherwise, do it the hard way, using R0 as the base register.
    950   Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
    951                          N.getValueType());
    952   Index = N;
    953   return true;
    954 }
    955 
    956 /// SelectAddressRegImmShift - Returns true if the address N can be
    957 /// represented by a base register plus a signed 14-bit displacement
    958 /// [r+imm*4].  Suitable for use by STD and friends.
    959 bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
    960                                                  SDValue &Base,
    961                                                  SelectionDAG &DAG) const {
    962   // FIXME dl should come from the parent load or store, not the address
    963   DebugLoc dl = N.getDebugLoc();
    964   // If this can be more profitably realized as r+r, fail.
    965   if (SelectAddressRegReg(N, Disp, Base, DAG))
    966     return false;
    967 
    968   if (N.getOpcode() == ISD::ADD) {
    969     short imm = 0;
    970     if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
    971       Disp =  DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
    972       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
    973         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    974       } else {
    975         Base = N.getOperand(0);
    976       }
    977       return true; // [r+i]
    978     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
    979       // Match LOAD (ADD (X, Lo(G))).
    980      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
    981              && "Cannot handle constant offsets yet!");
    982       Disp = N.getOperand(1).getOperand(0);  // The global address.
    983       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
    984              Disp.getOpcode() == ISD::TargetConstantPool ||
    985              Disp.getOpcode() == ISD::TargetJumpTable);
    986       Base = N.getOperand(0);
    987       return true;  // [&g+r]
    988     }
    989   } else if (N.getOpcode() == ISD::OR) {
    990     short imm = 0;
    991     if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
    992       // If this is an or of disjoint bitfields, we can codegen this as an add
    993       // (for better address arithmetic) if the LHS and RHS of the OR are
    994       // provably disjoint.
    995       APInt LHSKnownZero, LHSKnownOne;
    996       DAG.ComputeMaskedBits(N.getOperand(0),
    997                             APInt::getAllOnesValue(N.getOperand(0)
    998                                                    .getValueSizeInBits()),
    999                             LHSKnownZero, LHSKnownOne);
   1000       if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
   1001         // If all of the bits are known zero on the LHS or RHS, the add won't
   1002         // carry.
   1003         Base = N.getOperand(0);
   1004         Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
   1005         return true;
   1006       }
   1007     }
   1008   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
   1009     // Loading from a constant address.  Verify low two bits are clear.
   1010     if ((CN->getZExtValue() & 3) == 0) {
   1011       // If this address fits entirely in a 14-bit sext immediate field, codegen
   1012       // this as "d, 0"
   1013       short Imm;
   1014       if (isIntS16Immediate(CN, Imm)) {
   1015         Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
   1016         Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
   1017                                CN->getValueType(0));
   1018         return true;
   1019       }
   1020 
   1021       // Fold the low-part of 32-bit absolute addresses into addr mode.
   1022       if (CN->getValueType(0) == MVT::i32 ||
   1023           (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
   1024         int Addr = (int)CN->getZExtValue();
   1025 
   1026         // Otherwise, break this down into an LIS + disp.
   1027         Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
   1028         Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
   1029         unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
   1030         Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
   1031         return true;
   1032       }
   1033     }
   1034   }
   1035 
   1036   Disp = DAG.getTargetConstant(0, getPointerTy());
   1037   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
   1038     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
   1039   else
   1040     Base = N;
   1041   return true;      // [r+0]
   1042 }
   1043 
   1044 
   1045 /// getPreIndexedAddressParts - returns true by value, base pointer and
   1046 /// offset pointer and addressing mode by reference if the node's address
   1047 /// can be legally represented as pre-indexed load / store address.
   1048 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
   1049                                                   SDValue &Offset,
   1050                                                   ISD::MemIndexedMode &AM,
   1051                                                   SelectionDAG &DAG) const {
   1052   // Disabled by default for now.
   1053   if (!EnablePPCPreinc) return false;
   1054 
   1055   SDValue Ptr;
   1056   EVT VT;
   1057   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
   1058     Ptr = LD->getBasePtr();
   1059     VT = LD->getMemoryVT();
   1060 
   1061   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
   1062     Ptr = ST->getBasePtr();
   1063     VT  = ST->getMemoryVT();
   1064   } else
   1065     return false;
   1066 
   1067   // PowerPC doesn't have preinc load/store instructions for vectors.
   1068   if (VT.isVector())
   1069     return false;
   1070 
   1071   // TODO: Check reg+reg first.
   1072 
   1073   // LDU/STU use reg+imm*4, others use reg+imm.
   1074   if (VT != MVT::i64) {
   1075     // reg + imm
   1076     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
   1077       return false;
   1078   } else {
   1079     // reg + imm * 4.
   1080     if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
   1081       return false;
   1082   }
   1083 
   1084   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
   1085     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
   1086     // sext i32 to i64 when addr mode is r+i.
   1087     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
   1088         LD->getExtensionType() == ISD::SEXTLOAD &&
   1089         isa<ConstantSDNode>(Offset))
   1090       return false;
   1091   }
   1092 
   1093   AM = ISD::PRE_INC;
   1094   return true;
   1095 }
   1096 
   1097 //===----------------------------------------------------------------------===//
   1098 //  LowerOperation implementation
   1099 //===----------------------------------------------------------------------===//
   1100 
   1101 /// GetLabelAccessInfo - Return true if we should reference labels using a
   1102 /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
   1103 static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
   1104                                unsigned &LoOpFlags, const GlobalValue *GV = 0) {
   1105   HiOpFlags = PPCII::MO_HA16;
   1106   LoOpFlags = PPCII::MO_LO16;
   1107 
   1108   // Don't use the pic base if not in PIC relocation model.  Or if we are on a
   1109   // non-darwin platform.  We don't support PIC on other platforms yet.
   1110   bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
   1111                TM.getSubtarget<PPCSubtarget>().isDarwin();
   1112   if (isPIC) {
   1113     HiOpFlags |= PPCII::MO_PIC_FLAG;
   1114     LoOpFlags |= PPCII::MO_PIC_FLAG;
   1115   }
   1116 
   1117   // If this is a reference to a global value that requires a non-lazy-ptr, make
   1118   // sure that instruction lowering adds it.
   1119   if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
   1120     HiOpFlags |= PPCII::MO_NLP_FLAG;
   1121     LoOpFlags |= PPCII::MO_NLP_FLAG;
   1122 
   1123     if (GV->hasHiddenVisibility()) {
   1124       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
   1125       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
   1126     }
   1127   }
   1128 
   1129   return isPIC;
   1130 }
   1131 
   1132 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
   1133                              SelectionDAG &DAG) {
   1134   EVT PtrVT = HiPart.getValueType();
   1135   SDValue Zero = DAG.getConstant(0, PtrVT);
   1136   DebugLoc DL = HiPart.getDebugLoc();
   1137 
   1138   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
   1139   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
   1140 
   1141   // With PIC, the first instruction is actually "GR+hi(&G)".
   1142   if (isPIC)
   1143     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
   1144                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
   1145 
   1146   // Generate non-pic code that has direct accesses to the constant pool.
   1147   // The address of the global is just (hi(&g)+lo(&g)).
   1148   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
   1149 }
   1150 
   1151 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
   1152                                              SelectionDAG &DAG) const {
   1153   EVT PtrVT = Op.getValueType();
   1154   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
   1155   const Constant *C = CP->getConstVal();
   1156 
   1157   unsigned MOHiFlag, MOLoFlag;
   1158   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
   1159   SDValue CPIHi =
   1160     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
   1161   SDValue CPILo =
   1162     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
   1163   return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
   1164 }
   1165 
   1166 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
   1167   EVT PtrVT = Op.getValueType();
   1168   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
   1169 
   1170   unsigned MOHiFlag, MOLoFlag;
   1171   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
   1172   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
   1173   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
   1174   return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
   1175 }
   1176 
   1177 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
   1178                                              SelectionDAG &DAG) const {
   1179   EVT PtrVT = Op.getValueType();
   1180 
   1181   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
   1182 
   1183   unsigned MOHiFlag, MOLoFlag;
   1184   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
   1185   SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag);
   1186   SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag);
   1187   return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
   1188 }
   1189 
   1190 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
   1191                                               SelectionDAG &DAG) const {
   1192   EVT PtrVT = Op.getValueType();
   1193   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
   1194   DebugLoc DL = GSDN->getDebugLoc();
   1195   const GlobalValue *GV = GSDN->getGlobal();
   1196 
   1197   // 64-bit SVR4 ABI code is always position-independent.
   1198   // The actual address of the GlobalValue is stored in the TOC.
   1199   if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
   1200     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
   1201     return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
   1202                        DAG.getRegister(PPC::X2, MVT::i64));
   1203   }
   1204 
   1205   unsigned MOHiFlag, MOLoFlag;
   1206   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
   1207 
   1208   SDValue GAHi =
   1209     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
   1210   SDValue GALo =
   1211     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
   1212 
   1213   SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
   1214 
   1215   // If the global reference is actually to a non-lazy-pointer, we have to do an
   1216   // extra load to get the address of the global.
   1217   if (MOHiFlag & PPCII::MO_NLP_FLAG)
   1218     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
   1219                       false, false, 0);
   1220   return Ptr;
   1221 }
   1222 
   1223 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   1224   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
   1225   DebugLoc dl = Op.getDebugLoc();
   1226 
   1227   // If we're comparing for equality to zero, expose the fact that this is
   1228   // implented as a ctlz/srl pair on ppc, so that the dag combiner can
   1229   // fold the new nodes.
   1230   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
   1231     if (C->isNullValue() && CC == ISD::SETEQ) {
   1232       EVT VT = Op.getOperand(0).getValueType();
   1233       SDValue Zext = Op.getOperand(0);
   1234       if (VT.bitsLT(MVT::i32)) {
   1235         VT = MVT::i32;
   1236         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
   1237       }
   1238       unsigned Log2b = Log2_32(VT.getSizeInBits());
   1239       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
   1240       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
   1241                                 DAG.getConstant(Log2b, MVT::i32));
   1242       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
   1243     }
   1244     // Leave comparisons against 0 and -1 alone for now, since they're usually
   1245     // optimized.  FIXME: revisit this when we can custom lower all setcc
   1246     // optimizations.
   1247     if (C->isAllOnesValue() || C->isNullValue())
   1248       return SDValue();
   1249   }
   1250 
   1251   // If we have an integer seteq/setne, turn it into a compare against zero
   1252   // by xor'ing the rhs with the lhs, which is faster than setting a
   1253   // condition register, reading it back out, and masking the correct bit.  The
   1254   // normal approach here uses sub to do this instead of xor.  Using xor exposes
   1255   // the result to other bit-twiddling opportunities.
   1256   EVT LHSVT = Op.getOperand(0).getValueType();
   1257   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
   1258     EVT VT = Op.getValueType();
   1259     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
   1260                                 Op.getOperand(1));
   1261     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
   1262   }
   1263   return SDValue();
   1264 }
   1265 
   1266 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
   1267                                       const PPCSubtarget &Subtarget) const {
   1268   SDNode *Node = Op.getNode();
   1269   EVT VT = Node->getValueType(0);
   1270   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   1271   SDValue InChain = Node->getOperand(0);
   1272   SDValue VAListPtr = Node->getOperand(1);
   1273   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
   1274   DebugLoc dl = Node->getDebugLoc();
   1275 
   1276   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
   1277 
   1278   // gpr_index
   1279   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
   1280                                     VAListPtr, MachinePointerInfo(SV), MVT::i8,
   1281                                     false, false, 0);
   1282   InChain = GprIndex.getValue(1);
   1283 
   1284   if (VT == MVT::i64) {
   1285     // Check if GprIndex is even
   1286     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
   1287                                  DAG.getConstant(1, MVT::i32));
   1288     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
   1289                                 DAG.getConstant(0, MVT::i32), ISD::SETNE);
   1290     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
   1291                                           DAG.getConstant(1, MVT::i32));
   1292     // Align GprIndex to be even if it isn't
   1293     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
   1294                            GprIndex);
   1295   }
   1296 
   1297   // fpr index is 1 byte after gpr
   1298   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
   1299                                DAG.getConstant(1, MVT::i32));
   1300 
   1301   // fpr
   1302   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
   1303                                     FprPtr, MachinePointerInfo(SV), MVT::i8,
   1304                                     false, false, 0);
   1305   InChain = FprIndex.getValue(1);
   1306 
   1307   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
   1308                                        DAG.getConstant(8, MVT::i32));
   1309 
   1310   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
   1311                                         DAG.getConstant(4, MVT::i32));
   1312 
   1313   // areas
   1314   SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
   1315                                      MachinePointerInfo(), false, false, 0);
   1316   InChain = OverflowArea.getValue(1);
   1317 
   1318   SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
   1319                                     MachinePointerInfo(), false, false, 0);
   1320   InChain = RegSaveArea.getValue(1);
   1321 
   1322   // select overflow_area if index > 8
   1323   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
   1324                             DAG.getConstant(8, MVT::i32), ISD::SETLT);
   1325 
   1326   // adjustment constant gpr_index * 4/8
   1327   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
   1328                                     VT.isInteger() ? GprIndex : FprIndex,
   1329                                     DAG.getConstant(VT.isInteger() ? 4 : 8,
   1330                                                     MVT::i32));
   1331 
   1332   // OurReg = RegSaveArea + RegConstant
   1333   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
   1334                                RegConstant);
   1335 
   1336   // Floating types are 32 bytes into RegSaveArea
   1337   if (VT.isFloatingPoint())
   1338     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
   1339                          DAG.getConstant(32, MVT::i32));
   1340 
   1341   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
   1342   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
   1343                                    VT.isInteger() ? GprIndex : FprIndex,
   1344                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1,
   1345                                                    MVT::i32));
   1346 
   1347   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
   1348                               VT.isInteger() ? VAListPtr : FprPtr,
   1349                               MachinePointerInfo(SV),
   1350                               MVT::i8, false, false, 0);
   1351 
   1352   // determine if we should load from reg_save_area or overflow_area
   1353   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
   1354 
   1355   // increase overflow_area by 4/8 if gpr/fpr > 8
   1356   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
   1357                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
   1358                                           MVT::i32));
   1359 
   1360   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
   1361                              OverflowAreaPlusN);
   1362 
   1363   InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
   1364                               OverflowAreaPtr,
   1365                               MachinePointerInfo(),
   1366                               MVT::i32, false, false, 0);
   1367 
   1368   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0);
   1369 }
   1370 
   1371 SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
   1372                                            SelectionDAG &DAG) const {
   1373   SDValue Chain = Op.getOperand(0);
   1374   SDValue Trmp = Op.getOperand(1); // trampoline
   1375   SDValue FPtr = Op.getOperand(2); // nested function
   1376   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
   1377   DebugLoc dl = Op.getDebugLoc();
   1378 
   1379   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   1380   bool isPPC64 = (PtrVT == MVT::i64);
   1381   Type *IntPtrTy =
   1382     DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
   1383                                                              *DAG.getContext());
   1384 
   1385   TargetLowering::ArgListTy Args;
   1386   TargetLowering::ArgListEntry Entry;
   1387 
   1388   Entry.Ty = IntPtrTy;
   1389   Entry.Node = Trmp; Args.push_back(Entry);
   1390 
   1391   // TrampSize == (isPPC64 ? 48 : 40);
   1392   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
   1393                                isPPC64 ? MVT::i64 : MVT::i32);
   1394   Args.push_back(Entry);
   1395 
   1396   Entry.Node = FPtr; Args.push_back(Entry);
   1397   Entry.Node = Nest; Args.push_back(Entry);
   1398 
   1399   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
   1400   std::pair<SDValue, SDValue> CallResult =
   1401     LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()),
   1402                 false, false, false, false, 0, CallingConv::C, false,
   1403                 /*isReturnValueUsed=*/true,
   1404                 DAG.getExternalSymbol("__trampoline_setup", PtrVT),
   1405                 Args, DAG, dl);
   1406 
   1407   SDValue Ops[] =
   1408     { CallResult.first, CallResult.second };
   1409 
   1410   return DAG.getMergeValues(Ops, 2, dl);
   1411 }
   1412 
   1413 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
   1414                                         const PPCSubtarget &Subtarget) const {
   1415   MachineFunction &MF = DAG.getMachineFunction();
   1416   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
   1417 
   1418   DebugLoc dl = Op.getDebugLoc();
   1419 
   1420   if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
   1421     // vastart just stores the address of the VarArgsFrameIndex slot into the
   1422     // memory location argument.
   1423     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   1424     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
   1425     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   1426     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
   1427                         MachinePointerInfo(SV),
   1428                         false, false, 0);
   1429   }
   1430 
   1431   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
   1432   // We suppose the given va_list is already allocated.
   1433   //
   1434   // typedef struct {
   1435   //  char gpr;     /* index into the array of 8 GPRs
   1436   //                 * stored in the register save area
   1437   //                 * gpr=0 corresponds to r3,
   1438   //                 * gpr=1 to r4, etc.
   1439   //                 */
   1440   //  char fpr;     /* index into the array of 8 FPRs
   1441   //                 * stored in the register save area
   1442   //                 * fpr=0 corresponds to f1,
   1443   //                 * fpr=1 to f2, etc.
   1444   //                 */
   1445   //  char *overflow_arg_area;
   1446   //                /* location on stack that holds
   1447   //                 * the next overflow argument
   1448   //                 */
   1449   //  char *reg_save_area;
   1450   //               /* where r3:r10 and f1:f8 (if saved)
   1451   //                * are stored
   1452   //                */
   1453   // } va_list[1];
   1454 
   1455 
   1456   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
   1457   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
   1458 
   1459 
   1460   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   1461 
   1462   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
   1463                                             PtrVT);
   1464   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
   1465                                  PtrVT);
   1466 
   1467   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
   1468   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
   1469 
   1470   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
   1471   SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
   1472 
   1473   uint64_t FPROffset = 1;
   1474   SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
   1475 
   1476   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   1477 
   1478   // Store first byte : number of int regs
   1479   SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
   1480                                          Op.getOperand(1),
   1481                                          MachinePointerInfo(SV),
   1482                                          MVT::i8, false, false, 0);
   1483   uint64_t nextOffset = FPROffset;
   1484   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
   1485                                   ConstFPROffset);
   1486 
   1487   // Store second byte : number of float regs
   1488   SDValue secondStore =
   1489     DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
   1490                       MachinePointerInfo(SV, nextOffset), MVT::i8,
   1491                       false, false, 0);
   1492   nextOffset += StackOffset;
   1493   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
   1494 
   1495   // Store second word : arguments given on stack
   1496   SDValue thirdStore =
   1497     DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
   1498                  MachinePointerInfo(SV, nextOffset),
   1499                  false, false, 0);
   1500   nextOffset += FrameOffset;
   1501   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
   1502 
   1503   // Store third word : arguments given in registers
   1504   return DAG.getStore(thirdStore, dl, FR, nextPtr,
   1505                       MachinePointerInfo(SV, nextOffset),
   1506                       false, false, 0);
   1507 
   1508 }
   1509 
   1510 #include "PPCGenCallingConv.inc"
   1511 
   1512 static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   1513                                      CCValAssign::LocInfo &LocInfo,
   1514                                      ISD::ArgFlagsTy &ArgFlags,
   1515                                      CCState &State) {
   1516   return true;
   1517 }
   1518 
   1519 static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
   1520                                             MVT &LocVT,
   1521                                             CCValAssign::LocInfo &LocInfo,
   1522                                             ISD::ArgFlagsTy &ArgFlags,
   1523                                             CCState &State) {
   1524   static const unsigned ArgRegs[] = {
   1525     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
   1526     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   1527   };
   1528   const unsigned NumArgRegs = array_lengthof(ArgRegs);
   1529 
   1530   unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
   1531 
   1532   // Skip one register if the first unallocated register has an even register
   1533   // number and there are still argument registers available which have not been
   1534   // allocated yet. RegNum is actually an index into ArgRegs, which means we
   1535   // need to skip a register if RegNum is odd.
   1536   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
   1537     State.AllocateReg(ArgRegs[RegNum]);
   1538   }
   1539 
   1540   // Always return false here, as this function only makes sure that the first
   1541   // unallocated register has an odd register number and does not actually
   1542   // allocate a register for the current argument.
   1543   return false;
   1544 }
   1545 
   1546 static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
   1547                                               MVT &LocVT,
   1548                                               CCValAssign::LocInfo &LocInfo,
   1549                                               ISD::ArgFlagsTy &ArgFlags,
   1550                                               CCState &State) {
   1551   static const unsigned ArgRegs[] = {
   1552     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
   1553     PPC::F8
   1554   };
   1555 
   1556   const unsigned NumArgRegs = array_lengthof(ArgRegs);
   1557 
   1558   unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
   1559 
   1560   // If there is only one Floating-point register left we need to put both f64
   1561   // values of a split ppc_fp128 value on the stack.
   1562   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
   1563     State.AllocateReg(ArgRegs[RegNum]);
   1564   }
   1565 
   1566   // Always return false here, as this function only makes sure that the two f64
   1567   // values a ppc_fp128 value is split into are both passed in registers or both
   1568   // passed on the stack and does not actually allocate a register for the
   1569   // current argument.
   1570   return false;
   1571 }
   1572 
   1573 /// GetFPR - Get the set of FP registers that should be allocated for arguments,
   1574 /// on Darwin.
   1575 static const unsigned *GetFPR() {
   1576   static const unsigned FPR[] = {
   1577     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
   1578     PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
   1579   };
   1580 
   1581   return FPR;
   1582 }
   1583 
   1584 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
   1585 /// the stack.
   1586 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
   1587                                        unsigned PtrByteSize) {
   1588   unsigned ArgSize = ArgVT.getSizeInBits()/8;
   1589   if (Flags.isByVal())
   1590     ArgSize = Flags.getByValSize();
   1591   ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
   1592 
   1593   return ArgSize;
   1594 }
   1595 
   1596 SDValue
   1597 PPCTargetLowering::LowerFormalArguments(SDValue Chain,
   1598                                         CallingConv::ID CallConv, bool isVarArg,
   1599                                         const SmallVectorImpl<ISD::InputArg>
   1600                                           &Ins,
   1601                                         DebugLoc dl, SelectionDAG &DAG,
   1602                                         SmallVectorImpl<SDValue> &InVals)
   1603                                           const {
   1604   if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
   1605     return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
   1606                                      dl, DAG, InVals);
   1607   } else {
   1608     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
   1609                                        dl, DAG, InVals);
   1610   }
   1611 }
   1612 
   1613 SDValue
   1614 PPCTargetLowering::LowerFormalArguments_SVR4(
   1615                                       SDValue Chain,
   1616                                       CallingConv::ID CallConv, bool isVarArg,
   1617                                       const SmallVectorImpl<ISD::InputArg>
   1618                                         &Ins,
   1619                                       DebugLoc dl, SelectionDAG &DAG,
   1620                                       SmallVectorImpl<SDValue> &InVals) const {
   1621 
   1622   // 32-bit SVR4 ABI Stack Frame Layout:
   1623   //              +-----------------------------------+
   1624   //        +-->  |            Back chain             |
   1625   //        |     +-----------------------------------+
   1626   //        |     | Floating-point register save area |
   1627   //        |     +-----------------------------------+
   1628   //        |     |    General register save area     |
   1629   //        |     +-----------------------------------+
   1630   //        |     |          CR save word             |
   1631   //        |     +-----------------------------------+
   1632   //        |     |         VRSAVE save word          |
   1633   //        |     +-----------------------------------+
   1634   //        |     |         Alignment padding         |
   1635   //        |     +-----------------------------------+
   1636   //        |     |     Vector register save area     |
   1637   //        |     +-----------------------------------+
   1638   //        |     |       Local variable space        |
   1639   //        |     +-----------------------------------+
   1640   //        |     |        Parameter list area        |
   1641   //        |     +-----------------------------------+
   1642   //        |     |           LR save word            |
   1643   //        |     +-----------------------------------+
   1644   // SP-->  +---  |            Back chain             |
   1645   //              +-----------------------------------+
   1646   //
   1647   // Specifications:
   1648   //   System V Application Binary Interface PowerPC Processor Supplement
   1649   //   AltiVec Technology Programming Interface Manual
   1650 
   1651   MachineFunction &MF = DAG.getMachineFunction();
   1652   MachineFrameInfo *MFI = MF.getFrameInfo();
   1653   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
   1654 
   1655   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   1656   // Potential tail calls could cause overwriting of argument stack slots.
   1657   bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
   1658   unsigned PtrByteSize = 4;
   1659 
   1660   // Assign locations to all of the incoming arguments.
   1661   SmallVector<CCValAssign, 16> ArgLocs;
   1662   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   1663 		 getTargetMachine(), ArgLocs, *DAG.getContext());
   1664 
   1665   // Reserve space for the linkage area on the stack.
   1666   CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
   1667 
   1668   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
   1669 
   1670   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   1671     CCValAssign &VA = ArgLocs[i];
   1672 
   1673     // Arguments stored in registers.
   1674     if (VA.isRegLoc()) {
   1675       TargetRegisterClass *RC;
   1676       EVT ValVT = VA.getValVT();
   1677 
   1678       switch (ValVT.getSimpleVT().SimpleTy) {
   1679         default:
   1680           llvm_unreachable("ValVT not supported by formal arguments Lowering");
   1681         case MVT::i32:
   1682           RC = PPC::GPRCRegisterClass;
   1683           break;
   1684         case MVT::f32:
   1685           RC = PPC::F4RCRegisterClass;
   1686           break;
   1687         case MVT::f64:
   1688           RC = PPC::F8RCRegisterClass;
   1689           break;
   1690         case MVT::v16i8:
   1691         case MVT::v8i16:
   1692         case MVT::v4i32:
   1693         case MVT::v4f32:
   1694           RC = PPC::VRRCRegisterClass;
   1695           break;
   1696       }
   1697 
   1698       // Transform the arguments stored in physical registers into virtual ones.
   1699       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   1700       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
   1701 
   1702       InVals.push_back(ArgValue);
   1703     } else {
   1704       // Argument stored in memory.
   1705       assert(VA.isMemLoc());
   1706 
   1707       unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
   1708       int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
   1709                                       isImmutable);
   1710 
   1711       // Create load nodes to retrieve arguments from the stack.
   1712       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
   1713       InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
   1714                                    MachinePointerInfo(),
   1715                                    false, false, 0));
   1716     }
   1717   }
   1718 
   1719   // Assign locations to all of the incoming aggregate by value arguments.
   1720   // Aggregates passed by value are stored in the local variable space of the
   1721   // caller's stack frame, right above the parameter list area.
   1722   SmallVector<CCValAssign, 16> ByValArgLocs;
   1723   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   1724 		      getTargetMachine(), ByValArgLocs, *DAG.getContext());
   1725 
   1726   // Reserve stack space for the allocations in CCInfo.
   1727   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
   1728 
   1729   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
   1730 
   1731   // Area that is at least reserved in the caller of this function.
   1732   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
   1733 
   1734   // Set the size that is at least reserved in caller of this function.  Tail
   1735   // call optimized function's reserved stack space needs to be aligned so that
   1736   // taking the difference between two stack areas will result in an aligned
   1737   // stack.
   1738   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   1739 
   1740   MinReservedArea =
   1741     std::max(MinReservedArea,
   1742              PPCFrameLowering::getMinCallFrameSize(false, false));
   1743 
   1744   unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
   1745     getStackAlignment();
   1746   unsigned AlignMask = TargetAlign-1;
   1747   MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
   1748 
   1749   FI->setMinReservedArea(MinReservedArea);
   1750 
   1751   SmallVector<SDValue, 8> MemOps;
   1752 
   1753   // If the function takes variable number of arguments, make a frame index for
   1754   // the start of the first vararg value... for expansion of llvm.va_start.
   1755   if (isVarArg) {
   1756     static const unsigned GPArgRegs[] = {
   1757       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
   1758       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   1759     };
   1760     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
   1761 
   1762     static const unsigned FPArgRegs[] = {
   1763       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
   1764       PPC::F8
   1765     };
   1766     const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
   1767 
   1768     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
   1769                                                           NumGPArgRegs));
   1770     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
   1771                                                           NumFPArgRegs));
   1772 
   1773     // Make room for NumGPArgRegs and NumFPArgRegs.
   1774     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
   1775                 NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
   1776 
   1777     FuncInfo->setVarArgsStackOffset(
   1778       MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
   1779                              CCInfo.getNextStackOffset(), true));
   1780 
   1781     FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
   1782     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
   1783 
   1784     // The fixed integer arguments of a variadic function are stored to the
   1785     // VarArgsFrameIndex on the stack so that they may be loaded by deferencing
   1786     // the result of va_next.
   1787     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
   1788       // Get an existing live-in vreg, or add a new one.
   1789       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
   1790       if (!VReg)
   1791         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
   1792 
   1793       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
   1794       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
   1795                                    MachinePointerInfo(), false, false, 0);
   1796       MemOps.push_back(Store);
   1797       // Increment the address by four for the next argument to store
   1798       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
   1799       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
   1800     }
   1801 
   1802     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
   1803     // is set.
   1804     // The double arguments are stored to the VarArgsFrameIndex
   1805     // on the stack.
   1806     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
   1807       // Get an existing live-in vreg, or add a new one.
   1808       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
   1809       if (!VReg)
   1810         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
   1811 
   1812       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
   1813       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
   1814                                    MachinePointerInfo(), false, false, 0);
   1815       MemOps.push_back(Store);
   1816       // Increment the address by eight for the next argument to store
   1817       SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
   1818                                          PtrVT);
   1819       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
   1820     }
   1821   }
   1822 
   1823   if (!MemOps.empty())
   1824     Chain = DAG.getNode(ISD::TokenFactor, dl,
   1825                         MVT::Other, &MemOps[0], MemOps.size());
   1826 
   1827   return Chain;
   1828 }
   1829 
   1830 SDValue
   1831 PPCTargetLowering::LowerFormalArguments_Darwin(
   1832                                       SDValue Chain,
   1833                                       CallingConv::ID CallConv, bool isVarArg,
   1834                                       const SmallVectorImpl<ISD::InputArg>
   1835                                         &Ins,
   1836                                       DebugLoc dl, SelectionDAG &DAG,
   1837                                       SmallVectorImpl<SDValue> &InVals) const {
   1838   // TODO: add description of PPC stack frame format, or at least some docs.
   1839   //
   1840   MachineFunction &MF = DAG.getMachineFunction();
   1841   MachineFrameInfo *MFI = MF.getFrameInfo();
   1842   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
   1843 
   1844   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   1845   bool isPPC64 = PtrVT == MVT::i64;
   1846   // Potential tail calls could cause overwriting of argument stack slots.
   1847   bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
   1848   unsigned PtrByteSize = isPPC64 ? 8 : 4;
   1849 
   1850   unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
   1851   // Area that is at least reserved in caller of this function.
   1852   unsigned MinReservedArea = ArgOffset;
   1853 
   1854   static const unsigned GPR_32[] = {           // 32-bit registers.
   1855     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
   1856     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   1857   };
   1858   static const unsigned GPR_64[] = {           // 64-bit registers.
   1859     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
   1860     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
   1861   };
   1862 
   1863   static const unsigned *FPR = GetFPR();
   1864 
   1865   static const unsigned VR[] = {
   1866     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
   1867     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   1868   };
   1869 
   1870   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
   1871   const unsigned Num_FPR_Regs = 13;
   1872   const unsigned Num_VR_Regs  = array_lengthof( VR);
   1873 
   1874   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
   1875 
   1876   const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
   1877 
   1878   // In 32-bit non-varargs functions, the stack space for vectors is after the
   1879   // stack space for non-vectors.  We do not use this space unless we have
   1880   // too many vectors to fit in registers, something that only occurs in
   1881   // constructed examples:), but we have to walk the arglist to figure
   1882   // that out...for the pathological case, compute VecArgOffset as the
   1883   // start of the vector parameter area.  Computing VecArgOffset is the
   1884   // entire point of the following loop.
   1885   unsigned VecArgOffset = ArgOffset;
   1886   if (!isVarArg && !isPPC64) {
   1887     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
   1888          ++ArgNo) {
   1889       EVT ObjectVT = Ins[ArgNo].VT;
   1890       unsigned ObjSize = ObjectVT.getSizeInBits()/8;
   1891       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
   1892 
   1893       if (Flags.isByVal()) {
   1894         // ObjSize is the true size, ArgSize rounded up to multiple of regs.
   1895         ObjSize = Flags.getByValSize();
   1896         unsigned ArgSize =
   1897                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
   1898         VecArgOffset += ArgSize;
   1899         continue;
   1900       }
   1901 
   1902       switch(ObjectVT.getSimpleVT().SimpleTy) {
   1903       default: llvm_unreachable("Unhandled argument type!");
   1904       case MVT::i32:
   1905       case MVT::f32:
   1906         VecArgOffset += isPPC64 ? 8 : 4;
   1907         break;
   1908       case MVT::i64:  // PPC64
   1909       case MVT::f64:
   1910         VecArgOffset += 8;
   1911         break;
   1912       case MVT::v4f32:
   1913       case MVT::v4i32:
   1914       case MVT::v8i16:
   1915       case MVT::v16i8:
   1916         // Nothing to do, we're only looking at Nonvector args here.
   1917         break;
   1918       }
   1919     }
   1920   }
   1921   // We've found where the vector parameter area in memory is.  Skip the
   1922   // first 12 parameters; these don't use that memory.
   1923   VecArgOffset = ((VecArgOffset+15)/16)*16;
   1924   VecArgOffset += 12*16;
   1925 
   1926   // Add DAG nodes to load the arguments or copy them out of registers.  On
   1927   // entry to a function on PPC, the arguments start after the linkage area,
   1928   // although the first ones are often in registers.
   1929 
   1930   SmallVector<SDValue, 8> MemOps;
   1931   unsigned nAltivecParamsAtEnd = 0;
   1932   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
   1933     SDValue ArgVal;
   1934     bool needsLoad = false;
   1935     EVT ObjectVT = Ins[ArgNo].VT;
   1936     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
   1937     unsigned ArgSize = ObjSize;
   1938     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
   1939 
   1940     unsigned CurArgOffset = ArgOffset;
   1941 
   1942     // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
   1943     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
   1944         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
   1945       if (isVarArg || isPPC64) {
   1946         MinReservedArea = ((MinReservedArea+15)/16)*16;
   1947         MinReservedArea += CalculateStackSlotSize(ObjectVT,
   1948                                                   Flags,
   1949                                                   PtrByteSize);
   1950       } else  nAltivecParamsAtEnd++;
   1951     } else
   1952       // Calculate min reserved area.
   1953       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
   1954                                                 Flags,
   1955                                                 PtrByteSize);
   1956 
   1957     // FIXME the codegen can be much improved in some cases.
   1958     // We do not have to keep everything in memory.
   1959     if (Flags.isByVal()) {
   1960       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
   1961       ObjSize = Flags.getByValSize();
   1962       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
   1963       // Objects of size 1 and 2 are right justified, everything else is
   1964       // left justified.  This means the memory address is adjusted forwards.
   1965       if (ObjSize==1 || ObjSize==2) {
   1966         CurArgOffset = CurArgOffset + (4 - ObjSize);
   1967       }
   1968       // The value of the object is its address.
   1969       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
   1970       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
   1971       InVals.push_back(FIN);
   1972       if (ObjSize==1 || ObjSize==2) {
   1973         if (GPR_idx != Num_GPR_Regs) {
   1974           unsigned VReg;
   1975           if (isPPC64)
   1976             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
   1977           else
   1978             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
   1979           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
   1980           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
   1981                                             MachinePointerInfo(),
   1982                                             ObjSize==1 ? MVT::i8 : MVT::i16,
   1983                                             false, false, 0);
   1984           MemOps.push_back(Store);
   1985           ++GPR_idx;
   1986         }
   1987 
   1988         ArgOffset += PtrByteSize;
   1989 
   1990         continue;
   1991       }
   1992       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
   1993         // Store whatever pieces of the object are in registers
   1994         // to memory.  ArgVal will be address of the beginning of
   1995         // the object.
   1996         if (GPR_idx != Num_GPR_Regs) {
   1997           unsigned VReg;
   1998           if (isPPC64)
   1999             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
   2000           else
   2001             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
   2002           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
   2003           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
   2004           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
   2005           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
   2006                                        MachinePointerInfo(),
   2007                                        false, false, 0);
   2008           MemOps.push_back(Store);
   2009           ++GPR_idx;
   2010           ArgOffset += PtrByteSize;
   2011         } else {
   2012           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
   2013           break;
   2014         }
   2015       }
   2016       continue;
   2017     }
   2018 
   2019     switch (ObjectVT.getSimpleVT().SimpleTy) {
   2020     default: llvm_unreachable("Unhandled argument type!");
   2021     case MVT::i32:
   2022       if (!isPPC64) {
   2023         if (GPR_idx != Num_GPR_Regs) {
   2024           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
   2025           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
   2026           ++GPR_idx;
   2027         } else {
   2028           needsLoad = true;
   2029           ArgSize = PtrByteSize;
   2030         }
   2031         // All int arguments reserve stack space in the Darwin ABI.
   2032         ArgOffset += PtrByteSize;
   2033         break;
   2034       }
   2035       // FALLTHROUGH
   2036     case MVT::i64:  // PPC64
   2037       if (GPR_idx != Num_GPR_Regs) {
   2038         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
   2039         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
   2040 
   2041         if (ObjectVT == MVT::i32) {
   2042           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
   2043           // value to MVT::i64 and then truncate to the correct register size.
   2044           if (Flags.isSExt())
   2045             ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
   2046                                  DAG.getValueType(ObjectVT));
   2047           else if (Flags.isZExt())
   2048             ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
   2049                                  DAG.getValueType(ObjectVT));
   2050 
   2051           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
   2052         }
   2053 
   2054         ++GPR_idx;
   2055       } else {
   2056         needsLoad = true;
   2057         ArgSize = PtrByteSize;
   2058       }
   2059       // All int arguments reserve stack space in the Darwin ABI.
   2060       ArgOffset += 8;
   2061       break;
   2062 
   2063     case MVT::f32:
   2064     case MVT::f64:
   2065       // Every 4 bytes of argument space consumes one of the GPRs available for
   2066       // argument passing.
   2067       if (GPR_idx != Num_GPR_Regs) {
   2068         ++GPR_idx;
   2069         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
   2070           ++GPR_idx;
   2071       }
   2072       if (FPR_idx != Num_FPR_Regs) {
   2073         unsigned VReg;
   2074 
   2075         if (ObjectVT == MVT::f32)
   2076           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
   2077         else
   2078           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
   2079 
   2080         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
   2081         ++FPR_idx;
   2082       } else {
   2083         needsLoad = true;
   2084       }
   2085 
   2086       // All FP arguments reserve stack space in the Darwin ABI.
   2087       ArgOffset += isPPC64 ? 8 : ObjSize;
   2088       break;
   2089     case MVT::v4f32:
   2090     case MVT::v4i32:
   2091     case MVT::v8i16:
   2092     case MVT::v16i8:
   2093       // Note that vector arguments in registers don't reserve stack space,
   2094       // except in varargs functions.
   2095       if (VR_idx != Num_VR_Regs) {
   2096         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
   2097         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
   2098         if (isVarArg) {
   2099           while ((ArgOffset % 16) != 0) {
   2100             ArgOffset += PtrByteSize;
   2101             if (GPR_idx != Num_GPR_Regs)
   2102               GPR_idx++;
   2103           }
   2104           ArgOffset += 16;
   2105           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
   2106         }
   2107         ++VR_idx;
   2108       } else {
   2109         if (!isVarArg && !isPPC64) {
   2110           // Vectors go after all the nonvectors.
   2111           CurArgOffset = VecArgOffset;
   2112           VecArgOffset += 16;
   2113         } else {
   2114           // Vectors are aligned.
   2115           ArgOffset = ((ArgOffset+15)/16)*16;
   2116           CurArgOffset = ArgOffset;
   2117           ArgOffset += 16;
   2118         }
   2119         needsLoad = true;
   2120       }
   2121       break;
   2122     }
   2123 
   2124     // We need to load the argument to a virtual register if we determined above
   2125     // that we ran out of physical registers of the appropriate type.
   2126     if (needsLoad) {
   2127       int FI = MFI->CreateFixedObject(ObjSize,
   2128                                       CurArgOffset + (ArgSize - ObjSize),
   2129                                       isImmutable);
   2130       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
   2131       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
   2132                            false, false, 0);
   2133     }
   2134 
   2135     InVals.push_back(ArgVal);
   2136   }
   2137 
   2138   // Set the size that is at least reserved in caller of this function.  Tail
   2139   // call optimized function's reserved stack space needs to be aligned so that
   2140   // taking the difference between two stack areas will result in an aligned
   2141   // stack.
   2142   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   2143   // Add the Altivec parameters at the end, if needed.
   2144   if (nAltivecParamsAtEnd) {
   2145     MinReservedArea = ((MinReservedArea+15)/16)*16;
   2146     MinReservedArea += 16*nAltivecParamsAtEnd;
   2147   }
   2148   MinReservedArea =
   2149     std::max(MinReservedArea,
   2150              PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
   2151   unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
   2152     getStackAlignment();
   2153   unsigned AlignMask = TargetAlign-1;
   2154   MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
   2155   FI->setMinReservedArea(MinReservedArea);
   2156 
   2157   // If the function takes variable number of arguments, make a frame index for
   2158   // the start of the first vararg value... for expansion of llvm.va_start.
   2159   if (isVarArg) {
   2160     int Depth = ArgOffset;
   2161 
   2162     FuncInfo->setVarArgsFrameIndex(
   2163       MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
   2164                              Depth, true));
   2165     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
   2166 
   2167     // If this function is vararg, store any remaining integer argument regs
   2168     // to their spots on the stack so that they may be loaded by dereferencing the
   2169     // result of va_next.
   2170     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
   2171       unsigned VReg;
   2172 
   2173       if (isPPC64)
   2174         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
   2175       else
   2176         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
   2177 
   2178       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
   2179       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
   2180                                    MachinePointerInfo(), false, false, 0);
   2181       MemOps.push_back(Store);
   2182       // Increment the address by four for the next argument to store
   2183       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
   2184       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
   2185     }
   2186   }
   2187 
   2188   if (!MemOps.empty())
   2189     Chain = DAG.getNode(ISD::TokenFactor, dl,
   2190                         MVT::Other, &MemOps[0], MemOps.size());
   2191 
   2192   return Chain;
   2193 }
   2194 
   2195 /// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus
   2196 /// linkage area for the Darwin ABI.
   2197 static unsigned
   2198 CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
   2199                                      bool isPPC64,
   2200                                      bool isVarArg,
   2201                                      unsigned CC,
   2202                                      const SmallVectorImpl<ISD::OutputArg>
   2203                                        &Outs,
   2204                                      const SmallVectorImpl<SDValue> &OutVals,
   2205                                      unsigned &nAltivecParamsAtEnd) {
   2206   // Count how many bytes are to be pushed on the stack, including the linkage
   2207   // area, and parameter passing area.  We start with 24/48 bytes, which is
   2208   // prereserved space for [SP][CR][LR][3 x unused].
   2209   unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
   2210   unsigned NumOps = Outs.size();
   2211   unsigned PtrByteSize = isPPC64 ? 8 : 4;
   2212 
   2213   // Add up all the space actually used.
   2214   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
   2215   // they all go in registers, but we must reserve stack space for them for
   2216   // possible use by the caller.  In varargs or 64-bit calls, parameters are
   2217   // assigned stack space in order, with padding so Altivec parameters are
   2218   // 16-byte aligned.
   2219   nAltivecParamsAtEnd = 0;
   2220   for (unsigned i = 0; i != NumOps; ++i) {
   2221     ISD::ArgFlagsTy Flags = Outs[i].Flags;
   2222     EVT ArgVT = Outs[i].VT;
   2223     // Varargs Altivec parameters are padded to a 16 byte boundary.
   2224     if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
   2225         ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
   2226       if (!isVarArg && !isPPC64) {
   2227         // Non-varargs Altivec parameters go after all the non-Altivec
   2228         // parameters; handle those later so we know how much padding we need.
   2229         nAltivecParamsAtEnd++;
   2230         continue;
   2231       }
   2232       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
   2233       NumBytes = ((NumBytes+15)/16)*16;
   2234     }
   2235     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
   2236   }
   2237 
   2238    // Allow for Altivec parameters at the end, if needed.
   2239   if (nAltivecParamsAtEnd) {
   2240     NumBytes = ((NumBytes+15)/16)*16;
   2241     NumBytes += 16*nAltivecParamsAtEnd;
   2242   }
   2243 
   2244   // The prolog code of the callee may store up to 8 GPR argument registers to
   2245   // the stack, allowing va_start to index over them in memory if its varargs.
   2246   // Because we cannot tell if this is needed on the caller side, we have to
   2247   // conservatively assume that it is needed.  As such, make sure we have at
   2248   // least enough stack space for the caller to store the 8 GPRs.
   2249   NumBytes = std::max(NumBytes,
   2250                       PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
   2251 
   2252   // Tail call needs the stack to be aligned.
   2253   if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
   2254     unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
   2255       getStackAlignment();
   2256     unsigned AlignMask = TargetAlign-1;
   2257     NumBytes = (NumBytes + AlignMask) & ~AlignMask;
   2258   }
   2259 
   2260   return NumBytes;
   2261 }
   2262 
   2263 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
   2264 /// adjusted to accommodate the arguments for the tailcall.
   2265 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
   2266                                    unsigned ParamSize) {
   2267 
   2268   if (!isTailCall) return 0;
   2269 
   2270   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
   2271   unsigned CallerMinReservedArea = FI->getMinReservedArea();
   2272   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
   2273   // Remember only if the new adjustement is bigger.
   2274   if (SPDiff < FI->getTailCallSPDelta())
   2275     FI->setTailCallSPDelta(SPDiff);
   2276 
   2277   return SPDiff;
   2278 }
   2279 
   2280 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
   2281 /// for tail call optimization. Targets which want to do tail call
   2282 /// optimization should implement this function.
   2283 bool
   2284 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   2285                                                      CallingConv::ID CalleeCC,
   2286                                                      bool isVarArg,
   2287                                       const SmallVectorImpl<ISD::InputArg> &Ins,
   2288                                                      SelectionDAG& DAG) const {
   2289   if (!GuaranteedTailCallOpt)
   2290     return false;
   2291 
   2292   // Variable argument functions are not supported.
   2293   if (isVarArg)
   2294     return false;
   2295 
   2296   MachineFunction &MF = DAG.getMachineFunction();
   2297   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
   2298   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
   2299     // Functions containing by val parameters are not supported.
   2300     for (unsigned i = 0; i != Ins.size(); i++) {
   2301        ISD::ArgFlagsTy Flags = Ins[i].Flags;
   2302        if (Flags.isByVal()) return false;
   2303     }
   2304 
   2305     // Non PIC/GOT  tail calls are supported.
   2306     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
   2307       return true;
   2308 
   2309     // At the moment we can only do local tail calls (in same module, hidden
   2310     // or protected) if we are generating PIC.
   2311     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
   2312       return G->getGlobal()->hasHiddenVisibility()
   2313           || G->getGlobal()->hasProtectedVisibility();
   2314   }
   2315 
   2316   return false;
   2317 }
   2318 
   2319 /// isCallCompatibleAddress - Return the immediate to use if the specified
   2320 /// 32-bit value is representable in the immediate field of a BxA instruction.
   2321 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
   2322   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
   2323   if (!C) return 0;
   2324 
   2325   int Addr = C->getZExtValue();
   2326   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
   2327       (Addr << 6 >> 6) != Addr)
   2328     return 0;  // Top 6 bits have to be sext of immediate.
   2329 
   2330   return DAG.getConstant((int)C->getZExtValue() >> 2,
   2331                          DAG.getTargetLoweringInfo().getPointerTy()).getNode();
   2332 }
   2333 
   2334 namespace {
   2335 
   2336 struct TailCallArgumentInfo {
   2337   SDValue Arg;
   2338   SDValue FrameIdxOp;
   2339   int       FrameIdx;
   2340 
   2341   TailCallArgumentInfo() : FrameIdx(0) {}
   2342 };
   2343 
   2344 }
   2345 
   2346 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
   2347 static void
   2348 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
   2349                                            SDValue Chain,
   2350                    const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
   2351                    SmallVector<SDValue, 8> &MemOpChains,
   2352                    DebugLoc dl) {
   2353   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
   2354     SDValue Arg = TailCallArgs[i].Arg;
   2355     SDValue FIN = TailCallArgs[i].FrameIdxOp;
   2356     int FI = TailCallArgs[i].FrameIdx;
   2357     // Store relative to framepointer.
   2358     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
   2359                                        MachinePointerInfo::getFixedStack(FI),
   2360                                        false, false, 0));
   2361   }
   2362 }
   2363 
   2364 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
   2365 /// the appropriate stack slot for the tail call optimized function call.
   2366 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
   2367                                                MachineFunction &MF,
   2368                                                SDValue Chain,
   2369                                                SDValue OldRetAddr,
   2370                                                SDValue OldFP,
   2371                                                int SPDiff,
   2372                                                bool isPPC64,
   2373                                                bool isDarwinABI,
   2374                                                DebugLoc dl) {
   2375   if (SPDiff) {
   2376     // Calculate the new stack slot for the return address.
   2377     int SlotSize = isPPC64 ? 8 : 4;
   2378     int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
   2379                                                                    isDarwinABI);
   2380     int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
   2381                                                           NewRetAddrLoc, true);
   2382     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
   2383     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
   2384     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
   2385                          MachinePointerInfo::getFixedStack(NewRetAddr),
   2386                          false, false, 0);
   2387 
   2388     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
   2389     // slot as the FP is never overwritten.
   2390     if (isDarwinABI) {
   2391       int NewFPLoc =
   2392         SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
   2393       int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
   2394                                                           true);
   2395       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
   2396       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
   2397                            MachinePointerInfo::getFixedStack(NewFPIdx),
   2398                            false, false, 0);
   2399     }
   2400   }
   2401   return Chain;
   2402 }
   2403 
   2404 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
   2405 /// the position of the argument.
   2406 static void
   2407 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
   2408                          SDValue Arg, int SPDiff, unsigned ArgOffset,
   2409                       SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
   2410   int Offset = ArgOffset + SPDiff;
   2411   uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
   2412   int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
   2413   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
   2414   SDValue FIN = DAG.getFrameIndex(FI, VT);
   2415   TailCallArgumentInfo Info;
   2416   Info.Arg = Arg;
   2417   Info.FrameIdxOp = FIN;
   2418   Info.FrameIdx = FI;
   2419   TailCallArguments.push_back(Info);
   2420 }
   2421 
   2422 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
   2423 /// stack slot. Returns the chain as result and the loaded frame pointers in
   2424 /// LROpOut/FPOpout. Used when tail calling.
   2425 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
   2426                                                         int SPDiff,
   2427                                                         SDValue Chain,
   2428                                                         SDValue &LROpOut,
   2429                                                         SDValue &FPOpOut,
   2430                                                         bool isDarwinABI,
   2431                                                         DebugLoc dl) const {
   2432   if (SPDiff) {
   2433     // Load the LR and FP stack slot for later adjusting.
   2434     EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
   2435     LROpOut = getReturnAddrFrameIndex(DAG);
   2436     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
   2437                           false, false, 0);
   2438     Chain = SDValue(LROpOut.getNode(), 1);
   2439 
   2440     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
   2441     // slot as the FP is never overwritten.
   2442     if (isDarwinABI) {
   2443       FPOpOut = getFramePointerFrameIndex(DAG);
   2444       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
   2445                             false, false, 0);
   2446       Chain = SDValue(FPOpOut.getNode(), 1);
   2447     }
   2448   }
   2449   return Chain;
   2450 }
   2451 
   2452 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
   2453 /// by "Src" to address "Dst" of size "Size".  Alignment information is
   2454 /// specified by the specific parameter attribute. The copy will be passed as
   2455 /// a byval function parameter.
   2456 /// Sometimes what we are copying is the end of a larger object, the part that
   2457 /// does not fit in registers.
   2458 static SDValue
   2459 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
   2460                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
   2461                           DebugLoc dl) {
   2462   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
   2463   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
   2464                        false, false, MachinePointerInfo(0),
   2465                        MachinePointerInfo(0));
   2466 }
   2467 
   2468 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
   2469 /// tail calls.
   2470 static void
   2471 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
   2472                  SDValue Arg, SDValue PtrOff, int SPDiff,
   2473                  unsigned ArgOffset, bool isPPC64, bool isTailCall,
   2474                  bool isVector, SmallVector<SDValue, 8> &MemOpChains,
   2475                  SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
   2476                  DebugLoc dl) {
   2477   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   2478   if (!isTailCall) {
   2479     if (isVector) {
   2480       SDValue StackPtr;
   2481       if (isPPC64)
   2482         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
   2483       else
   2484         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
   2485       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
   2486                            DAG.getConstant(ArgOffset, PtrVT));
   2487     }
   2488     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
   2489                                        MachinePointerInfo(), false, false, 0));
   2490   // Calculate and remember argument location.
   2491   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
   2492                                   TailCallArguments);
   2493 }
   2494 
   2495 static
   2496 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
   2497                      DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
   2498                      SDValue LROp, SDValue FPOp, bool isDarwinABI,
   2499                      SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
   2500   MachineFunction &MF = DAG.getMachineFunction();
   2501 
   2502   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
   2503   // might overwrite each other in case of tail call optimization.
   2504   SmallVector<SDValue, 8> MemOpChains2;
   2505   // Do not flag preceding copytoreg stuff together with the following stuff.
   2506   InFlag = SDValue();
   2507   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
   2508                                     MemOpChains2, dl);
   2509   if (!MemOpChains2.empty())
   2510     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   2511                         &MemOpChains2[0], MemOpChains2.size());
   2512 
   2513   // Store the return address to the appropriate stack slot.
   2514   Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
   2515                                         isPPC64, isDarwinABI, dl);
   2516 
   2517   // Emit callseq_end just before tailcall node.
   2518   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
   2519                              DAG.getIntPtrConstant(0, true), InFlag);
   2520   InFlag = Chain.getValue(1);
   2521 }
   2522 
   2523 static
   2524 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
   2525                      SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
   2526                      SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
   2527                      SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
   2528                      const PPCSubtarget &PPCSubTarget) {
   2529 
   2530   bool isPPC64 = PPCSubTarget.isPPC64();
   2531   bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
   2532 
   2533   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   2534   NodeTys.push_back(MVT::Other);   // Returns a chain
   2535   NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
   2536 
   2537   unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
   2538 
   2539   bool needIndirectCall = true;
   2540   if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
   2541     // If this is an absolute destination address, use the munged value.
   2542     Callee = SDValue(Dest, 0);
   2543     needIndirectCall = false;
   2544   }
   2545 
   2546   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
   2547     // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
   2548     // Use indirect calls for ALL functions calls in JIT mode, since the
   2549     // far-call stubs may be outside relocation limits for a BL instruction.
   2550     if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
   2551       unsigned OpFlags = 0;
   2552       if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
   2553           (!PPCSubTarget.getTargetTriple().isMacOSX() ||
   2554            PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
   2555           (G->getGlobal()->isDeclaration() ||
   2556            G->getGlobal()->isWeakForLinker())) {
   2557         // PC-relative references to external symbols should go through $stub,
   2558         // unless we're building with the leopard linker or later, which
   2559         // automatically synthesizes these stubs.
   2560         OpFlags = PPCII::MO_DARWIN_STUB;
   2561       }
   2562 
   2563       // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
   2564       // every direct call is) turn it into a TargetGlobalAddress /
   2565       // TargetExternalSymbol node so that legalize doesn't hack it.
   2566       Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
   2567                                           Callee.getValueType(),
   2568                                           0, OpFlags);
   2569       needIndirectCall = false;
   2570     }
   2571   }
   2572 
   2573   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
   2574     unsigned char OpFlags = 0;
   2575 
   2576     if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
   2577         (!PPCSubTarget.getTargetTriple().isMacOSX() ||
   2578          PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
   2579       // PC-relative references to external symbols should go through $stub,
   2580       // unless we're building with the leopard linker or later, which
   2581       // automatically synthesizes these stubs.
   2582       OpFlags = PPCII::MO_DARWIN_STUB;
   2583     }
   2584 
   2585     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
   2586                                          OpFlags);
   2587     needIndirectCall = false;
   2588   }
   2589 
   2590   if (needIndirectCall) {
   2591     // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
   2592     // to do the call, we can't use PPCISD::CALL.
   2593     SDValue MTCTROps[] = {Chain, Callee, InFlag};
   2594 
   2595     if (isSVR4ABI && isPPC64) {
   2596       // Function pointers in the 64-bit SVR4 ABI do not point to the function
   2597       // entry point, but to the function descriptor (the function entry point
   2598       // address is part of the function descriptor though).
   2599       // The function descriptor is a three doubleword structure with the
   2600       // following fields: function entry point, TOC base address and
   2601       // environment pointer.
   2602       // Thus for a call through a function pointer, the following actions need
   2603       // to be performed:
   2604       //   1. Save the TOC of the caller in the TOC save area of its stack
   2605       //      frame (this is done in LowerCall_Darwin()).
   2606       //   2. Load the address of the function entry point from the function
   2607       //      descriptor.
   2608       //   3. Load the TOC of the callee from the function descriptor into r2.
   2609       //   4. Load the environment pointer from the function descriptor into
   2610       //      r11.
   2611       //   5. Branch to the function entry point address.
   2612       //   6. On return of the callee, the TOC of the caller needs to be
   2613       //      restored (this is done in FinishCall()).
   2614       //
   2615       // All those operations are flagged together to ensure that no other
   2616       // operations can be scheduled in between. E.g. without flagging the
   2617       // operations together, a TOC access in the caller could be scheduled
   2618       // between the load of the callee TOC and the branch to the callee, which
   2619       // results in the TOC access going through the TOC of the callee instead
   2620       // of going through the TOC of the caller, which leads to incorrect code.
   2621 
   2622       // Load the address of the function entry point from the function
   2623       // descriptor.
   2624       SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
   2625       SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
   2626                                         InFlag.getNode() ? 3 : 2);
   2627       Chain = LoadFuncPtr.getValue(1);
   2628       InFlag = LoadFuncPtr.getValue(2);
   2629 
   2630       // Load environment pointer into r11.
   2631       // Offset of the environment pointer within the function descriptor.
   2632       SDValue PtrOff = DAG.getIntPtrConstant(16);
   2633 
   2634       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
   2635       SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
   2636                                        InFlag);
   2637       Chain = LoadEnvPtr.getValue(1);
   2638       InFlag = LoadEnvPtr.getValue(2);
   2639 
   2640       SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
   2641                                         InFlag);
   2642       Chain = EnvVal.getValue(0);
   2643       InFlag = EnvVal.getValue(1);
   2644 
   2645       // Load TOC of the callee into r2. We are using a target-specific load
   2646       // with r2 hard coded, because the result of a target-independent load
   2647       // would never go directly into r2, since r2 is a reserved register (which
   2648       // prevents the register allocator from allocating it), resulting in an
   2649       // additional register being allocated and an unnecessary move instruction
   2650       // being generated.
   2651       VTs = DAG.getVTList(MVT::Other, MVT::Glue);
   2652       SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
   2653                                        Callee, InFlag);
   2654       Chain = LoadTOCPtr.getValue(0);
   2655       InFlag = LoadTOCPtr.getValue(1);
   2656 
   2657       MTCTROps[0] = Chain;
   2658       MTCTROps[1] = LoadFuncPtr;
   2659       MTCTROps[2] = InFlag;
   2660     }
   2661 
   2662     Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
   2663                         2 + (InFlag.getNode() != 0));
   2664     InFlag = Chain.getValue(1);
   2665 
   2666     NodeTys.clear();
   2667     NodeTys.push_back(MVT::Other);
   2668     NodeTys.push_back(MVT::Glue);
   2669     Ops.push_back(Chain);
   2670     CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
   2671     Callee.setNode(0);
   2672     // Add CTR register as callee so a bctr can be emitted later.
   2673     if (isTailCall)
   2674       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
   2675   }
   2676 
   2677   // If this is a direct call, pass the chain and the callee.
   2678   if (Callee.getNode()) {
   2679     Ops.push_back(Chain);
   2680     Ops.push_back(Callee);
   2681   }
   2682   // If this is a tail call add stack pointer delta.
   2683   if (isTailCall)
   2684     Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
   2685 
   2686   // Add argument registers to the end of the list so that they are known live
   2687   // into the call.
   2688   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
   2689     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
   2690                                   RegsToPass[i].second.getValueType()));
   2691 
   2692   return CallOpc;
   2693 }
   2694 
   2695 SDValue
   2696 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
   2697                                    CallingConv::ID CallConv, bool isVarArg,
   2698                                    const SmallVectorImpl<ISD::InputArg> &Ins,
   2699                                    DebugLoc dl, SelectionDAG &DAG,
   2700                                    SmallVectorImpl<SDValue> &InVals) const {
   2701 
   2702   SmallVector<CCValAssign, 16> RVLocs;
   2703   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   2704 		    getTargetMachine(), RVLocs, *DAG.getContext());
   2705   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
   2706 
   2707   // Copy all of the result registers out of their specified physreg.
   2708   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
   2709     CCValAssign &VA = RVLocs[i];
   2710     EVT VT = VA.getValVT();
   2711     assert(VA.isRegLoc() && "Can only return in registers!");
   2712     Chain = DAG.getCopyFromReg(Chain, dl,
   2713                                VA.getLocReg(), VT, InFlag).getValue(1);
   2714     InVals.push_back(Chain.getValue(0));
   2715     InFlag = Chain.getValue(2);
   2716   }
   2717 
   2718   return Chain;
   2719 }
   2720 
   2721 SDValue
   2722 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
   2723                               bool isTailCall, bool isVarArg,
   2724                               SelectionDAG &DAG,
   2725                               SmallVector<std::pair<unsigned, SDValue>, 8>
   2726                                 &RegsToPass,
   2727                               SDValue InFlag, SDValue Chain,
   2728                               SDValue &Callee,
   2729                               int SPDiff, unsigned NumBytes,
   2730                               const SmallVectorImpl<ISD::InputArg> &Ins,
   2731                               SmallVectorImpl<SDValue> &InVals) const {
   2732   std::vector<EVT> NodeTys;
   2733   SmallVector<SDValue, 8> Ops;
   2734   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
   2735                                  isTailCall, RegsToPass, Ops, NodeTys,
   2736                                  PPCSubTarget);
   2737 
   2738   // When performing tail call optimization the callee pops its arguments off
   2739   // the stack. Account for this here so these bytes can be pushed back on in
   2740   // PPCRegisterInfo::eliminateCallFramePseudoInstr.
   2741   int BytesCalleePops =
   2742     (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0;
   2743 
   2744   if (InFlag.getNode())
   2745     Ops.push_back(InFlag);
   2746 
   2747   // Emit tail call.
   2748   if (isTailCall) {
   2749     // If this is the first return lowered for this function, add the regs
   2750     // to the liveout set for the function.
   2751     if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
   2752       SmallVector<CCValAssign, 16> RVLocs;
   2753       CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   2754 		     getTargetMachine(), RVLocs, *DAG.getContext());
   2755       CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
   2756       for (unsigned i = 0; i != RVLocs.size(); ++i)
   2757         DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
   2758     }
   2759 
   2760     assert(((Callee.getOpcode() == ISD::Register &&
   2761              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
   2762             Callee.getOpcode() == ISD::TargetExternalSymbol ||
   2763             Callee.getOpcode() == ISD::TargetGlobalAddress ||
   2764             isa<ConstantSDNode>(Callee)) &&
   2765     "Expecting an global address, external symbol, absolute value or register");
   2766 
   2767     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
   2768   }
   2769 
   2770   Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
   2771   InFlag = Chain.getValue(1);
   2772 
   2773   // Add a NOP immediately after the branch instruction when using the 64-bit
   2774   // SVR4 ABI. At link time, if caller and callee are in a different module and
   2775   // thus have a different TOC, the call will be replaced with a call to a stub
   2776   // function which saves the current TOC, loads the TOC of the callee and
   2777   // branches to the callee. The NOP will be replaced with a load instruction
   2778   // which restores the TOC of the caller from the TOC save slot of the current
   2779   // stack frame. If caller and callee belong to the same module (and have the
   2780   // same TOC), the NOP will remain unchanged.
   2781   if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
   2782     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
   2783     if (CallOpc == PPCISD::BCTRL_SVR4) {
   2784       // This is a call through a function pointer.
   2785       // Restore the caller TOC from the save area into R2.
   2786       // See PrepareCall() for more information about calls through function
   2787       // pointers in the 64-bit SVR4 ABI.
   2788       // We are using a target-specific load with r2 hard coded, because the
   2789       // result of a target-independent load would never go directly into r2,
   2790       // since r2 is a reserved register (which prevents the register allocator
   2791       // from allocating it), resulting in an additional register being
   2792       // allocated and an unnecessary move instruction being generated.
   2793       Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
   2794       InFlag = Chain.getValue(1);
   2795     } else {
   2796       // Otherwise insert NOP.
   2797       InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
   2798     }
   2799   }
   2800 
   2801   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
   2802                              DAG.getIntPtrConstant(BytesCalleePops, true),
   2803                              InFlag);
   2804   if (!Ins.empty())
   2805     InFlag = Chain.getValue(1);
   2806 
   2807   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
   2808                          Ins, dl, DAG, InVals);
   2809 }
   2810 
   2811 SDValue
   2812 PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   2813                              CallingConv::ID CallConv, bool isVarArg,
   2814                              bool &isTailCall,
   2815                              const SmallVectorImpl<ISD::OutputArg> &Outs,
   2816                              const SmallVectorImpl<SDValue> &OutVals,
   2817                              const SmallVectorImpl<ISD::InputArg> &Ins,
   2818                              DebugLoc dl, SelectionDAG &DAG,
   2819                              SmallVectorImpl<SDValue> &InVals) const {
   2820   if (isTailCall)
   2821     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
   2822                                                    Ins, DAG);
   2823 
   2824   if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
   2825     return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
   2826                           isTailCall, Outs, OutVals, Ins,
   2827                           dl, DAG, InVals);
   2828 
   2829   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
   2830                           isTailCall, Outs, OutVals, Ins,
   2831                           dl, DAG, InVals);
   2832 }
   2833 
   2834 SDValue
   2835 PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
   2836                                   CallingConv::ID CallConv, bool isVarArg,
   2837                                   bool isTailCall,
   2838                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
   2839                                   const SmallVectorImpl<SDValue> &OutVals,
   2840                                   const SmallVectorImpl<ISD::InputArg> &Ins,
   2841                                   DebugLoc dl, SelectionDAG &DAG,
   2842                                   SmallVectorImpl<SDValue> &InVals) const {
   2843   // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
   2844   // of the 32-bit SVR4 ABI stack frame layout.
   2845 
   2846   assert((CallConv == CallingConv::C ||
   2847           CallConv == CallingConv::Fast) && "Unknown calling convention!");
   2848 
   2849   unsigned PtrByteSize = 4;
   2850 
   2851   MachineFunction &MF = DAG.getMachineFunction();
   2852 
   2853   // Mark this function as potentially containing a function that contains a
   2854   // tail call. As a consequence the frame pointer will be used for dynamicalloc
   2855   // and restoring the callers stack pointer in this functions epilog. This is
   2856   // done because by tail calling the called function might overwrite the value
   2857   // in this function's (MF) stack pointer stack slot 0(SP).
   2858   if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
   2859     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
   2860 
   2861   // Count how many bytes are to be pushed on the stack, including the linkage
   2862   // area, parameter list area and the part of the local variable space which
   2863   // contains copies of aggregates which are passed by value.
   2864 
   2865   // Assign locations to all of the outgoing arguments.
   2866   SmallVector<CCValAssign, 16> ArgLocs;
   2867   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   2868 		 getTargetMachine(), ArgLocs, *DAG.getContext());
   2869 
   2870   // Reserve space for the linkage area on the stack.
   2871   CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
   2872 
   2873   if (isVarArg) {
   2874     // Handle fixed and variable vector arguments differently.
   2875     // Fixed vector arguments go into registers as long as registers are
   2876     // available. Variable vector arguments always go into memory.
   2877     unsigned NumArgs = Outs.size();
   2878 
   2879     for (unsigned i = 0; i != NumArgs; ++i) {
   2880       MVT ArgVT = Outs[i].VT;
   2881       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
   2882       bool Result;
   2883 
   2884       if (Outs[i].IsFixed) {
   2885         Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
   2886                              CCInfo);
   2887       } else {
   2888         Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
   2889                                     ArgFlags, CCInfo);
   2890       }
   2891 
   2892       if (Result) {
   2893 #ifndef NDEBUG
   2894         errs() << "Call operand #" << i << " has unhandled type "
   2895              << EVT(ArgVT).getEVTString() << "\n";
   2896 #endif
   2897         llvm_unreachable(0);
   2898       }
   2899     }
   2900   } else {
   2901     // All arguments are treated the same.
   2902     CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
   2903   }
   2904 
   2905   // Assign locations to all of the outgoing aggregate by value arguments.
   2906   SmallVector<CCValAssign, 16> ByValArgLocs;
   2907   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   2908 		      getTargetMachine(), ByValArgLocs, *DAG.getContext());
   2909 
   2910   // Reserve stack space for the allocations in CCInfo.
   2911   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
   2912 
   2913   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
   2914 
   2915   // Size of the linkage area, parameter list area and the part of the local
   2916   // space variable where copies of aggregates which are passed by value are
   2917   // stored.
   2918   unsigned NumBytes = CCByValInfo.getNextStackOffset();
   2919 
   2920   // Calculate by how many bytes the stack has to be adjusted in case of tail
   2921   // call optimization.
   2922   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
   2923 
   2924   // Adjust the stack pointer for the new arguments...
   2925   // These operations are automatically eliminated by the prolog/epilog pass
   2926   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
   2927   SDValue CallSeqStart = Chain;
   2928 
   2929   // Load the return address and frame pointer so it can be moved somewhere else
   2930   // later.
   2931   SDValue LROp, FPOp;
   2932   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
   2933                                        dl);
   2934 
   2935   // Set up a copy of the stack pointer for use loading and storing any
   2936   // arguments that may not fit in the registers available for argument
   2937   // passing.
   2938   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
   2939 
   2940   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   2941   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
   2942   SmallVector<SDValue, 8> MemOpChains;
   2943 
   2944   // Walk the register/memloc assignments, inserting copies/loads.
   2945   for (unsigned i = 0, j = 0, e = ArgLocs.size();
   2946        i != e;
   2947        ++i) {
   2948     CCValAssign &VA = ArgLocs[i];
   2949     SDValue Arg = OutVals[i];
   2950     ISD::ArgFlagsTy Flags = Outs[i].Flags;
   2951 
   2952     if (Flags.isByVal()) {
   2953       // Argument is an aggregate which is passed by value, thus we need to
   2954       // create a copy of it in the local variable space of the current stack
   2955       // frame (which is the stack frame of the caller) and pass the address of
   2956       // this copy to the callee.
   2957       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
   2958       CCValAssign &ByValVA = ByValArgLocs[j++];
   2959       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
   2960 
   2961       // Memory reserved in the local variable space of the callers stack frame.
   2962       unsigned LocMemOffset = ByValVA.getLocMemOffset();
   2963 
   2964       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   2965       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
   2966 
   2967       // Create a copy of the argument in the local area of the current
   2968       // stack frame.
   2969       SDValue MemcpyCall =
   2970         CreateCopyOfByValArgument(Arg, PtrOff,
   2971                                   CallSeqStart.getNode()->getOperand(0),
   2972                                   Flags, DAG, dl);
   2973 
   2974       // This must go outside the CALLSEQ_START..END.
   2975       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
   2976                            CallSeqStart.getNode()->getOperand(1));
   2977       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
   2978                              NewCallSeqStart.getNode());
   2979       Chain = CallSeqStart = NewCallSeqStart;
   2980 
   2981       // Pass the address of the aggregate copy on the stack either in a
   2982       // physical register or in the parameter list area of the current stack
   2983       // frame to the callee.
   2984       Arg = PtrOff;
   2985     }
   2986 
   2987     if (VA.isRegLoc()) {
   2988       // Put argument in a physical register.
   2989       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
   2990     } else {
   2991       // Put argument in the parameter list area of the current stack frame.
   2992       assert(VA.isMemLoc());
   2993       unsigned LocMemOffset = VA.getLocMemOffset();
   2994 
   2995       if (!isTailCall) {
   2996         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   2997         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
   2998 
   2999         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
   3000                                            MachinePointerInfo(),
   3001                                            false, false, 0));
   3002       } else {
   3003         // Calculate and remember argument location.
   3004         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
   3005                                  TailCallArguments);
   3006       }
   3007     }
   3008   }
   3009 
   3010   if (!MemOpChains.empty())
   3011     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   3012                         &MemOpChains[0], MemOpChains.size());
   3013 
   3014   // Set CR6 to true if this is a vararg call.
   3015   if (isVarArg) {
   3016     SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
   3017     RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
   3018   }
   3019 
   3020   // Build a sequence of copy-to-reg nodes chained together with token chain
   3021   // and flag operands which copy the outgoing args into the appropriate regs.
   3022   SDValue InFlag;
   3023   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
   3024     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
   3025                              RegsToPass[i].second, InFlag);
   3026     InFlag = Chain.getValue(1);
   3027   }
   3028 
   3029   if (isTailCall)
   3030     PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
   3031                     false, TailCallArguments);
   3032 
   3033   return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
   3034                     RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
   3035                     Ins, InVals);
   3036 }
   3037 
   3038 SDValue
   3039 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
   3040                                     CallingConv::ID CallConv, bool isVarArg,
   3041                                     bool isTailCall,
   3042                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
   3043                                     const SmallVectorImpl<SDValue> &OutVals,
   3044                                     const SmallVectorImpl<ISD::InputArg> &Ins,
   3045                                     DebugLoc dl, SelectionDAG &DAG,
   3046                                     SmallVectorImpl<SDValue> &InVals) const {
   3047 
   3048   unsigned NumOps  = Outs.size();
   3049 
   3050   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   3051   bool isPPC64 = PtrVT == MVT::i64;
   3052   unsigned PtrByteSize = isPPC64 ? 8 : 4;
   3053 
   3054   MachineFunction &MF = DAG.getMachineFunction();
   3055 
   3056   // Mark this function as potentially containing a function that contains a
   3057   // tail call. As a consequence the frame pointer will be used for dynamicalloc
   3058   // and restoring the callers stack pointer in this functions epilog. This is
   3059   // done because by tail calling the called function might overwrite the value
   3060   // in this function's (MF) stack pointer stack slot 0(SP).
   3061   if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
   3062     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
   3063 
   3064   unsigned nAltivecParamsAtEnd = 0;
   3065 
   3066   // Count how many bytes are to be pushed on the stack, including the linkage
   3067   // area, and parameter passing area.  We start with 24/48 bytes, which is
   3068   // prereserved space for [SP][CR][LR][3 x unused].
   3069   unsigned NumBytes =
   3070     CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
   3071                                          Outs, OutVals,
   3072                                          nAltivecParamsAtEnd);
   3073 
   3074   // Calculate by how many bytes the stack has to be adjusted in case of tail
   3075   // call optimization.
   3076   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
   3077 
   3078   // To protect arguments on the stack from being clobbered in a tail call,
   3079   // force all the loads to happen before doing any other lowering.
   3080   if (isTailCall)
   3081     Chain = DAG.getStackArgumentTokenFactor(Chain);
   3082 
   3083   // Adjust the stack pointer for the new arguments...
   3084   // These operations are automatically eliminated by the prolog/epilog pass
   3085   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
   3086   SDValue CallSeqStart = Chain;
   3087 
   3088   // Load the return address and frame pointer so it can be move somewhere else
   3089   // later.
   3090   SDValue LROp, FPOp;
   3091   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
   3092                                        dl);
   3093 
   3094   // Set up a copy of the stack pointer for use loading and storing any
   3095   // arguments that may not fit in the registers available for argument
   3096   // passing.
   3097   SDValue StackPtr;
   3098   if (isPPC64)
   3099     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
   3100   else
   3101     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
   3102 
   3103   // Figure out which arguments are going to go in registers, and which in
   3104   // memory.  Also, if this is a vararg function, floating point operations
   3105   // must be stored to our stack, and loaded into integer regs as well, if
   3106   // any integer regs are available for argument passing.
   3107   unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
   3108   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
   3109 
   3110   static const unsigned GPR_32[] = {           // 32-bit registers.
   3111     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
   3112     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   3113   };
   3114   static const unsigned GPR_64[] = {           // 64-bit registers.
   3115     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
   3116     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
   3117   };
   3118   static const unsigned *FPR = GetFPR();
   3119 
   3120   static const unsigned VR[] = {
   3121     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
   3122     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   3123   };
   3124   const unsigned NumGPRs = array_lengthof(GPR_32);
   3125   const unsigned NumFPRs = 13;
   3126   const unsigned NumVRs  = array_lengthof(VR);
   3127 
   3128   const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
   3129 
   3130   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   3131   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
   3132 
   3133   SmallVector<SDValue, 8> MemOpChains;
   3134   for (unsigned i = 0; i != NumOps; ++i) {
   3135     SDValue Arg = OutVals[i];
   3136     ISD::ArgFlagsTy Flags = Outs[i].Flags;
   3137 
   3138     // PtrOff will be used to store the current argument to the stack if a
   3139     // register cannot be found for it.
   3140     SDValue PtrOff;
   3141 
   3142     PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
   3143 
   3144     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
   3145 
   3146     // On PPC64, promote integers to 64-bit values.
   3147     if (isPPC64 && Arg.getValueType() == MVT::i32) {
   3148       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
   3149       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
   3150       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
   3151     }
   3152 
   3153     // FIXME memcpy is used way more than necessary.  Correctness first.
   3154     if (Flags.isByVal()) {
   3155       unsigned Size = Flags.getByValSize();
   3156       if (Size==1 || Size==2) {
   3157         // Very small objects are passed right-justified.
   3158         // Everything else is passed left-justified.
   3159         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
   3160         if (GPR_idx != NumGPRs) {
   3161           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
   3162                                         MachinePointerInfo(), VT,
   3163                                         false, false, 0);
   3164           MemOpChains.push_back(Load.getValue(1));
   3165           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
   3166 
   3167           ArgOffset += PtrByteSize;
   3168         } else {
   3169           SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
   3170           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
   3171           SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
   3172                                 CallSeqStart.getNode()->getOperand(0),
   3173                                 Flags, DAG, dl);
   3174           // This must go outside the CALLSEQ_START..END.
   3175           SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
   3176                                CallSeqStart.getNode()->getOperand(1));
   3177           DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
   3178                                  NewCallSeqStart.getNode());
   3179           Chain = CallSeqStart = NewCallSeqStart;
   3180           ArgOffset += PtrByteSize;
   3181         }
   3182         continue;
   3183       }
   3184       // Copy entire object into memory.  There are cases where gcc-generated
   3185       // code assumes it is there, even if it could be put entirely into
   3186       // registers.  (This is not what the doc says.)
   3187       SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
   3188                             CallSeqStart.getNode()->getOperand(0),
   3189                             Flags, DAG, dl);
   3190       // This must go outside the CALLSEQ_START..END.
   3191       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
   3192                            CallSeqStart.getNode()->getOperand(1));
   3193       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
   3194       Chain = CallSeqStart = NewCallSeqStart;
   3195       // And copy the pieces of it that fit into registers.
   3196       for (unsigned j=0; j<Size; j+=PtrByteSize) {
   3197         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
   3198         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
   3199         if (GPR_idx != NumGPRs) {
   3200           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
   3201                                      MachinePointerInfo(),
   3202                                      false, false, 0);
   3203           MemOpChains.push_back(Load.getValue(1));
   3204           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
   3205           ArgOffset += PtrByteSize;
   3206         } else {
   3207           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
   3208           break;
   3209         }
   3210       }
   3211       continue;
   3212     }
   3213 
   3214     switch (Arg.getValueType().getSimpleVT().SimpleTy) {
   3215     default: llvm_unreachable("Unexpected ValueType for argument!");
   3216     case MVT::i32:
   3217     case MVT::i64:
   3218       if (GPR_idx != NumGPRs) {
   3219         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
   3220       } else {
   3221         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
   3222                          isPPC64, isTailCall, false, MemOpChains,
   3223                          TailCallArguments, dl);
   3224       }
   3225       ArgOffset += PtrByteSize;
   3226       break;
   3227     case MVT::f32:
   3228     case MVT::f64:
   3229       if (FPR_idx != NumFPRs) {
   3230         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
   3231 
   3232         if (isVarArg) {
   3233           SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
   3234                                        MachinePointerInfo(), false, false, 0);
   3235           MemOpChains.push_back(Store);
   3236 
   3237           // Float varargs are always shadowed in available integer registers
   3238           if (GPR_idx != NumGPRs) {
   3239             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
   3240                                        MachinePointerInfo(), false, false, 0);
   3241             MemOpChains.push_back(Load.getValue(1));
   3242             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
   3243           }
   3244           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
   3245             SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
   3246             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
   3247             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
   3248                                        MachinePointerInfo(),
   3249                                        false, false, 0);
   3250             MemOpChains.push_back(Load.getValue(1));
   3251             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
   3252           }
   3253         } else {
   3254           // If we have any FPRs remaining, we may also have GPRs remaining.
   3255           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
   3256           // GPRs.
   3257           if (GPR_idx != NumGPRs)
   3258             ++GPR_idx;
   3259           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
   3260               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
   3261             ++GPR_idx;
   3262         }
   3263       } else {
   3264         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
   3265                          isPPC64, isTailCall, false, MemOpChains,
   3266                          TailCallArguments, dl);
   3267       }
   3268       if (isPPC64)
   3269         ArgOffset += 8;
   3270       else
   3271         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
   3272       break;
   3273     case MVT::v4f32:
   3274     case MVT::v4i32:
   3275     case MVT::v8i16:
   3276     case MVT::v16i8:
   3277       if (isVarArg) {
   3278         // These go aligned on the stack, or in the corresponding R registers
   3279         // when within range.  The Darwin PPC ABI doc claims they also go in
   3280         // V registers; in fact gcc does this only for arguments that are
   3281         // prototyped, not for those that match the ...  We do it for all
   3282         // arguments, seems to work.
   3283         while (ArgOffset % 16 !=0) {
   3284           ArgOffset += PtrByteSize;
   3285           if (GPR_idx != NumGPRs)
   3286             GPR_idx++;
   3287         }
   3288         // We could elide this store in the case where the object fits
   3289         // entirely in R registers.  Maybe later.
   3290         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
   3291                             DAG.getConstant(ArgOffset, PtrVT));
   3292         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
   3293                                      MachinePointerInfo(), false, false, 0);
   3294         MemOpChains.push_back(Store);
   3295         if (VR_idx != NumVRs) {
   3296           SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
   3297                                      MachinePointerInfo(),
   3298                                      false, false, 0);
   3299           MemOpChains.push_back(Load.getValue(1));
   3300           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
   3301         }
   3302         ArgOffset += 16;
   3303         for (unsigned i=0; i<16; i+=PtrByteSize) {
   3304           if (GPR_idx == NumGPRs)
   3305             break;
   3306           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
   3307                                   DAG.getConstant(i, PtrVT));
   3308           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
   3309                                      false, false, 0);
   3310           MemOpChains.push_back(Load.getValue(1));
   3311           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
   3312         }
   3313         break;
   3314       }
   3315 
   3316       // Non-varargs Altivec params generally go in registers, but have
   3317       // stack space allocated at the end.
   3318       if (VR_idx != NumVRs) {
   3319         // Doesn't have GPR space allocated.
   3320         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
   3321       } else if (nAltivecParamsAtEnd==0) {
   3322         // We are emitting Altivec params in order.
   3323         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
   3324                          isPPC64, isTailCall, true, MemOpChains,
   3325                          TailCallArguments, dl);
   3326         ArgOffset += 16;
   3327       }
   3328       break;
   3329     }
   3330   }
   3331   // If all Altivec parameters fit in registers, as they usually do,
   3332   // they get stack space following the non-Altivec parameters.  We
   3333   // don't track this here because nobody below needs it.
   3334   // If there are more Altivec parameters than fit in registers emit
   3335   // the stores here.
   3336   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
   3337     unsigned j = 0;
   3338     // Offset is aligned; skip 1st 12 params which go in V registers.
   3339     ArgOffset = ((ArgOffset+15)/16)*16;
   3340     ArgOffset += 12*16;
   3341     for (unsigned i = 0; i != NumOps; ++i) {
   3342       SDValue Arg = OutVals[i];
   3343       EVT ArgType = Outs[i].VT;
   3344       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
   3345           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
   3346         if (++j > NumVRs) {
   3347           SDValue PtrOff;
   3348           // We are emitting Altivec params in order.
   3349           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
   3350                            isPPC64, isTailCall, true, MemOpChains,
   3351                            TailCallArguments, dl);
   3352           ArgOffset += 16;
   3353         }
   3354       }
   3355     }
   3356   }
   3357 
   3358   if (!MemOpChains.empty())
   3359     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   3360                         &MemOpChains[0], MemOpChains.size());
   3361 
   3362   // Check if this is an indirect call (MTCTR/BCTRL).
   3363   // See PrepareCall() for more information about calls through function
   3364   // pointers in the 64-bit SVR4 ABI.
   3365   if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() &&
   3366       !dyn_cast<GlobalAddressSDNode>(Callee) &&
   3367       !dyn_cast<ExternalSymbolSDNode>(Callee) &&
   3368       !isBLACompatibleAddress(Callee, DAG)) {
   3369     // Load r2 into a virtual register and store it to the TOC save area.
   3370     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
   3371     // TOC save area offset.
   3372     SDValue PtrOff = DAG.getIntPtrConstant(40);
   3373     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
   3374     Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
   3375                          false, false, 0);
   3376   }
   3377 
   3378   // On Darwin, R12 must contain the address of an indirect callee.  This does
   3379   // not mean the MTCTR instruction must use R12; it's easier to model this as
   3380   // an extra parameter, so do that.
   3381   if (!isTailCall &&
   3382       !dyn_cast<GlobalAddressSDNode>(Callee) &&
   3383       !dyn_cast<ExternalSymbolSDNode>(Callee) &&
   3384       !isBLACompatibleAddress(Callee, DAG))
   3385     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
   3386                                                    PPC::R12), Callee));
   3387 
   3388   // Build a sequence of copy-to-reg nodes chained together with token chain
   3389   // and flag operands which copy the outgoing args into the appropriate regs.
   3390   SDValue InFlag;
   3391   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
   3392     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
   3393                              RegsToPass[i].second, InFlag);
   3394     InFlag = Chain.getValue(1);
   3395   }
   3396 
   3397   if (isTailCall)
   3398     PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
   3399                     FPOp, true, TailCallArguments);
   3400 
   3401   return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
   3402                     RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
   3403                     Ins, InVals);
   3404 }
   3405 
   3406 SDValue
   3407 PPCTargetLowering::LowerReturn(SDValue Chain,
   3408                                CallingConv::ID CallConv, bool isVarArg,
   3409                                const SmallVectorImpl<ISD::OutputArg> &Outs,
   3410                                const SmallVectorImpl<SDValue> &OutVals,
   3411                                DebugLoc dl, SelectionDAG &DAG) const {
   3412 
   3413   SmallVector<CCValAssign, 16> RVLocs;
   3414   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   3415 		 getTargetMachine(), RVLocs, *DAG.getContext());
   3416   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
   3417 
   3418   // If this is the first return lowered for this function, add the regs to the
   3419   // liveout set for the function.
   3420   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
   3421     for (unsigned i = 0; i != RVLocs.size(); ++i)
   3422       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
   3423   }
   3424 
   3425   SDValue Flag;
   3426 
   3427   // Copy the result values into the output registers.
   3428   for (unsigned i = 0; i != RVLocs.size(); ++i) {
   3429     CCValAssign &VA = RVLocs[i];
   3430     assert(VA.isRegLoc() && "Can only return in registers!");
   3431     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
   3432                              OutVals[i], Flag);
   3433     Flag = Chain.getValue(1);
   3434   }
   3435 
   3436   if (Flag.getNode())
   3437     return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
   3438   else
   3439     return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
   3440 }
   3441 
   3442 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
   3443                                    const PPCSubtarget &Subtarget) const {
   3444   // When we pop the dynamic allocation we need to restore the SP link.
   3445   DebugLoc dl = Op.getDebugLoc();
   3446 
   3447   // Get the corect type for pointers.
   3448   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   3449 
   3450   // Construct the stack pointer operand.
   3451   bool isPPC64 = Subtarget.isPPC64();
   3452   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
   3453   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
   3454 
   3455   // Get the operands for the STACKRESTORE.
   3456   SDValue Chain = Op.getOperand(0);
   3457   SDValue SaveSP = Op.getOperand(1);
   3458 
   3459   // Load the old link SP.
   3460   SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
   3461                                    MachinePointerInfo(),
   3462                                    false, false, 0);
   3463 
   3464   // Restore the stack pointer.
   3465   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
   3466 
   3467   // Store the old link SP.
   3468   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
   3469                       false, false, 0);
   3470 }
   3471 
   3472 
   3473 
   3474 SDValue
   3475 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
   3476   MachineFunction &MF = DAG.getMachineFunction();
   3477   bool isPPC64 = PPCSubTarget.isPPC64();
   3478   bool isDarwinABI = PPCSubTarget.isDarwinABI();
   3479   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   3480 
   3481   // Get current frame pointer save index.  The users of this index will be
   3482   // primarily DYNALLOC instructions.
   3483   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   3484   int RASI = FI->getReturnAddrSaveIndex();
   3485 
   3486   // If the frame pointer save index hasn't been defined yet.
   3487   if (!RASI) {
   3488     // Find out what the fix offset of the frame pointer save area.
   3489     int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
   3490     // Allocate the frame index for frame pointer save area.
   3491     RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
   3492     // Save the result.
   3493     FI->setReturnAddrSaveIndex(RASI);
   3494   }
   3495   return DAG.getFrameIndex(RASI, PtrVT);
   3496 }
   3497 
   3498 SDValue
   3499 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
   3500   MachineFunction &MF = DAG.getMachineFunction();
   3501   bool isPPC64 = PPCSubTarget.isPPC64();
   3502   bool isDarwinABI = PPCSubTarget.isDarwinABI();
   3503   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   3504 
   3505   // Get current frame pointer save index.  The users of this index will be
   3506   // primarily DYNALLOC instructions.
   3507   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   3508   int FPSI = FI->getFramePointerSaveIndex();
   3509 
   3510   // If the frame pointer save index hasn't been defined yet.
   3511   if (!FPSI) {
   3512     // Find out what the fix offset of the frame pointer save area.
   3513     int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
   3514                                                            isDarwinABI);
   3515 
   3516     // Allocate the frame index for frame pointer save area.
   3517     FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
   3518     // Save the result.
   3519     FI->setFramePointerSaveIndex(FPSI);
   3520   }
   3521   return DAG.getFrameIndex(FPSI, PtrVT);
   3522 }
   3523 
   3524 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   3525                                          SelectionDAG &DAG,
   3526                                          const PPCSubtarget &Subtarget) const {
   3527   // Get the inputs.
   3528   SDValue Chain = Op.getOperand(0);
   3529   SDValue Size  = Op.getOperand(1);
   3530   DebugLoc dl = Op.getDebugLoc();
   3531 
   3532   // Get the corect type for pointers.
   3533   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   3534   // Negate the size.
   3535   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
   3536                                   DAG.getConstant(0, PtrVT), Size);
   3537   // Construct a node for the frame pointer save index.
   3538   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
   3539   // Build a DYNALLOC node.
   3540   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
   3541   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
   3542   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
   3543 }
   3544 
   3545 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
   3546 /// possible.
   3547 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   3548   // Not FP? Not a fsel.
   3549   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
   3550       !Op.getOperand(2).getValueType().isFloatingPoint())
   3551     return Op;
   3552 
   3553   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
   3554 
   3555   // Cannot handle SETEQ/SETNE.
   3556   if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
   3557 
   3558   EVT ResVT = Op.getValueType();
   3559   EVT CmpVT = Op.getOperand(0).getValueType();
   3560   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
   3561   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
   3562   DebugLoc dl = Op.getDebugLoc();
   3563 
   3564   // If the RHS of the comparison is a 0.0, we don't need to do the
   3565   // subtraction at all.
   3566   if (isFloatingPointZero(RHS))
   3567     switch (CC) {
   3568     default: break;       // SETUO etc aren't handled by fsel.
   3569     case ISD::SETULT:
   3570     case ISD::SETLT:
   3571       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
   3572     case ISD::SETOGE:
   3573     case ISD::SETGE:
   3574       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
   3575         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
   3576       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
   3577     case ISD::SETUGT:
   3578     case ISD::SETGT:
   3579       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
   3580     case ISD::SETOLE:
   3581     case ISD::SETLE:
   3582       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
   3583         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
   3584       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
   3585                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
   3586     }
   3587 
   3588   SDValue Cmp;
   3589   switch (CC) {
   3590   default: break;       // SETUO etc aren't handled by fsel.
   3591   case ISD::SETULT:
   3592   case ISD::SETLT:
   3593     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
   3594     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
   3595       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
   3596       return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
   3597   case ISD::SETOGE:
   3598   case ISD::SETGE:
   3599     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
   3600     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
   3601       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
   3602       return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
   3603   case ISD::SETUGT:
   3604   case ISD::SETGT:
   3605     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
   3606     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
   3607       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
   3608       return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
   3609   case ISD::SETOLE:
   3610   case ISD::SETLE:
   3611     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
   3612     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
   3613       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
   3614       return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
   3615   }
   3616   return Op;
   3617 }
   3618 
   3619 // FIXME: Split this code up when LegalizeDAGTypes lands.
   3620 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
   3621                                            DebugLoc dl) const {
   3622   assert(Op.getOperand(0).getValueType().isFloatingPoint());
   3623   SDValue Src = Op.getOperand(0);
   3624   if (Src.getValueType() == MVT::f32)
   3625     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
   3626 
   3627   SDValue Tmp;
   3628   switch (Op.getValueType().getSimpleVT().SimpleTy) {
   3629   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
   3630   case MVT::i32:
   3631     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
   3632                                                          PPCISD::FCTIDZ,
   3633                       dl, MVT::f64, Src);
   3634     break;
   3635   case MVT::i64:
   3636     Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
   3637     break;
   3638   }
   3639 
   3640   // Convert the FP value to an int value through memory.
   3641   SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
   3642 
   3643   // Emit a store to the stack slot.
   3644   SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
   3645                                MachinePointerInfo(), false, false, 0);
   3646 
   3647   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
   3648   // add in a bias.
   3649   if (Op.getValueType() == MVT::i32)
   3650     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
   3651                         DAG.getConstant(4, FIPtr.getValueType()));
   3652   return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
   3653                      false, false, 0);
   3654 }
   3655 
   3656 SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
   3657                                            SelectionDAG &DAG) const {
   3658   DebugLoc dl = Op.getDebugLoc();
   3659   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
   3660   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
   3661     return SDValue();
   3662 
   3663   if (Op.getOperand(0).getValueType() == MVT::i64) {
   3664     SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
   3665     SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
   3666     if (Op.getValueType() == MVT::f32)
   3667       FP = DAG.getNode(ISD::FP_ROUND, dl,
   3668                        MVT::f32, FP, DAG.getIntPtrConstant(0));
   3669     return FP;
   3670   }
   3671 
   3672   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
   3673          "Unhandled SINT_TO_FP type in custom expander!");
   3674   // Since we only generate this in 64-bit mode, we can take advantage of
   3675   // 64-bit registers.  In particular, sign extend the input value into the
   3676   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
   3677   // then lfd it and fcfid it.
   3678   MachineFunction &MF = DAG.getMachineFunction();
   3679   MachineFrameInfo *FrameInfo = MF.getFrameInfo();
   3680   int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
   3681   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   3682   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
   3683 
   3684   SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
   3685                                 Op.getOperand(0));
   3686 
   3687   // STD the extended value into the stack slot.
   3688   MachineMemOperand *MMO =
   3689     MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
   3690                             MachineMemOperand::MOStore, 8, 8);
   3691   SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
   3692   SDValue Store =
   3693     DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
   3694                             Ops, 4, MVT::i64, MMO);
   3695   // Load the value as a double.
   3696   SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
   3697                            false, false, 0);
   3698 
   3699   // FCFID it and return it.
   3700   SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
   3701   if (Op.getValueType() == MVT::f32)
   3702     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
   3703   return FP;
   3704 }
   3705 
   3706 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
   3707                                             SelectionDAG &DAG) const {
   3708   DebugLoc dl = Op.getDebugLoc();
   3709   /*
   3710    The rounding mode is in bits 30:31 of FPSR, and has the following
   3711    settings:
   3712      00 Round to nearest
   3713      01 Round to 0
   3714      10 Round to +inf
   3715      11 Round to -inf
   3716 
   3717   FLT_ROUNDS, on the other hand, expects the following:
   3718     -1 Undefined
   3719      0 Round to 0
   3720      1 Round to nearest
   3721      2 Round to +inf
   3722      3 Round to -inf
   3723 
   3724   To perform the conversion, we do:
   3725     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
   3726   */
   3727 
   3728   MachineFunction &MF = DAG.getMachineFunction();
   3729   EVT VT = Op.getValueType();
   3730   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   3731   std::vector<EVT> NodeTys;
   3732   SDValue MFFSreg, InFlag;
   3733 
   3734   // Save FP Control Word to register
   3735   NodeTys.push_back(MVT::f64);    // return register
   3736   NodeTys.push_back(MVT::Glue);   // unused in this context
   3737   SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
   3738 
   3739   // Save FP register to stack slot
   3740   int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
   3741   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
   3742   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
   3743                                StackSlot, MachinePointerInfo(), false, false,0);
   3744 
   3745   // Load FP Control Word from low 32 bits of stack slot.
   3746   SDValue Four = DAG.getConstant(4, PtrVT);
   3747   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
   3748   SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
   3749                             false, false, 0);
   3750 
   3751   // Transform as necessary
   3752   SDValue CWD1 =
   3753     DAG.getNode(ISD::AND, dl, MVT::i32,
   3754                 CWD, DAG.getConstant(3, MVT::i32));
   3755   SDValue CWD2 =
   3756     DAG.getNode(ISD::SRL, dl, MVT::i32,
   3757                 DAG.getNode(ISD::AND, dl, MVT::i32,
   3758                             DAG.getNode(ISD::XOR, dl, MVT::i32,
   3759                                         CWD, DAG.getConstant(3, MVT::i32)),
   3760                             DAG.getConstant(3, MVT::i32)),
   3761                 DAG.getConstant(1, MVT::i32));
   3762 
   3763   SDValue RetVal =
   3764     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
   3765 
   3766   return DAG.getNode((VT.getSizeInBits() < 16 ?
   3767                       ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
   3768 }
   3769 
   3770 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
   3771   EVT VT = Op.getValueType();
   3772   unsigned BitWidth = VT.getSizeInBits();
   3773   DebugLoc dl = Op.getDebugLoc();
   3774   assert(Op.getNumOperands() == 3 &&
   3775          VT == Op.getOperand(1).getValueType() &&
   3776          "Unexpected SHL!");
   3777 
   3778   // Expand into a bunch of logical ops.  Note that these ops
   3779   // depend on the PPC behavior for oversized shift amounts.
   3780   SDValue Lo = Op.getOperand(0);
   3781   SDValue Hi = Op.getOperand(1);
   3782   SDValue Amt = Op.getOperand(2);
   3783   EVT AmtVT = Amt.getValueType();
   3784 
   3785   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
   3786                              DAG.getConstant(BitWidth, AmtVT), Amt);
   3787   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
   3788   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
   3789   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
   3790   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
   3791                              DAG.getConstant(-BitWidth, AmtVT));
   3792   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
   3793   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
   3794   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
   3795   SDValue OutOps[] = { OutLo, OutHi };
   3796   return DAG.getMergeValues(OutOps, 2, dl);
   3797 }
   3798 
   3799 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
   3800   EVT VT = Op.getValueType();
   3801   DebugLoc dl = Op.getDebugLoc();
   3802   unsigned BitWidth = VT.getSizeInBits();
   3803   assert(Op.getNumOperands() == 3 &&
   3804          VT == Op.getOperand(1).getValueType() &&
   3805          "Unexpected SRL!");
   3806 
   3807   // Expand into a bunch of logical ops.  Note that these ops
   3808   // depend on the PPC behavior for oversized shift amounts.
   3809   SDValue Lo = Op.getOperand(0);
   3810   SDValue Hi = Op.getOperand(1);
   3811   SDValue Amt = Op.getOperand(2);
   3812   EVT AmtVT = Amt.getValueType();
   3813 
   3814   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
   3815                              DAG.getConstant(BitWidth, AmtVT), Amt);
   3816   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
   3817   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
   3818   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
   3819   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
   3820                              DAG.getConstant(-BitWidth, AmtVT));
   3821   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
   3822   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
   3823   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
   3824   SDValue OutOps[] = { OutLo, OutHi };
   3825   return DAG.getMergeValues(OutOps, 2, dl);
   3826 }
   3827 
   3828 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
   3829   DebugLoc dl = Op.getDebugLoc();
   3830   EVT VT = Op.getValueType();
   3831   unsigned BitWidth = VT.getSizeInBits();
   3832   assert(Op.getNumOperands() == 3 &&
   3833          VT == Op.getOperand(1).getValueType() &&
   3834          "Unexpected SRA!");
   3835 
   3836   // Expand into a bunch of logical ops, followed by a select_cc.
   3837   SDValue Lo = Op.getOperand(0);
   3838   SDValue Hi = Op.getOperand(1);
   3839   SDValue Amt = Op.getOperand(2);
   3840   EVT AmtVT = Amt.getValueType();
   3841 
   3842   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
   3843                              DAG.getConstant(BitWidth, AmtVT), Amt);
   3844   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
   3845   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
   3846   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
   3847   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
   3848                              DAG.getConstant(-BitWidth, AmtVT));
   3849   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
   3850   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
   3851   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
   3852                                   Tmp4, Tmp6, ISD::SETLE);
   3853   SDValue OutOps[] = { OutLo, OutHi };
   3854   return DAG.getMergeValues(OutOps, 2, dl);
   3855 }
   3856 
   3857 //===----------------------------------------------------------------------===//
   3858 // Vector related lowering.
   3859 //
   3860 
   3861 /// BuildSplatI - Build a canonical splati of Val with an element size of
   3862 /// SplatSize.  Cast the result to VT.
   3863 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
   3864                              SelectionDAG &DAG, DebugLoc dl) {
   3865   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
   3866 
   3867   static const EVT VTys[] = { // canonical VT to use for each size.
   3868     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
   3869   };
   3870 
   3871   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
   3872 
   3873   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
   3874   if (Val == -1)
   3875     SplatSize = 1;
   3876 
   3877   EVT CanonicalVT = VTys[SplatSize-1];
   3878 
   3879   // Build a canonical splat for this value.
   3880   SDValue Elt = DAG.getConstant(Val, MVT::i32);
   3881   SmallVector<SDValue, 8> Ops;
   3882   Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
   3883   SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
   3884                               &Ops[0], Ops.size());
   3885   return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
   3886 }
   3887 
   3888 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
   3889 /// specified intrinsic ID.
   3890 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
   3891                                 SelectionDAG &DAG, DebugLoc dl,
   3892                                 EVT DestVT = MVT::Other) {
   3893   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
   3894   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
   3895                      DAG.getConstant(IID, MVT::i32), LHS, RHS);
   3896 }
   3897 
   3898 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
   3899 /// specified intrinsic ID.
   3900 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
   3901                                 SDValue Op2, SelectionDAG &DAG,
   3902                                 DebugLoc dl, EVT DestVT = MVT::Other) {
   3903   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
   3904   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
   3905                      DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
   3906 }
   3907 
   3908 
   3909 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
   3910 /// amount.  The result has the specified value type.
   3911 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
   3912                              EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   3913   // Force LHS/RHS to be the right type.
   3914   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
   3915   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
   3916 
   3917   int Ops[16];
   3918   for (unsigned i = 0; i != 16; ++i)
   3919     Ops[i] = i + Amt;
   3920   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
   3921   return DAG.getNode(ISD::BITCAST, dl, VT, T);
   3922 }
   3923 
   3924 // If this is a case we can't handle, return null and let the default
   3925 // expansion code take care of it.  If we CAN select this case, and if it
   3926 // selects to a single instruction, return Op.  Otherwise, if we can codegen
   3927 // this case more efficiently than a constant pool load, lower it to the
   3928 // sequence of ops that should be used.
   3929 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   3930                                              SelectionDAG &DAG) const {
   3931   DebugLoc dl = Op.getDebugLoc();
   3932   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
   3933   assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
   3934 
   3935   // Check if this is a splat of a constant value.
   3936   APInt APSplatBits, APSplatUndef;
   3937   unsigned SplatBitSize;
   3938   bool HasAnyUndefs;
   3939   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
   3940                              HasAnyUndefs, 0, true) || SplatBitSize > 32)
   3941     return SDValue();
   3942 
   3943   unsigned SplatBits = APSplatBits.getZExtValue();
   3944   unsigned SplatUndef = APSplatUndef.getZExtValue();
   3945   unsigned SplatSize = SplatBitSize / 8;
   3946 
   3947   // First, handle single instruction cases.
   3948 
   3949   // All zeros?
   3950   if (SplatBits == 0) {
   3951     // Canonicalize all zero vectors to be v4i32.
   3952     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
   3953       SDValue Z = DAG.getConstant(0, MVT::i32);
   3954       Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
   3955       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
   3956     }
   3957     return Op;
   3958   }
   3959 
   3960   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
   3961   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
   3962                     (32-SplatBitSize));
   3963   if (SextVal >= -16 && SextVal <= 15)
   3964     return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
   3965 
   3966 
   3967   // Two instruction sequences.
   3968 
   3969   // If this value is in the range [-32,30] and is even, use:
   3970   //    tmp = VSPLTI[bhw], result = add tmp, tmp
   3971   if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
   3972     SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
   3973     Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
   3974     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
   3975   }
   3976 
   3977   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
   3978   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
   3979   // for fneg/fabs.
   3980   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
   3981     // Make -1 and vspltisw -1:
   3982     SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
   3983 
   3984     // Make the VSLW intrinsic, computing 0x8000_0000.
   3985     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
   3986                                    OnesV, DAG, dl);
   3987 
   3988     // xor by OnesV to invert it.
   3989     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
   3990     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
   3991   }
   3992 
   3993   // Check to see if this is a wide variety of vsplti*, binop self cases.
   3994   static const signed char SplatCsts[] = {
   3995     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
   3996     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
   3997   };
   3998 
   3999   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
   4000     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
   4001     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
   4002     int i = SplatCsts[idx];
   4003 
   4004     // Figure out what shift amount will be used by altivec if shifted by i in
   4005     // this splat size.
   4006     unsigned TypeShiftAmt = i & (SplatBitSize-1);
   4007 
   4008     // vsplti + shl self.
   4009     if (SextVal == (i << (int)TypeShiftAmt)) {
   4010       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
   4011       static const unsigned IIDs[] = { // Intrinsic to use for each size.
   4012         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
   4013         Intrinsic::ppc_altivec_vslw
   4014       };
   4015       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
   4016       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
   4017     }
   4018 
   4019     // vsplti + srl self.
   4020     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
   4021       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
   4022       static const unsigned IIDs[] = { // Intrinsic to use for each size.
   4023         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
   4024         Intrinsic::ppc_altivec_vsrw
   4025       };
   4026       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
   4027       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
   4028     }
   4029 
   4030     // vsplti + sra self.
   4031     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
   4032       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
   4033       static const unsigned IIDs[] = { // Intrinsic to use for each size.
   4034         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
   4035         Intrinsic::ppc_altivec_vsraw
   4036       };
   4037       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
   4038       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
   4039     }
   4040 
   4041     // vsplti + rol self.
   4042     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
   4043                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
   4044       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
   4045       static const unsigned IIDs[] = { // Intrinsic to use for each size.
   4046         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
   4047         Intrinsic::ppc_altivec_vrlw
   4048       };
   4049       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
   4050       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
   4051     }
   4052 
   4053     // t = vsplti c, result = vsldoi t, t, 1
   4054     if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
   4055       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
   4056       return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
   4057     }
   4058     // t = vsplti c, result = vsldoi t, t, 2
   4059     if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
   4060       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
   4061       return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
   4062     }
   4063     // t = vsplti c, result = vsldoi t, t, 3
   4064     if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
   4065       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
   4066       return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
   4067     }
   4068   }
   4069 
   4070   // Three instruction sequences.
   4071 
   4072   // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
   4073   if (SextVal >= 0 && SextVal <= 31) {
   4074     SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
   4075     SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
   4076     LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
   4077     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
   4078   }
   4079   // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
   4080   if (SextVal >= -31 && SextVal <= 0) {
   4081     SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
   4082     SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
   4083     LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
   4084     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
   4085   }
   4086 
   4087   return SDValue();
   4088 }
   4089 
   4090 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
   4091 /// the specified operations to build the shuffle.
   4092 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
   4093                                       SDValue RHS, SelectionDAG &DAG,
   4094                                       DebugLoc dl) {
   4095   unsigned OpNum = (PFEntry >> 26) & 0x0F;
   4096   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
   4097   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
   4098 
   4099   enum {
   4100     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
   4101     OP_VMRGHW,
   4102     OP_VMRGLW,
   4103     OP_VSPLTISW0,
   4104     OP_VSPLTISW1,
   4105     OP_VSPLTISW2,
   4106     OP_VSPLTISW3,
   4107     OP_VSLDOI4,
   4108     OP_VSLDOI8,
   4109     OP_VSLDOI12
   4110   };
   4111 
   4112   if (OpNum == OP_COPY) {
   4113     if (LHSID == (1*9+2)*9+3) return LHS;
   4114     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
   4115     return RHS;
   4116   }
   4117 
   4118   SDValue OpLHS, OpRHS;
   4119   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
   4120   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
   4121 
   4122   int ShufIdxs[16];
   4123   switch (OpNum) {
   4124   default: llvm_unreachable("Unknown i32 permute!");
   4125   case OP_VMRGHW:
   4126     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
   4127     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
   4128     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
   4129     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
   4130     break;
   4131   case OP_VMRGLW:
   4132     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
   4133     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
   4134     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
   4135     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
   4136     break;
   4137   case OP_VSPLTISW0:
   4138     for (unsigned i = 0; i != 16; ++i)
   4139       ShufIdxs[i] = (i&3)+0;
   4140     break;
   4141   case OP_VSPLTISW1:
   4142     for (unsigned i = 0; i != 16; ++i)
   4143       ShufIdxs[i] = (i&3)+4;
   4144     break;
   4145   case OP_VSPLTISW2:
   4146     for (unsigned i = 0; i != 16; ++i)
   4147       ShufIdxs[i] = (i&3)+8;
   4148     break;
   4149   case OP_VSPLTISW3:
   4150     for (unsigned i = 0; i != 16; ++i)
   4151       ShufIdxs[i] = (i&3)+12;
   4152     break;
   4153   case OP_VSLDOI4:
   4154     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
   4155   case OP_VSLDOI8:
   4156     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
   4157   case OP_VSLDOI12:
   4158     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
   4159   }
   4160   EVT VT = OpLHS.getValueType();
   4161   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
   4162   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
   4163   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
   4164   return DAG.getNode(ISD::BITCAST, dl, VT, T);
   4165 }
   4166 
   4167 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
   4168 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
   4169 /// return the code it can be lowered into.  Worst case, it can always be
   4170 /// lowered into a vperm.
   4171 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   4172                                                SelectionDAG &DAG) const {
   4173   DebugLoc dl = Op.getDebugLoc();
   4174   SDValue V1 = Op.getOperand(0);
   4175   SDValue V2 = Op.getOperand(1);
   4176   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   4177   EVT VT = Op.getValueType();
   4178 
   4179   // Cases that are handled by instructions that take permute immediates
   4180   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
   4181   // selected by the instruction selector.
   4182   if (V2.getOpcode() == ISD::UNDEF) {
   4183     if (PPC::isSplatShuffleMask(SVOp, 1) ||
   4184         PPC::isSplatShuffleMask(SVOp, 2) ||
   4185         PPC::isSplatShuffleMask(SVOp, 4) ||
   4186         PPC::isVPKUWUMShuffleMask(SVOp, true) ||
   4187         PPC::isVPKUHUMShuffleMask(SVOp, true) ||
   4188         PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
   4189         PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
   4190         PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
   4191         PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
   4192         PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
   4193         PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
   4194         PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
   4195       return Op;
   4196     }
   4197   }
   4198 
   4199   // Altivec has a variety of "shuffle immediates" that take two vector inputs
   4200   // and produce a fixed permutation.  If any of these match, do not lower to
   4201   // VPERM.
   4202   if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
   4203       PPC::isVPKUHUMShuffleMask(SVOp, false) ||
   4204       PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
   4205       PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
   4206       PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
   4207       PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
   4208       PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
   4209       PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
   4210       PPC::isVMRGHShuffleMask(SVOp, 4, false))
   4211     return Op;
   4212 
   4213   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
   4214   // perfect shuffle table to emit an optimal matching sequence.
   4215   SmallVector<int, 16> PermMask;
   4216   SVOp->getMask(PermMask);
   4217 
   4218   unsigned PFIndexes[4];
   4219   bool isFourElementShuffle = true;
   4220   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
   4221     unsigned EltNo = 8;   // Start out undef.
   4222     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
   4223       if (PermMask[i*4+j] < 0)
   4224         continue;   // Undef, ignore it.
   4225 
   4226       unsigned ByteSource = PermMask[i*4+j];
   4227       if ((ByteSource & 3) != j) {
   4228         isFourElementShuffle = false;
   4229         break;
   4230       }
   4231 
   4232       if (EltNo == 8) {
   4233         EltNo = ByteSource/4;
   4234       } else if (EltNo != ByteSource/4) {
   4235         isFourElementShuffle = false;
   4236         break;
   4237       }
   4238     }
   4239     PFIndexes[i] = EltNo;
   4240   }
   4241 
   4242   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
   4243   // perfect shuffle vector to determine if it is cost effective to do this as
   4244   // discrete instructions, or whether we should use a vperm.
   4245   if (isFourElementShuffle) {
   4246     // Compute the index in the perfect shuffle table.
   4247     unsigned PFTableIndex =
   4248       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
   4249 
   4250     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
   4251     unsigned Cost  = (PFEntry >> 30);
   4252 
   4253     // Determining when to avoid vperm is tricky.  Many things affect the cost
   4254     // of vperm, particularly how many times the perm mask needs to be computed.
   4255     // For example, if the perm mask can be hoisted out of a loop or is already
   4256     // used (perhaps because there are multiple permutes with the same shuffle
   4257     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
   4258     // the loop requires an extra register.
   4259     //
   4260     // As a compromise, we only emit discrete instructions if the shuffle can be
   4261     // generated in 3 or fewer operations.  When we have loop information
   4262     // available, if this block is within a loop, we should avoid using vperm
   4263     // for 3-operation perms and use a constant pool load instead.
   4264     if (Cost < 3)
   4265       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
   4266   }
   4267 
   4268   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
   4269   // vector that will get spilled to the constant pool.
   4270   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
   4271 
   4272   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
   4273   // that it is in input element units, not in bytes.  Convert now.
   4274   EVT EltVT = V1.getValueType().getVectorElementType();
   4275   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
   4276 
   4277   SmallVector<SDValue, 16> ResultMask;
   4278   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
   4279     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
   4280 
   4281     for (unsigned j = 0; j != BytesPerElement; ++j)
   4282       ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
   4283                                            MVT::i32));
   4284   }
   4285 
   4286   SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
   4287                                     &ResultMask[0], ResultMask.size());
   4288   return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
   4289 }
   4290 
   4291 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
   4292 /// altivec comparison.  If it is, return true and fill in Opc/isDot with
   4293 /// information about the intrinsic.
   4294 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
   4295                                   bool &isDot) {
   4296   unsigned IntrinsicID =
   4297     cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
   4298   CompareOpc = -1;
   4299   isDot = false;
   4300   switch (IntrinsicID) {
   4301   default: return false;
   4302     // Comparison predicates.
   4303   case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
   4304   case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
   4305   case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
   4306   case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
   4307   case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
   4308   case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
   4309   case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
   4310   case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
   4311   case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
   4312   case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
   4313   case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
   4314   case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
   4315   case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
   4316 
   4317     // Normal Comparisons.
   4318   case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
   4319   case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
   4320   case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
   4321   case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
   4322   case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
   4323   case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
   4324   case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
   4325   case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
   4326   case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
   4327   case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
   4328   case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
   4329   case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
   4330   case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
   4331   }
   4332   return true;
   4333 }
   4334 
   4335 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
   4336 /// lower, do it, otherwise return null.
   4337 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   4338                                                    SelectionDAG &DAG) const {
   4339   // If this is a lowered altivec predicate compare, CompareOpc is set to the
   4340   // opcode number of the comparison.
   4341   DebugLoc dl = Op.getDebugLoc();
   4342   int CompareOpc;
   4343   bool isDot;
   4344   if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
   4345     return SDValue();    // Don't custom lower most intrinsics.
   4346 
   4347   // If this is a non-dot comparison, make the VCMP node and we are done.
   4348   if (!isDot) {
   4349     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
   4350                               Op.getOperand(1), Op.getOperand(2),
   4351                               DAG.getConstant(CompareOpc, MVT::i32));
   4352     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
   4353   }
   4354 
   4355   // Create the PPCISD altivec 'dot' comparison node.
   4356   SDValue Ops[] = {
   4357     Op.getOperand(2),  // LHS
   4358     Op.getOperand(3),  // RHS
   4359     DAG.getConstant(CompareOpc, MVT::i32)
   4360   };
   4361   std::vector<EVT> VTs;
   4362   VTs.push_back(Op.getOperand(2).getValueType());
   4363   VTs.push_back(MVT::Glue);
   4364   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
   4365 
   4366   // Now that we have the comparison, emit a copy from the CR to a GPR.
   4367   // This is flagged to the above dot comparison.
   4368   SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
   4369                                 DAG.getRegister(PPC::CR6, MVT::i32),
   4370                                 CompNode.getValue(1));
   4371 
   4372   // Unpack the result based on how the target uses it.
   4373   unsigned BitNo;   // Bit # of CR6.
   4374   bool InvertBit;   // Invert result?
   4375   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
   4376   default:  // Can't happen, don't crash on invalid number though.
   4377   case 0:   // Return the value of the EQ bit of CR6.
   4378     BitNo = 0; InvertBit = false;
   4379     break;
   4380   case 1:   // Return the inverted value of the EQ bit of CR6.
   4381     BitNo = 0; InvertBit = true;
   4382     break;
   4383   case 2:   // Return the value of the LT bit of CR6.
   4384     BitNo = 2; InvertBit = false;
   4385     break;
   4386   case 3:   // Return the inverted value of the LT bit of CR6.
   4387     BitNo = 2; InvertBit = true;
   4388     break;
   4389   }
   4390 
   4391   // Shift the bit into the low position.
   4392   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
   4393                       DAG.getConstant(8-(3-BitNo), MVT::i32));
   4394   // Isolate the bit.
   4395   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
   4396                       DAG.getConstant(1, MVT::i32));
   4397 
   4398   // If we are supposed to, toggle the bit.
   4399   if (InvertBit)
   4400     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
   4401                         DAG.getConstant(1, MVT::i32));
   4402   return Flags;
   4403 }
   4404 
   4405 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
   4406                                                    SelectionDAG &DAG) const {
   4407   DebugLoc dl = Op.getDebugLoc();
   4408   // Create a stack slot that is 16-byte aligned.
   4409   MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
   4410   int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
   4411   EVT PtrVT = getPointerTy();
   4412   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
   4413 
   4414   // Store the input value into Value#0 of the stack slot.
   4415   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
   4416                                Op.getOperand(0), FIdx, MachinePointerInfo(),
   4417                                false, false, 0);
   4418   // Load it out.
   4419   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
   4420                      false, false, 0);
   4421 }
   4422 
   4423 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
   4424   DebugLoc dl = Op.getDebugLoc();
   4425   if (Op.getValueType() == MVT::v4i32) {
   4426     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
   4427 
   4428     SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
   4429     SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
   4430 
   4431     SDValue RHSSwap =   // = vrlw RHS, 16
   4432       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
   4433 
   4434     // Shrinkify inputs to v8i16.
   4435     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
   4436     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
   4437     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
   4438 
   4439     // Low parts multiplied together, generating 32-bit results (we ignore the
   4440     // top parts).
   4441     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
   4442                                         LHS, RHS, DAG, dl, MVT::v4i32);
   4443 
   4444     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
   4445                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
   4446     // Shift the high parts up 16 bits.
   4447     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
   4448                               Neg16, DAG, dl);
   4449     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
   4450   } else if (Op.getValueType() == MVT::v8i16) {
   4451     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
   4452 
   4453     SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
   4454 
   4455     return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
   4456                             LHS, RHS, Zero, DAG, dl);
   4457   } else if (Op.getValueType() == MVT::v16i8) {
   4458     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
   4459 
   4460     // Multiply the even 8-bit parts, producing 16-bit sums.
   4461     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
   4462                                            LHS, RHS, DAG, dl, MVT::v8i16);
   4463     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
   4464 
   4465     // Multiply the odd 8-bit parts, producing 16-bit sums.
   4466     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
   4467                                           LHS, RHS, DAG, dl, MVT::v8i16);
   4468     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
   4469 
   4470     // Merge the results together.
   4471     int Ops[16];
   4472     for (unsigned i = 0; i != 8; ++i) {
   4473       Ops[i*2  ] = 2*i+1;
   4474       Ops[i*2+1] = 2*i+1+16;
   4475     }
   4476     return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
   4477   } else {
   4478     llvm_unreachable("Unknown mul to lower!");
   4479   }
   4480 }
   4481 
   4482 /// LowerOperation - Provide custom lowering hooks for some operations.
   4483 ///
        /// Dispatches on Op.getOpcode() to the matching Lower* helper and returns the
        /// SDValue that helper produces.  Some helpers also take PPCSubTarget, and
        /// LowerFP_TO_INT also takes Op's debug location.  An opcode with no case
        /// below hits llvm_unreachable, as does ISD::GlobalTLSAddress, which is
        /// rejected with an explicit "not implemented" message.
   4484 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   4485   switch (Op.getOpcode()) {
   4486   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
   4487   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
   4488   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
   4489   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
   4490   case ISD::GlobalTLSAddress:   llvm_unreachable("TLS not implemented for PPC");
   4491   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
   4492   case ISD::SETCC:              return LowerSETCC(Op, DAG);
   4493   case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
          // The varargs and stack helpers below additionally receive the subtarget.
   4494   case ISD::VASTART:
   4495     return LowerVASTART(Op, DAG, PPCSubTarget);
   4496 
   4497   case ISD::VAARG:
   4498     return LowerVAARG(Op, DAG, PPCSubTarget);
   4499 
   4500   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
   4501   case ISD::DYNAMIC_STACKALLOC:
   4502     return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
   4503 
   4504   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
          // Unsigned and signed FP-to-integer conversions share one helper.
   4505   case ISD::FP_TO_UINT:
   4506   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
   4507                                                        Op.getDebugLoc());
   4508   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
   4509   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
   4510 
   4511   // Lower 64-bit shifts.
   4512   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
   4513   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
   4514   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
   4515 
   4516   // Vector-related lowering.
   4517   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
   4518   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
   4519   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   4520   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
   4521   case ISD::MUL:                return LowerMUL(Op, DAG);
   4522 
   4523   // Frame & Return address.
   4524   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
   4525   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
   4526   }
          // Every case above either returns or is marked unreachable.
          // NOTE(review): this trailing return presumably exists only so compilers
          // do not warn about a missing return value - confirm before removing.
   4527   return SDValue();
   4528 }
   4529 
   4530 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
   4531                                            SmallVectorImpl<SDValue>&Results,
   4532                                            SelectionDAG &DAG) const {
          // Custom type-legalization hook.  For the opcodes handled below it builds
          // replacement values for N and appends them to Results.  On the paths that
          // return without appending, Results is left exactly as the caller passed
          // it in.
   4533   const TargetMachine &TM = getTargetMachine();
   4534   DebugLoc dl = N->getDebugLoc();
   4535   switch (N->getOpcode()) {
   4536   default:
            // Unhandled opcode.  When assert() is compiled out this just returns
            // without touching Results.
   4537     assert(false && "Do not know how to custom type legalize this operation!");
   4538     return;
   4539   case ISD::VAARG: {
            // Only handled for the SVR4 ABI on non-PPC64 subtargets; every other
            // configuration returns with no replacement values.
   4540     if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
   4541         || TM.getSubtarget<PPCSubtarget>().isPPC64())
   4542       return;
   4543 
   4544     EVT VT = N->getValueType(0);
   4545 
            // Only an i64 result is replaced; other result types fall through to
            // the return below with Results unchanged.
   4546     if (VT == MVT::i64) {
              // LowerVAARG is handed result #1 of N; both results of the node it
              // returns are pushed, in order.
   4547       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
   4548 
   4549       Results.push_back(NewNode);
   4550       Results.push_back(NewNode.getValue(1));
   4551     }
   4552     return;
   4553   }
   4554   case ISD::FP_ROUND_INREG: {
            // Both the result and the operand must be ppcf128.
   4555     assert(N->getValueType(0) == MVT::ppcf128);
   4556     assert(N->getOperand(0).getValueType() == MVT::ppcf128);
            // Split the ppcf128 operand into its two f64 elements (index 0 and 1).
   4557     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
   4558                              MVT::f64, N->getOperand(0),
   4559                              DAG.getIntPtrConstant(0));
   4560     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
   4561                              MVT::f64, N->getOperand(0),
   4562                              DAG.getIntPtrConstant(1));
   4563 
   4564     // This sequence changes FPSCR to do round-to-zero, adds the two halves
   4565     // of the long double, and puts FPSCR back the way it was.  We do not
   4566     // actually model FPSCR.
            // Each node below produces a Glue result that is fed to the next node as
            // its last operand, which is what ties the five nodes together in order.
   4567     std::vector<EVT> NodeTys;
   4568     SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
   4569 
            // Step 1: MFFS - produces an f64 (kept in MFFSreg for step 5) plus glue.
            // It is built with an operand count of 0, so &InFlag is never read.
   4570     NodeTys.push_back(MVT::f64);   // Return register
   4571     NodeTys.push_back(MVT::Glue);    // Returns a flag for later insns
   4572     Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
   4573     MFFSreg = Result.getValue(0);
   4574     InFlag = Result.getValue(1);
   4575 
            // Step 2: MTFSB1 with the constant 31 (glue in, glue out).
   4576     NodeTys.clear();
   4577     NodeTys.push_back(MVT::Glue);   // Returns a flag
   4578     Ops[0] = DAG.getConstant(31, MVT::i32);
   4579     Ops[1] = InFlag;
   4580     Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
   4581     InFlag = Result.getValue(0);
   4582 
            // Step 3: MTFSB0 with the constant 30 (glue in, glue out).  Per the
            // comment above, steps 2 and 3 together select round-to-zero.
   4583     NodeTys.clear();
   4584     NodeTys.push_back(MVT::Glue);   // Returns a flag
   4585     Ops[0] = DAG.getConstant(30, MVT::i32);
   4586     Ops[1] = InFlag;
   4587     Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
   4588     InFlag = Result.getValue(0);
   4589 
            // Step 4: FADDRTZ - add the two f64 halves; yields the f64 sum and glue.
   4590     NodeTys.clear();
   4591     NodeTys.push_back(MVT::f64);    // result of add
   4592     NodeTys.push_back(MVT::Glue);   // Returns a flag
   4593     Ops[0] = Lo;
   4594     Ops[1] = Hi;
   4595     Ops[2] = InFlag;
   4596     Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
   4597     FPreg = Result.getValue(0);
   4598     InFlag = Result.getValue(1);
   4599 
            // Step 5: MTFSF - takes the constant 1, the value saved by MFFS, the sum
            // from step 4 and the glue; its single f64 result replaces FPreg.
            // NOTE(review): presumably the sum is routed through this node so that
            // its users are ordered after the FPSCR restore - confirm.
   4600     NodeTys.clear();
   4601     NodeTys.push_back(MVT::f64);
   4602     Ops[0] = DAG.getConstant(1, MVT::i32);
   4603     Ops[1] = MFFSreg;
   4604     Ops[2] = FPreg;
   4605     Ops[3] = InFlag;
   4606     Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
   4607     FPreg = Result.getValue(0);
   4608 
   4609     // We know the low half is about to be thrown away, so just use something
   4610     // convenient.
            // Hence the same f64 value is used for both elements of the pair.
   4611     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
   4612                                 FPreg, FPreg));
   4613     return;
   4614   }
   4615   case ISD::FP_TO_SINT:
            // Reuse the regular FP-to-integer lowering for result #0 of N.
   4616     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
   4617     return;
   4618   }
   4619 }
   4620 
   4621 
   4622 //===----------------------------------------------------------------------===//
   4623 //  Other Lowering Code
   4624 //===----------------------------------------------------------------------===//
   4625 
   4626 MachineBasicBlock *
   4627 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   4628                                     bool is64bit, unsigned BinOpcode) const {
          // Expands a word/doubleword atomic read-modify-write pseudo into a
          // load-reserve / operate / store-conditional retry loop.
          //
          //   MI        - the pseudo; operand 0 is the result register, operands 1
          //               and 2 are the two address registers, operand 3 is the
          //               value operand.  MI itself is not erased here.
          //   BB        - the block containing MI.
          //   is64bit   - selects LDARX/STDCX and the G8RC class instead of
          //               LWARX/STWCX and GPRC.
          //   BinOpcode - machine opcode used to combine the loaded value with the
          //               operand, or 0 for a plain swap.
          //
          // Returns the new exit block, which receives everything that followed MI
          // in BB together with BB's former successors.
   4629   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
   4630   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   4631 
   4632   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   4633   MachineFunction *F = BB->getParent();
          // Insertion point for the new blocks: directly after BB in the function.
   4634   MachineFunction::iterator It = BB;
   4635   ++It;
   4636 
   4637   unsigned dest = MI->getOperand(0).getReg();
   4638   unsigned ptrA = MI->getOperand(1).getReg();
   4639   unsigned ptrB = MI->getOperand(2).getReg();
   4640   unsigned incr = MI->getOperand(3).getReg();
   4641   DebugLoc dl = MI->getDebugLoc();
   4642 
   4643   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
   4644   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
   4645   F->insert(It, loopMBB);
   4646   F->insert(It, exitMBB);
          // Move the instructions after MI, and BB's successor edges, to exitMBB.
   4647   exitMBB->splice(exitMBB->begin(), BB,
   4648                   llvm::next(MachineBasicBlock::iterator(MI)),
   4649                   BB->end());
   4650   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
   4651 
          // For a swap there is nothing to compute, so the operand register itself
          // is what gets stored; otherwise a fresh virtual register holds the
          // result of BinOpcode.
   4652   MachineRegisterInfo &RegInfo = F->getRegInfo();
   4653   unsigned TmpReg = (!BinOpcode) ? incr :
   4654     RegInfo.createVirtualRegister(
   4655        is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
   4656                  (const TargetRegisterClass *) &PPC::GPRCRegClass);
   4657 
   4658   //  thisMBB:
   4659   //   ...
   4660   //   fallthrough --> loopMBB
   4661   BB->addSuccessor(loopMBB);
   4662 
   4663   //  loopMBB:
   4664   //   l[wd]arx dest, ptr
   4665   //   add r0, dest, incr
   4666   //   st[wd]cx. r0, ptr
   4667   //   bne- loopMBB
   4668   //   fallthrough --> exitMBB
          // ("add" above stands for whatever BinOpcode is; it is omitted for a swap.)
   4669   BB = loopMBB;
   4670   BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
   4671     .addReg(ptrA).addReg(ptrB);
          // The operand register is passed first and the loaded value second.
   4672   if (BinOpcode)
   4673     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
   4674   BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
   4675     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
          // Branch back to the top of the loop when CR0 reports "not equal" after
          // the store-conditional.
   4676   BuildMI(BB, dl, TII->get(PPC::BCC))
   4677     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
   4678   BB->addSuccessor(loopMBB);
   4679   BB->addSuccessor(exitMBB);
   4680 
   4681   //  exitMBB:
   4682   //   ...
   4683   BB = exitMBB;
   4684   return BB;
   4685 }
   4686 
   4687 MachineBasicBlock *
   4688 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
   4689                                             MachineBasicBlock *BB,
   4690                                             bool is8bit,    // operation
   4691                                             unsigned BinOpcode) const {
          // Expands an 8-bit or 16-bit atomic read-modify-write pseudo.  The access
          // is performed on the aligned 32-bit word that contains the byte or
          // halfword: the operand is shifted into position, the word is updated
          // under a mask inside a lwarx/stwcx. retry loop, and the previous word is
          // shifted back down into the result register.
          //
          //   MI        - the pseudo; operand 0 is the result register, operands 1
          //               and 2 are the two address registers, operand 3 is the
          //               value operand.  MI itself is not erased here.
          //   BB        - the block containing MI.
          //   is8bit    - true for a byte access, false for a halfword access.
          //   BinOpcode - machine opcode used to combine the loaded word with the
          //               shifted operand, or 0 for a plain swap.
          //
          // Returns the new exit block, which receives everything that followed MI
          // in BB together with BB's former successors.
   4692   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
   4693   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   4694   // In 64 bit mode we have to use 64 bits for addresses, even though the
   4695   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
   4696   // registers without caring whether they're 32 or 64, but here we're
   4697   // doing actual arithmetic on the addresses.
   4698   bool is64bit = PPCSubTarget.isPPC64();
   4699   unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
   4700 
   4701   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   4702   MachineFunction *F = BB->getParent();
          // Insertion point for the new blocks: directly after BB in the function.
   4703   MachineFunction::iterator It = BB;
   4704   ++It;
   4705 
   4706   unsigned dest = MI->getOperand(0).getReg();
   4707   unsigned ptrA = MI->getOperand(1).getReg();
   4708   unsigned ptrB = MI->getOperand(2).getReg();
   4709   unsigned incr = MI->getOperand(3).getReg();
   4710   DebugLoc dl = MI->getDebugLoc();
   4711 
   4712   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
   4713   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
   4714   F->insert(It, loopMBB);
   4715   F->insert(It, exitMBB);
          // Move the instructions after MI, and BB's successor edges, to exitMBB.
   4716   exitMBB->splice(exitMBB->begin(), BB,
   4717                   llvm::next(MachineBasicBlock::iterator(MI)),
   4718                   BB->end());
   4719   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
   4720 
          // All temporaries share one register class chosen by pointer width.
   4721   MachineRegisterInfo &RegInfo = F->getRegInfo();
   4722   const TargetRegisterClass *RC =
   4723     is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
   4724               (const TargetRegisterClass *) &PPC::GPRCRegClass;
   4725   unsigned PtrReg = RegInfo.createVirtualRegister(RC);
   4726   unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
   4727   unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
   4728   unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
   4729   unsigned MaskReg = RegInfo.createVirtualRegister(RC);
   4730   unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
   4731   unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
   4732   unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
   4733   unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
   4734   unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
   4735   unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
   4736   unsigned Ptr1Reg;
          // For a swap the shifted operand itself is the new field value.
   4737   unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
   4738 
   4739   //  thisMBB:
   4740   //   ...
   4741   //   fallthrough --> loopMBB
   4742   BB->addSuccessor(loopMBB);
   4743 
   4744   // The 4-byte load must be aligned, while a char or short may be
   4745   // anywhere in the word.  Hence all this nasty bookkeeping code.
   4746   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
   4747   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
   4748   //   xori shift, shift1, 24 [16]
   4749   //   rlwinm ptr, ptr1, 0, 0, 29
   4750   //   slw incr2, incr, shift
   4751   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
   4752   //   slw mask, mask2, shift
   4753   //  loopMBB:
   4754   //   lwarx tmpDest, ptr
   4755   //   add tmp, tmpDest, incr2
   4756   //   andc tmp2, tmpDest, mask
   4757   //   and tmp3, tmp, mask
   4758   //   or tmp4, tmp3, tmp2
   4759   //   stwcx. tmp4, ptr
   4760   //   bne- loopMBB
   4761   //   fallthrough --> exitMBB
   4762   //   srw dest, tmpDest, shift
          // Form the full address in one register unless ptrA is the zero register.
   4763   if (ptrA != ZeroReg) {
   4764     Ptr1Reg = RegInfo.createVirtualRegister(RC);
   4765     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
   4766       .addReg(ptrA).addReg(ptrB);
   4767   } else {
   4768     Ptr1Reg = ptrB;
   4769   }
          // shift1 = 8 * (byte offset of the field within its word), taken from the
          // low address bits; the xori then flips it into the shift count used for
          // the slw/srw instructions below.
   4770   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
   4771       .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
   4772   BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
   4773       .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
          // ptr = ptr1 with its two low bits cleared, i.e. the containing word.
   4774   if (is64bit)
   4775     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
   4776       .addReg(Ptr1Reg).addImm(0).addImm(61);
   4777   else
   4778     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
   4779       .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
          // Move the operand and a 0xFF / 0xFFFF mask into the field's position.
   4780   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
   4781       .addReg(incr).addReg(ShiftReg);
   4782   if (is8bit)
   4783     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
   4784   else {
   4785     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
   4786     BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
   4787   }
   4788   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
   4789       .addReg(Mask2Reg).addReg(ShiftReg);
   4790 
   4791   BB = loopMBB;
   4792   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
   4793     .addReg(ZeroReg).addReg(PtrReg);
          // The shifted operand is passed first and the loaded word second.
   4794   if (BinOpcode)
   4795     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
   4796       .addReg(Incr2Reg).addReg(TmpDestReg);
          // tmp4 = (loaded word with the field cleared) | (new value limited to the
          // field), so the bytes outside the field are written back unchanged.
   4797   BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
   4798     .addReg(TmpDestReg).addReg(MaskReg);
   4799   BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
   4800     .addReg(TmpReg).addReg(MaskReg);
   4801   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
   4802     .addReg(Tmp3Reg).addReg(Tmp2Reg);
          // The store-conditional must be the word form even on 64-bit targets: it
          // has to pair with the LWARX above, which reserved a 4-byte word at an
          // address that is only guaranteed to be word-aligned.  A doubleword
          // store-conditional here would not match that reservation.
   4803   BuildMI(BB, dl, TII->get(PPC::STWCX))
   4804     .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
          // Branch back to the top of the loop when CR0 reports "not equal" after
          // the store-conditional.
   4805   BuildMI(BB, dl, TII->get(PPC::BCC))
   4806     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
   4807   BB->addSuccessor(loopMBB);
   4808   BB->addSuccessor(exitMBB);
   4809 
   4810   //  exitMBB:
   4811   //   ...
   4812   BB = exitMBB;
          // The result is the previously loaded word shifted back down, inserted at
          // the very top of the exit block.
          // NOTE(review): the bits above the field are not masked off here -
          // confirm that consumers of dest tolerate that.
   4813   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
   4814     .addReg(ShiftReg);
   4815   return BB;
   4816 }
   4817 
   4818 MachineBasicBlock *
   4819 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   4820                                                MachineBasicBlock *BB) const {
          // Expands a pseudo instruction that needs new control flow.  Handles the
          // SELECT_CC_* pseudos, the ATOMIC_LOAD_<op>_I{8,16,32,64} and
          // ATOMIC_SWAP_I{8,16,32,64} pseudos (delegated to EmitAtomicBinary and
          // EmitPartwordAtomicBinary), and the ATOMIC_CMP_SWAP_I{8,16,32,64}
          // pseudos.  Any other opcode hits llvm_unreachable.
          //
          //   MI - the pseudo to expand; it is erased before returning.
          //   BB - the block containing MI.
          //
          // Returns the block that now holds the instructions which followed MI.
   4821   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   4822 
   4823   // To "insert" these instructions we actually have to insert their
   4824   // control-flow patterns.
   4825   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   4826   MachineFunction::iterator It = BB;
   4827   ++It;
   4828 
   4829   MachineFunction *F = BB->getParent();
   4830 
   4831   if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
   4832       MI->getOpcode() == PPC::SELECT_CC_I8 ||
   4833       MI->getOpcode() == PPC::SELECT_CC_F4 ||
   4834       MI->getOpcode() == PPC::SELECT_CC_F8 ||
   4835       MI->getOpcode() == PPC::SELECT_CC_VRRC) {
   4836 
   4837     // The incoming instruction knows the destination vreg to set, the
   4838     // condition code register to branch on, the true/false values to
   4839     // select between, and a branch opcode to use.
            // Operand layout used below: 0 = result, 1 = condition register,
            // 2 = true value, 3 = false value, 4 = branch predicate immediate.
   4840 
   4841     //  thisMBB:
   4842     //  ...
   4843     //   TrueVal = ...
   4844     //   cmpTY ccX, r1, r2
   4845     //   bCC sinkMBB
   4846     //   fallthrough --> copy0MBB
   4847     MachineBasicBlock *thisMBB = BB;
   4848     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
   4849     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
   4850     unsigned SelectPred = MI->getOperand(4).getImm();
   4851     DebugLoc dl = MI->getDebugLoc();
   4852     F->insert(It, copy0MBB);
   4853     F->insert(It, sinkMBB);
   4854 
   4855     // Transfer the remainder of BB and its successor edges to sinkMBB.
   4856     sinkMBB->splice(sinkMBB->begin(), BB,
   4857                     llvm::next(MachineBasicBlock::iterator(MI)),
   4858                     BB->end());
   4859     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
   4860 
   4861     // Next, add the true and fallthrough blocks as its successors.
   4862     BB->addSuccessor(copy0MBB);
   4863     BB->addSuccessor(sinkMBB);
   4864 
            // Branch straight to sinkMBB when the predicate holds.
   4865     BuildMI(BB, dl, TII->get(PPC::BCC))
   4866       .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
   4867 
   4868     //  copy0MBB:
   4869     //   %FalseValue = ...
   4870     //   # fallthrough to sinkMBB
   4871     BB = copy0MBB;
   4872 
   4873     // Update machine-CFG edges
   4874     BB->addSuccessor(sinkMBB);
   4875 
   4876     //  sinkMBB:
   4877     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
   4878     //  ...
   4879     BB = sinkMBB;
   4880     BuildMI(*BB, BB->begin(), dl,
   4881             TII->get(PPC::PHI), MI->getOperand(0).getReg())
   4882       .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
   4883       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
   4884   }
          // Atomic read-modify-write pseudos.  The I8/I16 forms go through the
          // partword helper (third argument: true for 8-bit); the I32/I64 forms go
          // through the full-width helper (third argument: true for 64-bit).  The
          // last argument is the machine opcode that combines the operand with the
          // loaded value.
   4885   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
   4886     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
   4887   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
   4888     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
   4889   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
   4890     BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
   4891   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
   4892     BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
   4893 
   4894   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
   4895     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
   4896   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
   4897     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
   4898   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
   4899     BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
   4900   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
   4901     BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
   4902 
   4903   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
   4904     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
   4905   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
   4906     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
   4907   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
   4908     BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
   4909   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
   4910     BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
   4911 
   4912   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
   4913     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
   4914   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
   4915     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
   4916   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
   4917     BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
   4918   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
   4919     BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
   4920 
          // NAND stores ~(old & operand), which is exactly what the nand
          // instruction computes.  andc is not a substitute: the helpers pass the
          // operand first and the loaded value second, so andc would yield
          // operand & ~old instead.
   4921   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
   4922     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
   4923   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
   4924     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
   4925   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
   4926     BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
   4927   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
   4928     BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
   4929 
   4930   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
   4931     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
   4932   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
   4933     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
   4934   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
   4935     BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
   4936   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
   4937     BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
   4938 
          // Swap is expressed as "no binary opcode" (0).
   4939   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
   4940     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
   4941   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
   4942     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
   4943   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
   4944     BB = EmitAtomicBinary(MI, BB, false, 0);
   4945   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
   4946     BB = EmitAtomicBinary(MI, BB, true, 0);
   4947 
   4948   else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
   4949            MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
            // Word / doubleword compare-and-swap.  Operand layout: 0 = result,
            // 1 and 2 = address registers, 3 = expected value, 4 = new value.
   4950     bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
   4951 
   4952     unsigned dest   = MI->getOperand(0).getReg();
   4953     unsigned ptrA   = MI->getOperand(1).getReg();
   4954     unsigned ptrB   = MI->getOperand(2).getReg();
   4955     unsigned oldval = MI->getOperand(3).getReg();
   4956     unsigned newval = MI->getOperand(4).getReg();
   4957     DebugLoc dl     = MI->getDebugLoc();
   4958 
   4959     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
   4960     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
   4961     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
   4962     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
   4963     F->insert(It, loop1MBB);
   4964     F->insert(It, loop2MBB);
   4965     F->insert(It, midMBB);
   4966     F->insert(It, exitMBB);
            // Move the instructions after MI, and BB's successor edges, to exitMBB.
   4967     exitMBB->splice(exitMBB->begin(), BB,
   4968                     llvm::next(MachineBasicBlock::iterator(MI)),
   4969                     BB->end());
   4970     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
   4971 
   4972     //  thisMBB:
   4973     //   ...
   4974     //   fallthrough --> loop1MBB
   4975     BB->addSuccessor(loop1MBB);
   4976 
   4977     // loop1MBB:
   4978     //   l[wd]arx dest, ptr
   4979     //   cmp[wd] dest, oldval
   4980     //   bne- midMBB
   4981     // loop2MBB:
   4982     //   st[wd]cx. newval, ptr
   4983     //   bne- loop1MBB
   4984     //   b exitBB
   4985     // midMBB:
   4986     //   st[wd]cx. dest, ptr
   4987     // exitBB:
   4988     BB = loop1MBB;
   4989     BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
   4990       .addReg(ptrA).addReg(ptrB);
   4991     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
   4992       .addReg(oldval).addReg(dest);
   4993     BuildMI(BB, dl, TII->get(PPC::BCC))
   4994       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
   4995     BB->addSuccessor(loop2MBB);
   4996     BB->addSuccessor(midMBB);
   4997 
            // Values matched: try to store the new value, retrying from the load if
            // the store-conditional leaves CR0 "not equal".
   4998     BB = loop2MBB;
   4999     BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
   5000       .addReg(newval).addReg(ptrA).addReg(ptrB);
   5001     BuildMI(BB, dl, TII->get(PPC::BCC))
   5002       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
   5003     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
   5004     BB->addSuccessor(loop1MBB);
   5005     BB->addSuccessor(exitMBB);
   5006 
            // Values differed: store the loaded value back and fall into exitMBB.
   5007     BB = midMBB;
   5008     BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
   5009       .addReg(dest).addReg(ptrA).addReg(ptrB);
   5010     BB->addSuccessor(exitMBB);
   5011 
   5012     //  exitMBB:
   5013     //   ...
   5014     BB = exitMBB;
   5015   } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
   5016              MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
            // Byte / halfword compare-and-swap, performed on the aligned word that
            // contains the field.  Operand layout is the same as for the word form.
   5017     // We must use 64-bit registers for addresses when targeting 64-bit,
   5018     // since we're actually doing arithmetic on them.  Other registers
   5019     // can be 32-bit.
   5020     bool is64bit = PPCSubTarget.isPPC64();
   5021     bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
   5022 
   5023     unsigned dest   = MI->getOperand(0).getReg();
   5024     unsigned ptrA   = MI->getOperand(1).getReg();
   5025     unsigned ptrB   = MI->getOperand(2).getReg();
   5026     unsigned oldval = MI->getOperand(3).getReg();
   5027     unsigned newval = MI->getOperand(4).getReg();
   5028     DebugLoc dl     = MI->getDebugLoc();
   5029 
   5030     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
   5031     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
   5032     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
   5033     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
   5034     F->insert(It, loop1MBB);
   5035     F->insert(It, loop2MBB);
   5036     F->insert(It, midMBB);
   5037     F->insert(It, exitMBB);
            // Move the instructions after MI, and BB's successor edges, to exitMBB.
   5038     exitMBB->splice(exitMBB->begin(), BB,
   5039                     llvm::next(MachineBasicBlock::iterator(MI)),
   5040                     BB->end());
   5041     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
   5042 
   5043     MachineRegisterInfo &RegInfo = F->getRegInfo();
   5044     const TargetRegisterClass *RC =
   5045       is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
   5046                 (const TargetRegisterClass *) &PPC::GPRCRegClass;
   5047     unsigned PtrReg = RegInfo.createVirtualRegister(RC);
   5048     unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
   5049     unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
   5050     unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
   5051     unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
   5052     unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
   5053     unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
   5054     unsigned MaskReg = RegInfo.createVirtualRegister(RC);
   5055     unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
   5056     unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
   5057     unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
   5058     unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
   5059     unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
   5060     unsigned Ptr1Reg;
   5061     unsigned TmpReg = RegInfo.createVirtualRegister(RC);
   5062     unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
   5063     //  thisMBB:
   5064     //   ...
   5065     //   fallthrough --> loop1MBB
   5066     BB->addSuccessor(loop1MBB);
   5067 
   5068     // The 4-byte load must be aligned, while a char or short may be
   5069     // anywhere in the word.  Hence all this nasty bookkeeping code.
   5070     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
   5071     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
   5072     //   xori shift, shift1, 24 [16]
   5073     //   rlwinm ptr, ptr1, 0, 0, 29
   5074     //   slw newval2, newval, shift
   5075     //   slw oldval2, oldval,shift
   5076     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
   5077     //   slw mask, mask2, shift
   5078     //   and newval3, newval2, mask
   5079     //   and oldval3, oldval2, mask
   5080     // loop1MBB:
   5081     //   lwarx tmpDest, ptr
   5082     //   and tmp, tmpDest, mask
   5083     //   cmpw tmp, oldval3
   5084     //   bne- midMBB
   5085     // loop2MBB:
   5086     //   andc tmp2, tmpDest, mask
   5087     //   or tmp4, tmp2, newval3
   5088     //   stwcx. tmp4, ptr
   5089     //   bne- loop1MBB
   5090     //   b exitBB
   5091     // midMBB:
   5092     //   stwcx. tmpDest, ptr
   5093     // exitBB:
   5094     //   srw dest, tmp, shift
            // Form the full address in one register unless ptrA is the zero register.
   5095     if (ptrA != ZeroReg) {
   5096       Ptr1Reg = RegInfo.createVirtualRegister(RC);
   5097       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
   5098         .addReg(ptrA).addReg(ptrB);
   5099     } else {
   5100       Ptr1Reg = ptrB;
   5101     }
   5102     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
   5103         .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
   5104     BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
   5105         .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
            // ptr = ptr1 with its two low bits cleared, i.e. the containing word.
   5106     if (is64bit)
   5107       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
   5108         .addReg(Ptr1Reg).addImm(0).addImm(61);
   5109     else
   5110       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
   5111         .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
            // Shift both comparison operands and the mask into the field's position,
            // then limit the operands to the field.
   5112     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
   5113         .addReg(newval).addReg(ShiftReg);
   5114     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
   5115         .addReg(oldval).addReg(ShiftReg);
   5116     if (is8bit)
   5117       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
   5118     else {
   5119       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
   5120       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
   5121         .addReg(Mask3Reg).addImm(65535);
   5122     }
   5123     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
   5124         .addReg(Mask2Reg).addReg(ShiftReg);
   5125     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
   5126         .addReg(NewVal2Reg).addReg(MaskReg);
   5127     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
   5128         .addReg(OldVal2Reg).addReg(MaskReg);
   5129 
            // Load the word, isolate the field and compare it with the expected value.
   5130     BB = loop1MBB;
   5131     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
   5132         .addReg(ZeroReg).addReg(PtrReg);
   5133     BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
   5134         .addReg(TmpDestReg).addReg(MaskReg);
   5135     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
   5136         .addReg(TmpReg).addReg(OldVal3Reg);
   5137     BuildMI(BB, dl, TII->get(PPC::BCC))
   5138         .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
   5139     BB->addSuccessor(loop2MBB);
   5140     BB->addSuccessor(midMBB);
   5141 
            // Field matched: splice the new value into the word and try to store it.
   5142     BB = loop2MBB;
   5143     BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
   5144         .addReg(TmpDestReg).addReg(MaskReg);
   5145     BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
   5146         .addReg(Tmp2Reg).addReg(NewVal3Reg);
   5147     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
   5148         .addReg(ZeroReg).addReg(PtrReg);
   5149     BuildMI(BB, dl, TII->get(PPC::BCC))
   5150       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
   5151     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
   5152     BB->addSuccessor(loop1MBB);
   5153     BB->addSuccessor(exitMBB);
   5154 
            // Field differed: store the loaded word back unchanged.
   5155     BB = midMBB;
   5156     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
   5157       .addReg(ZeroReg).addReg(PtrReg);
   5158     BB->addSuccessor(exitMBB);
   5159 
   5160     //  exitMBB:
   5161     //   ...
   5162     BB = exitMBB;
            // The result is the masked field shifted back down, inserted at the very
            // top of the exit block.
   5163     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
   5164       .addReg(ShiftReg);
   5165   } else {
   5166     llvm_unreachable("Unexpected instr type to insert");
   5167   }
   5168 
   5169   MI->eraseFromParent();   // The pseudo instruction is gone now.
   5170   return BB;
   5171 }
   5172 
   5173 //===----------------------------------------------------------------------===//
   5174 // Target Optimization Hooks
   5175 //===----------------------------------------------------------------------===//
   5176 
   5177 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
   5178                                              DAGCombinerInfo &DCI) const {
   5179   const TargetMachine &TM = getTargetMachine();
   5180   SelectionDAG &DAG = DCI.DAG;
   5181   DebugLoc dl = N->getDebugLoc();
   5182   switch (N->getOpcode()) {
   5183   default: break;
   5184   case PPCISD::SHL:
   5185     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
   5186       if (C->isNullValue())   // 0 << V -> 0.
   5187         return N->getOperand(0);
   5188     }
   5189     break;
   5190   case PPCISD::SRL:
   5191     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
   5192       if (C->isNullValue())   // 0 >>u V -> 0.
   5193         return N->getOperand(0);
   5194     }
   5195     break;
   5196   case PPCISD::SRA:
   5197     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
   5198       if (C->isNullValue() ||   //  0 >>s V -> 0.
   5199           C->isAllOnesValue())    // -1 >>s V -> -1.
   5200         return N->getOperand(0);
   5201     }
   5202     break;
   5203 
   5204   case ISD::SINT_TO_FP:
   5205     if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
   5206       if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
   5207         // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
   5208         // We allow the src/dst to be either f32/f64, but the intermediate
   5209         // type must be i64.
   5210         if (N->getOperand(0).getValueType() == MVT::i64 &&
   5211             N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
   5212           SDValue Val = N->getOperand(0).getOperand(0);
   5213           if (Val.getValueType() == MVT::f32) {
   5214             Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
   5215             DCI.AddToWorklist(Val.getNode());
   5216           }
   5217 
   5218           Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
   5219           DCI.AddToWorklist(Val.getNode());
   5220           Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
   5221           DCI.AddToWorklist(Val.getNode());
   5222           if (N->getValueType(0) == MVT::f32) {
   5223             Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
   5224                               DAG.getIntPtrConstant(0));
   5225             DCI.AddToWorklist(Val.getNode());
   5226           }
   5227           return Val;
   5228         } else if (N->getOperand(0).getValueType() == MVT::i32) {
   5229           // If the intermediate type is i32, we can avoid the load/store here
   5230           // too.
   5231         }
   5232       }
   5233     }
   5234     break;
   5235   case ISD::STORE:
   5236     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
   5237     if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
   5238         !cast<StoreSDNode>(N)->isTruncatingStore() &&
   5239         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
   5240         N->getOperand(1).getValueType() == MVT::i32 &&
   5241         N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
   5242       SDValue Val = N->getOperand(1).getOperand(0);
   5243       if (Val.getValueType() == MVT::f32) {
   5244         Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
   5245         DCI.AddToWorklist(Val.getNode());
   5246       }
   5247       Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
   5248       DCI.AddToWorklist(Val.getNode());
   5249 
   5250       Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
   5251                         N->getOperand(2), N->getOperand(3));
   5252       DCI.AddToWorklist(Val.getNode());
   5253       return Val;
   5254     }
   5255 
   5256     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
   5257     if (cast<StoreSDNode>(N)->isUnindexed() &&
   5258         N->getOperand(1).getOpcode() == ISD::BSWAP &&
   5259         N->getOperand(1).getNode()->hasOneUse() &&
   5260         (N->getOperand(1).getValueType() == MVT::i32 ||
   5261          N->getOperand(1).getValueType() == MVT::i16)) {
   5262       SDValue BSwapOp = N->getOperand(1).getOperand(0);
   5263       // Do an any-extend to 32-bits if this is a half-word input.
   5264       if (BSwapOp.getValueType() == MVT::i16)
   5265         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
   5266 
   5267       SDValue Ops[] = {
   5268         N->getOperand(0), BSwapOp, N->getOperand(2),
   5269         DAG.getValueType(N->getOperand(1).getValueType())
   5270       };
   5271       return
   5272         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
   5273                                 Ops, array_lengthof(Ops),
   5274                                 cast<StoreSDNode>(N)->getMemoryVT(),
   5275                                 cast<StoreSDNode>(N)->getMemOperand());
   5276     }
   5277     break;
   5278   case ISD::BSWAP:
   5279     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
   5280     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
   5281         N->getOperand(0).hasOneUse() &&
   5282         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
   5283       SDValue Load = N->getOperand(0);
   5284       LoadSDNode *LD = cast<LoadSDNode>(Load);
   5285       // Create the byte-swapping load.
   5286       SDValue Ops[] = {
   5287         LD->getChain(),    // Chain
   5288         LD->getBasePtr(),  // Ptr
   5289         DAG.getValueType(N->getValueType(0)) // VT
   5290       };
   5291       SDValue BSLoad =
   5292         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
   5293                                 DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
   5294                                 LD->getMemoryVT(), LD->getMemOperand());
   5295 
   5296       // If this is an i16 load, insert the truncate.
   5297       SDValue ResVal = BSLoad;
   5298       if (N->getValueType(0) == MVT::i16)
   5299         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
   5300 
   5301       // First, combine the bswap away.  This makes the value produced by the
   5302       // load dead.
   5303       DCI.CombineTo(N, ResVal);
   5304 
   5305       // Next, combine the load away, we give it a bogus result value but a real
   5306       // chain result.  The result value is dead because the bswap is dead.
   5307       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
   5308 
   5309       // Return N so it doesn't get rechecked!
   5310       return SDValue(N, 0);
   5311     }
   5312 
   5313     break;
   5314   case PPCISD::VCMP: {
   5315     // If a VCMPo node already exists with exactly the same operands as this
   5316     // node, use its result instead of this node (VCMPo computes both a CR6 and
   5317     // a normal output).
   5318     //
   5319     if (!N->getOperand(0).hasOneUse() &&
   5320         !N->getOperand(1).hasOneUse() &&
   5321         !N->getOperand(2).hasOneUse()) {
   5322 
   5323       // Scan all of the users of the LHS, looking for VCMPo's that match.
   5324       SDNode *VCMPoNode = 0;
   5325 
   5326       SDNode *LHSN = N->getOperand(0).getNode();
   5327       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
   5328            UI != E; ++UI)
   5329         if (UI->getOpcode() == PPCISD::VCMPo &&
   5330             UI->getOperand(1) == N->getOperand(1) &&
   5331             UI->getOperand(2) == N->getOperand(2) &&
   5332             UI->getOperand(0) == N->getOperand(0)) {
   5333           VCMPoNode = *UI;
   5334           break;
   5335         }
   5336 
   5337       // If there is no VCMPo node, or if the flag value has a single use, don't
   5338       // transform this.
   5339       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
   5340         break;
   5341 
   5342       // Look at the (necessarily single) use of the flag value.  If it has a
   5343       // chain, this transformation is more complex.  Note that multiple things
   5344       // could use the value result, which we should ignore.
   5345       SDNode *FlagUser = 0;
   5346       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
   5347            FlagUser == 0; ++UI) {
   5348         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
   5349         SDNode *User = *UI;
   5350         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
   5351           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
   5352             FlagUser = User;
   5353             break;
   5354           }
   5355         }
   5356       }
   5357 
   5358       // If the user is a MFCR instruction, we know this is safe.  Otherwise we
   5359       // give up for right now.
   5360       if (FlagUser->getOpcode() == PPCISD::MFCR)
   5361         return SDValue(VCMPoNode, 0);
   5362     }
   5363     break;
   5364   }
   5365   case ISD::BR_CC: {
   5366     // If this is a branch on an altivec predicate comparison, lower this so
   5367     // that we don't have to do a MFCR: instead, branch directly on CR6.  This
   5368     // lowering is done pre-legalize, because the legalizer lowers the predicate
   5369     // compare down to code that is difficult to reassemble.
   5370     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
   5371     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
   5372     int CompareOpc;
   5373     bool isDot;
   5374 
   5375     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
   5376         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
   5377         getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
   5378       assert(isDot && "Can't compare against a vector result!");
   5379 
   5380       // If this is a comparison against something other than 0/1, then we know
   5381       // that the condition is never/always true.
   5382       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
   5383       if (Val != 0 && Val != 1) {
   5384         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
   5385           return N->getOperand(0);
   5386         // Always !=, turn it into an unconditional branch.
   5387         return DAG.getNode(ISD::BR, dl, MVT::Other,
   5388                            N->getOperand(0), N->getOperand(4));
   5389       }
   5390 
   5391       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
   5392 
   5393       // Create the PPCISD altivec 'dot' comparison node.
   5394       std::vector<EVT> VTs;
   5395       SDValue Ops[] = {
   5396         LHS.getOperand(2),  // LHS of compare
   5397         LHS.getOperand(3),  // RHS of compare
   5398         DAG.getConstant(CompareOpc, MVT::i32)
   5399       };
   5400       VTs.push_back(LHS.getOperand(2).getValueType());
   5401       VTs.push_back(MVT::Glue);
   5402       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
   5403 
   5404       // Unpack the result based on how the target uses it.
   5405       PPC::Predicate CompOpc;
   5406       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
   5407       default:  // Can't happen, don't crash on invalid number though.
   5408       case 0:   // Branch on the value of the EQ bit of CR6.
   5409         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
   5410         break;
   5411       case 1:   // Branch on the inverted value of the EQ bit of CR6.
   5412         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
   5413         break;
   5414       case 2:   // Branch on the value of the LT bit of CR6.
   5415         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
   5416         break;
   5417       case 3:   // Branch on the inverted value of the LT bit of CR6.
   5418         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
   5419         break;
   5420       }
   5421 
   5422       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
   5423                          DAG.getConstant(CompOpc, MVT::i32),
   5424                          DAG.getRegister(PPC::CR6, MVT::i32),
   5425                          N->getOperand(4), CompNode.getValue(1));
   5426     }
   5427     break;
   5428   }
   5429   }
   5430 
   5431   return SDValue();
   5432 }
   5433 
   5434 //===----------------------------------------------------------------------===//
   5435 // Inline Assembly Support
   5436 //===----------------------------------------------------------------------===//
   5437 
   5438 void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
   5439                                                        const APInt &Mask,
   5440                                                        APInt &KnownZero,
   5441                                                        APInt &KnownOne,
   5442                                                        const SelectionDAG &DAG,
   5443                                                        unsigned Depth) const {
   5444   KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
   5445   switch (Op.getOpcode()) {
   5446   default: break;
   5447   case PPCISD::LBRX: {
   5448     // lhbrx is known to have the top bits cleared out.
   5449     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
   5450       KnownZero = 0xFFFF0000;
   5451     break;
   5452   }
   5453   case ISD::INTRINSIC_WO_CHAIN: {
   5454     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
   5455     default: break;
   5456     case Intrinsic::ppc_altivec_vcmpbfp_p:
   5457     case Intrinsic::ppc_altivec_vcmpeqfp_p:
   5458     case Intrinsic::ppc_altivec_vcmpequb_p:
   5459     case Intrinsic::ppc_altivec_vcmpequh_p:
   5460     case Intrinsic::ppc_altivec_vcmpequw_p:
   5461     case Intrinsic::ppc_altivec_vcmpgefp_p:
   5462     case Intrinsic::ppc_altivec_vcmpgtfp_p:
   5463     case Intrinsic::ppc_altivec_vcmpgtsb_p:
   5464     case Intrinsic::ppc_altivec_vcmpgtsh_p:
   5465     case Intrinsic::ppc_altivec_vcmpgtsw_p:
   5466     case Intrinsic::ppc_altivec_vcmpgtub_p:
   5467     case Intrinsic::ppc_altivec_vcmpgtuh_p:
   5468     case Intrinsic::ppc_altivec_vcmpgtuw_p:
   5469       KnownZero = ~1U;  // All bits but the low one are known to be zero.
   5470       break;
   5471     }
   5472   }
   5473   }
   5474 }
   5475 
   5476 
   5477 /// getConstraintType - Given a constraint, return the type of
   5478 /// constraint it is for this target.
   5479 PPCTargetLowering::ConstraintType
   5480 PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
   5481   if (Constraint.size() == 1) {
   5482     switch (Constraint[0]) {
   5483     default: break;
   5484     case 'b':
   5485     case 'r':
   5486     case 'f':
   5487     case 'v':
   5488     case 'y':
   5489       return C_RegisterClass;
   5490     }
   5491   }
   5492   return TargetLowering::getConstraintType(Constraint);
   5493 }
   5494 
   5495 /// Examine constraint type and operand type and determine a weight value.
   5496 /// This object must already have been set up with the operand type
   5497 /// and the current alternative constraint selected.
   5498 TargetLowering::ConstraintWeight
   5499 PPCTargetLowering::getSingleConstraintMatchWeight(
   5500     AsmOperandInfo &info, const char *constraint) const {
   5501   ConstraintWeight weight = CW_Invalid;
   5502   Value *CallOperandVal = info.CallOperandVal;
   5503     // If we don't have a value, we can't do a match,
   5504     // but allow it at the lowest weight.
   5505   if (CallOperandVal == NULL)
   5506     return CW_Default;
   5507   Type *type = CallOperandVal->getType();
   5508   // Look at the constraint type.
   5509   switch (*constraint) {
   5510   default:
   5511     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
   5512     break;
   5513   case 'b':
   5514     if (type->isIntegerTy())
   5515       weight = CW_Register;
   5516     break;
   5517   case 'f':
   5518     if (type->isFloatTy())
   5519       weight = CW_Register;
   5520     break;
   5521   case 'd':
   5522     if (type->isDoubleTy())
   5523       weight = CW_Register;
   5524     break;
   5525   case 'v':
   5526     if (type->isVectorTy())
   5527       weight = CW_Register;
   5528     break;
   5529   case 'y':
   5530     weight = CW_Register;
   5531     break;
   5532   }
   5533   return weight;
   5534 }
   5535 
   5536 std::pair<unsigned, const TargetRegisterClass*>
   5537 PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
   5538                                                 EVT VT) const {
   5539   if (Constraint.size() == 1) {
   5540     // GCC RS6000 Constraint Letters
   5541     switch (Constraint[0]) {
   5542     case 'b':   // R1-R31
   5543     case 'r':   // R0-R31
   5544       if (VT == MVT::i64 && PPCSubTarget.isPPC64())
   5545         return std::make_pair(0U, PPC::G8RCRegisterClass);
   5546       return std::make_pair(0U, PPC::GPRCRegisterClass);
   5547     case 'f':
   5548       if (VT == MVT::f32)
   5549         return std::make_pair(0U, PPC::F4RCRegisterClass);
   5550       else if (VT == MVT::f64)
   5551         return std::make_pair(0U, PPC::F8RCRegisterClass);
   5552       break;
   5553     case 'v':
   5554       return std::make_pair(0U, PPC::VRRCRegisterClass);
   5555     case 'y':   // crrc
   5556       return std::make_pair(0U, PPC::CRRCRegisterClass);
   5557     }
   5558   }
   5559 
   5560   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
   5561 }
   5562 
   5563 
   5564 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
   5565 /// vector.  If it is invalid, don't add anything to Ops.
   5566 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   5567                                                      std::string &Constraint,
   5568                                                      std::vector<SDValue>&Ops,
   5569                                                      SelectionDAG &DAG) const {
   5570   SDValue Result(0,0);
   5571 
   5572   // Only support length 1 constraints.
   5573   if (Constraint.length() > 1) return;
   5574 
   5575   char Letter = Constraint[0];
   5576   switch (Letter) {
   5577   default: break;
   5578   case 'I':
   5579   case 'J':
   5580   case 'K':
   5581   case 'L':
   5582   case 'M':
   5583   case 'N':
   5584   case 'O':
   5585   case 'P': {
   5586     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
   5587     if (!CST) return; // Must be an immediate to match.
   5588     unsigned Value = CST->getZExtValue();
   5589     switch (Letter) {
   5590     default: llvm_unreachable("Unknown constraint letter!");
   5591     case 'I':  // "I" is a signed 16-bit constant.
   5592       if ((short)Value == (int)Value)
   5593         Result = DAG.getTargetConstant(Value, Op.getValueType());
   5594       break;
   5595     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
   5596     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
   5597       if ((short)Value == 0)
   5598         Result = DAG.getTargetConstant(Value, Op.getValueType());
   5599       break;
   5600     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
   5601       if ((Value >> 16) == 0)
   5602         Result = DAG.getTargetConstant(Value, Op.getValueType());
   5603       break;
   5604     case 'M':  // "M" is a constant that is greater than 31.
   5605       if (Value > 31)
   5606         Result = DAG.getTargetConstant(Value, Op.getValueType());
   5607       break;
   5608     case 'N':  // "N" is a positive constant that is an exact power of two.
   5609       if ((int)Value > 0 && isPowerOf2_32(Value))
   5610         Result = DAG.getTargetConstant(Value, Op.getValueType());
   5611       break;
   5612     case 'O':  // "O" is the constant zero.
   5613       if (Value == 0)
   5614         Result = DAG.getTargetConstant(Value, Op.getValueType());
   5615       break;
   5616     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
   5617       if ((short)-Value == (int)-Value)
   5618         Result = DAG.getTargetConstant(Value, Op.getValueType());
   5619       break;
   5620     }
   5621     break;
   5622   }
   5623   }
   5624 
   5625   if (Result.getNode()) {
   5626     Ops.push_back(Result);
   5627     return;
   5628   }
   5629 
   5630   // Handle standard constraint letters.
   5631   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
   5632 }
   5633 
   5634 // isLegalAddressingMode - Return true if the addressing mode represented
   5635 // by AM is legal for this target, for a load/store of the specified type.
   5636 bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
   5637                                               Type *Ty) const {
   5638   // FIXME: PPC does not allow r+i addressing modes for vectors!
   5639 
   5640   // PPC allows a sign-extended 16-bit immediate field.
   5641   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
   5642     return false;
   5643 
   5644   // No global is ever allowed as a base.
   5645   if (AM.BaseGV)
   5646     return false;
   5647 
   5648   // PPC only support r+r,
   5649   switch (AM.Scale) {
   5650   case 0:  // "r+i" or just "i", depending on HasBaseReg.
   5651     break;
   5652   case 1:
   5653     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
   5654       return false;
   5655     // Otherwise we have r+r or r+i.
   5656     break;
   5657   case 2:
   5658     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
   5659       return false;
   5660     // Allow 2*r as r+r.
   5661     break;
   5662   default:
   5663     // No other scales are supported.
   5664     return false;
   5665   }
   5666 
   5667   return true;
   5668 }
   5669 
   5670 /// isLegalAddressImmediate - Return true if the integer value can be used
   5671 /// as the offset of the target addressing mode for load / store of the
   5672 /// given type.
   5673 bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
   5674   // PPC allows a sign-extended 16-bit immediate field.
   5675   return (V > -(1 << 16) && V < (1 << 16)-1);
   5676 }
   5677 
   5678 bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
   5679   return false;
   5680 }
   5681 
   5682 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
   5683                                            SelectionDAG &DAG) const {
   5684   MachineFunction &MF = DAG.getMachineFunction();
   5685   MachineFrameInfo *MFI = MF.getFrameInfo();
   5686   MFI->setReturnAddressIsTaken(true);
   5687 
   5688   DebugLoc dl = Op.getDebugLoc();
   5689   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   5690 
   5691   // Make sure the function does not optimize away the store of the RA to
   5692   // the stack.
   5693   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
   5694   FuncInfo->setLRStoreRequired();
   5695   bool isPPC64 = PPCSubTarget.isPPC64();
   5696   bool isDarwinABI = PPCSubTarget.isDarwinABI();
   5697 
   5698   if (Depth > 0) {
   5699     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
   5700     SDValue Offset =
   5701 
   5702       DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
   5703                       isPPC64? MVT::i64 : MVT::i32);
   5704     return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
   5705                        DAG.getNode(ISD::ADD, dl, getPointerTy(),
   5706                                    FrameAddr, Offset),
   5707                        MachinePointerInfo(), false, false, 0);
   5708   }
   5709 
   5710   // Just load the return address off the stack.
   5711   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
   5712   return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
   5713                      RetAddrFI, MachinePointerInfo(), false, false, 0);
   5714 }
   5715 
   5716 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
   5717                                           SelectionDAG &DAG) const {
   5718   DebugLoc dl = Op.getDebugLoc();
   5719   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   5720 
   5721   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   5722   bool isPPC64 = PtrVT == MVT::i64;
   5723 
   5724   MachineFunction &MF = DAG.getMachineFunction();
   5725   MachineFrameInfo *MFI = MF.getFrameInfo();
   5726   MFI->setFrameAddressIsTaken(true);
   5727   bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
   5728                   MFI->getStackSize() &&
   5729                   !MF.getFunction()->hasFnAttr(Attribute::Naked);
   5730   unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
   5731                                 (is31 ? PPC::R31 : PPC::R1);
   5732   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
   5733                                          PtrVT);
   5734   while (Depth--)
   5735     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
   5736                             FrameAddr, MachinePointerInfo(), false, false, 0);
   5737   return FrameAddr;
   5738 }
   5739 
   5740 bool
   5741 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   5742   // The PowerPC target isn't yet aware of offsets.
   5743   return false;
   5744 }
   5745 
   5746 /// getOptimalMemOpType - Returns the target specific optimal type for load
   5747 /// and store operations as a result of memset, memcpy, and memmove
   5748 /// lowering. If DstAlign is zero that means it's safe to destination
   5749 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
   5750 /// means there isn't a need to check it against alignment requirement,
   5751 /// probably because the source does not need to be loaded. If
   5752 /// 'NonScalarIntSafe' is true, that means it's safe to return a
   5753 /// non-scalar-integer type, e.g. empty string source, constant, or loaded
   5754 /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
   5755 /// constant so it does not need to be loaded.
   5756 /// It returns EVT::Other if the type should be determined using generic
   5757 /// target-independent logic.
   5758 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
   5759                                            unsigned DstAlign, unsigned SrcAlign,
   5760                                            bool NonScalarIntSafe,
   5761                                            bool MemcpyStrSrc,
   5762                                            MachineFunction &MF) const {
   5763   if (this->PPCSubTarget.isPPC64()) {
   5764     return MVT::i64;
   5765   } else {
   5766     return MVT::i32;
   5767   }
   5768 }
   5769