      1 //===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the PowerPC-specific support for the FastISel class. Some
     11 // of the target-specific code is generated by tablegen in the file
     12 // PPCGenFastISel.inc, which is #included here.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "PPC.h"
     17 #include "MCTargetDesc/PPCPredicates.h"
     18 #include "PPCCallingConv.h"
     19 #include "PPCISelLowering.h"
     20 #include "PPCMachineFunctionInfo.h"
     21 #include "PPCSubtarget.h"
     22 #include "PPCTargetMachine.h"
     23 #include "llvm/ADT/Optional.h"
     24 #include "llvm/CodeGen/CallingConvLower.h"
     25 #include "llvm/CodeGen/FastISel.h"
     26 #include "llvm/CodeGen/FunctionLoweringInfo.h"
     27 #include "llvm/CodeGen/MachineConstantPool.h"
     28 #include "llvm/CodeGen/MachineFrameInfo.h"
     29 #include "llvm/CodeGen/MachineInstrBuilder.h"
     30 #include "llvm/CodeGen/MachineRegisterInfo.h"
     31 #include "llvm/IR/CallingConv.h"
     32 #include "llvm/IR/GetElementPtrTypeIterator.h"
     33 #include "llvm/IR/GlobalAlias.h"
     34 #include "llvm/IR/GlobalVariable.h"
     35 #include "llvm/IR/IntrinsicInst.h"
     36 #include "llvm/IR/Operator.h"
     37 #include "llvm/Support/Debug.h"
     38 #include "llvm/Target/TargetLowering.h"
     39 #include "llvm/Target/TargetMachine.h"
     40 
     41 //===----------------------------------------------------------------------===//
     42 //
     43 // TBD:
     44 //   fastLowerArguments: Handle simple cases.
     45 //   PPCMaterializeGV: Handle TLS.
     46 //   SelectCall: Handle function pointers.
     47 //   SelectCall: Handle multi-register return values.
     48 //   SelectCall: Optimize away nops for local calls.
     49 //   processCallArgs: Handle bit-converted arguments.
     50 //   finishCall: Handle multi-register return values.
     51 //   PPCComputeAddress: Handle parameter references as FrameIndex's.
     52 //   PPCEmitCmp: Handle immediate as operand 1.
     53 //   SelectCall: Handle small byval arguments.
     54 //   SelectIntrinsicCall: Implement.
     55 //   SelectSelect: Implement.
     56 //   Consider factoring isTypeLegal into the base class.
     57 //   Implement switches and jump tables.
     58 //
     59 //===----------------------------------------------------------------------===//
     60 using namespace llvm;
     61 
     62 #define DEBUG_TYPE "ppcfastisel"
     63 
     64 namespace {
     65 
     66 typedef struct Address {
     67   enum {
     68     RegBase,
     69     FrameIndexBase
     70   } BaseType;
     71 
     72   union {
     73     unsigned Reg;
     74     int FI;
     75   } Base;
     76 
     77   long Offset;
     78 
     79   // Innocuous defaults for our address.
     80   Address()
     81    : BaseType(RegBase), Offset(0) {
     82      Base.Reg = 0;
     83    }
     84 } Address;
     85 
     86 class PPCFastISel final : public FastISel {
     87 
     88   const TargetMachine &TM;
     89   const PPCSubtarget *PPCSubTarget;
     90   PPCFunctionInfo *PPCFuncInfo;
     91   const TargetInstrInfo &TII;
     92   const TargetLowering &TLI;
     93   LLVMContext *Context;
     94 
     95   public:
     96     explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
     97                          const TargetLibraryInfo *LibInfo)
     98         : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
     99           PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
    100           PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
    101           TII(*PPCSubTarget->getInstrInfo()),
    102           TLI(*PPCSubTarget->getTargetLowering()),
    103           Context(&FuncInfo.Fn->getContext()) {}
    104 
    105   // Backend specific FastISel code.
    106   private:
    107     bool fastSelectInstruction(const Instruction *I) override;
    108     unsigned fastMaterializeConstant(const Constant *C) override;
    109     unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
    110     bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
    111                              const LoadInst *LI) override;
    112     bool fastLowerArguments() override;
    113     unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
    114     unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
    115                              const TargetRegisterClass *RC,
    116                              unsigned Op0, bool Op0IsKill,
    117                              uint64_t Imm);
    118     unsigned fastEmitInst_r(unsigned MachineInstOpcode,
    119                             const TargetRegisterClass *RC,
    120                             unsigned Op0, bool Op0IsKill);
    121     unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
    122                              const TargetRegisterClass *RC,
    123                              unsigned Op0, bool Op0IsKill,
    124                              unsigned Op1, bool Op1IsKill);
    125 
    126     bool fastLowerCall(CallLoweringInfo &CLI) override;
    127 
    128   // Instruction selection routines.
    129   private:
    130     bool SelectLoad(const Instruction *I);
    131     bool SelectStore(const Instruction *I);
    132     bool SelectBranch(const Instruction *I);
    133     bool SelectIndirectBr(const Instruction *I);
    134     bool SelectFPExt(const Instruction *I);
    135     bool SelectFPTrunc(const Instruction *I);
    136     bool SelectIToFP(const Instruction *I, bool IsSigned);
    137     bool SelectFPToI(const Instruction *I, bool IsSigned);
    138     bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    139     bool SelectRet(const Instruction *I);
    140     bool SelectTrunc(const Instruction *I);
    141     bool SelectIntExt(const Instruction *I);
    142 
    143   // Utility routines.
    144   private:
    145     bool isTypeLegal(Type *Ty, MVT &VT);
    146     bool isLoadTypeLegal(Type *Ty, MVT &VT);
    147     bool isValueAvailable(const Value *V) const;
    148     bool isVSFRCRegister(unsigned Register) const {
    149       return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
    150     }
    151     bool isVSSRCRegister(unsigned Register) const {
    152       return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID;
    153     }
    154     bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
    155                     bool isZExt, unsigned DestReg);
    156     bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
    157                      const TargetRegisterClass *RC, bool IsZExt = true,
    158                      unsigned FP64LoadOpc = PPC::LFD);
    159     bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
    160     bool PPCComputeAddress(const Value *Obj, Address &Addr);
    161     void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
    162                             unsigned &IndexReg);
    163     bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
    164                            unsigned DestReg, bool IsZExt);
    165     unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
    166     unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
    167     unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
    168                                bool UseSExt = true);
    169     unsigned PPCMaterialize32BitInt(int64_t Imm,
    170                                     const TargetRegisterClass *RC);
    171     unsigned PPCMaterialize64BitInt(int64_t Imm,
    172                                     const TargetRegisterClass *RC);
    173     unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
    174                              unsigned SrcReg, bool IsSigned);
    175     unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
    176 
    177   // Call handling routines.
    178   private:
    179     bool processCallArgs(SmallVectorImpl<Value*> &Args,
    180                          SmallVectorImpl<unsigned> &ArgRegs,
    181                          SmallVectorImpl<MVT> &ArgVTs,
    182                          SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
    183                          SmallVectorImpl<unsigned> &RegArgs,
    184                          CallingConv::ID CC,
    185                          unsigned &NumBytes,
    186                          bool IsVarArg);
    187     bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
    188     CCAssignFn *usePPC32CCs(unsigned Flag);
    189 
    190   private:
    191   #include "PPCGenFastISel.inc"
    192 
    193 };
    194 
    195 } // end anonymous namespace
    196 
    197 #include "PPCGenCallingConv.inc"
    198 
    199 // Function whose sole purpose is to kill compiler warnings
    200 // stemming from unused functions included from PPCGenCallingConv.inc.
    201 CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
    202   if (Flag == 1)
    203     return CC_PPC32_SVR4;
    204   else if (Flag == 2)
    205     return CC_PPC32_SVR4_ByVal;
    206   else if (Flag == 3)
    207     return CC_PPC32_SVR4_VarArg;
    208   else
    209     return RetCC_PPC;
    210 }
    211 
    212 static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
    213   switch (Pred) {
    214     // These are not representable with any single compare.
    215     case CmpInst::FCMP_FALSE:
    216     case CmpInst::FCMP_UEQ:
    217     case CmpInst::FCMP_UGT:
    218     case CmpInst::FCMP_UGE:
    219     case CmpInst::FCMP_ULT:
    220     case CmpInst::FCMP_ULE:
    221     case CmpInst::FCMP_UNE:
    222     case CmpInst::FCMP_TRUE:
    223     default:
    224       return Optional<PPC::Predicate>();
    225 
    226     case CmpInst::FCMP_OEQ:
    227     case CmpInst::ICMP_EQ:
    228       return PPC::PRED_EQ;
    229 
    230     case CmpInst::FCMP_OGT:
    231     case CmpInst::ICMP_UGT:
    232     case CmpInst::ICMP_SGT:
    233       return PPC::PRED_GT;
    234 
    235     case CmpInst::FCMP_OGE:
    236     case CmpInst::ICMP_UGE:
    237     case CmpInst::ICMP_SGE:
    238       return PPC::PRED_GE;
    239 
    240     case CmpInst::FCMP_OLT:
    241     case CmpInst::ICMP_ULT:
    242     case CmpInst::ICMP_SLT:
    243       return PPC::PRED_LT;
    244 
    245     case CmpInst::FCMP_OLE:
    246     case CmpInst::ICMP_ULE:
    247     case CmpInst::ICMP_SLE:
    248       return PPC::PRED_LE;
    249 
    250     case CmpInst::FCMP_ONE:
    251     case CmpInst::ICMP_NE:
    252       return PPC::PRED_NE;
    253 
    254     case CmpInst::FCMP_ORD:
    255       return PPC::PRED_NU;
    256 
    257     case CmpInst::FCMP_UNO:
    258       return PPC::PRED_UN;
    259   }
    260 }
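        // Illustrative note: an FP compare sets exactly one of the LT/GT/EQ/UN
        // bits in the target CR field, so a predicate such as FCMP_UEQ
        // ("equal or unordered") would have to test two bits (EQ || UN).  A
        // single BCC tests only one predicate bit, so those predicates are
        // rejected above and left to SelectionDAG.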
    261 
    262 // Determine whether the type Ty is simple enough to be handled by
    263 // fast-isel, and return its equivalent machine type in VT.
    264 // FIXME: Copied directly from ARM -- factor into base class?
    265 bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
    266   EVT Evt = TLI.getValueType(DL, Ty, true);
    267 
    268   // Only handle simple types.
    269   if (Evt == MVT::Other || !Evt.isSimple()) return false;
    270   VT = Evt.getSimpleVT();
    271 
    272   // Handle all legal types, i.e. a register that will directly hold this
    273   // value.
    274   return TLI.isTypeLegal(VT);
    275 }
    276 
    277 // Determine whether the type Ty is simple enough to be handled by
    278 // fast-isel as a load target, and return its equivalent machine type in VT.
    279 bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
    280   if (isTypeLegal(Ty, VT)) return true;
    281 
    282   // If this is a type that can be sign- or zero-extended to a basic operation,
    283   // go ahead and accept it now.
    284   if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
    285     return true;
    286   }
    287 
    288   return false;
    289 }
    290 
    291 bool PPCFastISel::isValueAvailable(const Value *V) const {
    292   if (!isa<Instruction>(V))
    293     return true;
    294 
    295   const auto *I = cast<Instruction>(V);
    296   if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
    297     return true;
    298 
    299   return false;
    300 }
    301 
    302 // Given a value Obj, create an Address object Addr that represents its
    303 // address.  Return false if we can't handle it.
    304 bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
    305   const User *U = nullptr;
    306   unsigned Opcode = Instruction::UserOp1;
    307   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    308     // Don't walk into other basic blocks unless the object is an alloca from
    309     // another block, otherwise it may not have a virtual register assigned.
    310     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
    311         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
    312       Opcode = I->getOpcode();
    313       U = I;
    314     }
    315   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    316     Opcode = C->getOpcode();
    317     U = C;
    318   }
    319 
    320   switch (Opcode) {
    321     default:
    322       break;
    323     case Instruction::BitCast:
    324       // Look through bitcasts.
    325       return PPCComputeAddress(U->getOperand(0), Addr);
    326     case Instruction::IntToPtr:
    327       // Look past no-op inttoptrs.
    328       if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
    329           TLI.getPointerTy(DL))
    330         return PPCComputeAddress(U->getOperand(0), Addr);
    331       break;
    332     case Instruction::PtrToInt:
    333       // Look past no-op ptrtoints.
    334       if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
    335         return PPCComputeAddress(U->getOperand(0), Addr);
    336       break;
    337     case Instruction::GetElementPtr: {
    338       Address SavedAddr = Addr;
    339       long TmpOffset = Addr.Offset;
    340 
    341       // Iterate through the GEP folding the constants into offsets where
    342       // we can.
    343       gep_type_iterator GTI = gep_type_begin(U);
    344       for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
    345            II != IE; ++II, ++GTI) {
    346         const Value *Op = *II;
    347         if (StructType *STy = dyn_cast<StructType>(*GTI)) {
    348           const StructLayout *SL = DL.getStructLayout(STy);
    349           unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
    350           TmpOffset += SL->getElementOffset(Idx);
    351         } else {
    352           uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
    353           for (;;) {
    354             if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
    355               // Constant-offset addressing.
    356               TmpOffset += CI->getSExtValue() * S;
    357               break;
    358             }
    359             if (canFoldAddIntoGEP(U, Op)) {
    360               // A compatible add with a constant operand. Fold the constant.
    361               ConstantInt *CI =
    362               cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
    363               TmpOffset += CI->getSExtValue() * S;
    364               // Iterate on the other operand.
    365               Op = cast<AddOperator>(Op)->getOperand(0);
    366               continue;
    367             }
    368             // Unsupported
    369             goto unsupported_gep;
    370           }
    371         }
    372       }
    373 
    374       // Try to grab the base operand now.
    375       Addr.Offset = TmpOffset;
    376       if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
    377 
    378       // We failed, restore everything and try the other options.
    379       Addr = SavedAddr;
    380 
    381       unsupported_gep:
    382       break;
    383     }
    384     case Instruction::Alloca: {
    385       const AllocaInst *AI = cast<AllocaInst>(Obj);
    386       DenseMap<const AllocaInst*, int>::iterator SI =
    387         FuncInfo.StaticAllocaMap.find(AI);
    388       if (SI != FuncInfo.StaticAllocaMap.end()) {
    389         Addr.BaseType = Address::FrameIndexBase;
    390         Addr.Base.FI = SI->second;
    391         return true;
    392       }
    393       break;
    394     }
    395   }
    396 
    397   // FIXME: References to parameters fall through to the behavior
    398   // below.  They should be able to reference a frame index since
    399   // they are stored to the stack, so we can get "ld rx, offset(r1)"
    400   // instead of "addi ry, r1, offset / ld rx, 0(ry)".  Obj will
    401   // just contain the parameter.  Try to handle this with a FI.
    402 
    403   // Try to get this in a register if nothing else has worked.
    404   if (Addr.Base.Reg == 0)
    405     Addr.Base.Reg = getRegForValue(Obj);
    406 
    407   // Prevent assignment of base register to X0, which is inappropriate
    408   // for loads and stores alike.
    409   if (Addr.Base.Reg != 0)
    410     MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
    411 
    412   return Addr.Base.Reg != 0;
    413 }
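        // Illustrative sketch (hypothetical IR, not taken from a test case): for
        //   %p = getelementptr inbounds { i32, i64 }, { i32, i64 }* %s, i64 1, i32 1
        // the loop above would fold the constant indices into
        //   TmpOffset = 16 (array index 1 * struct size) + 8 (field 1 offset) = 24
        // assuming a naturally aligned layout, and then recurse on %s to find the
        // base register or frame index.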
    414 
    415 // Fix up some addresses that can't be used directly.  For example, if
    416 // an offset won't fit in an instruction field, we may need to move it
    417 // into an index register.
    418 void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
    419                                      unsigned &IndexReg) {
    420 
    421   // Check whether the offset fits in the instruction field.
    422   if (!isInt<16>(Addr.Offset))
    423     UseOffset = false;
    424 
    425   // If this is a frame-index base and the offset needs to be simplified, then
    426   // put the alloca address into a register, set the base type back to
    427   // register, and continue. This should almost never happen.
    428   if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
    429     unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
    430     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
    431             ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
    432     Addr.Base.Reg = ResultReg;
    433     Addr.BaseType = Address::RegBase;
    434   }
    435 
    436   if (!UseOffset) {
    437     IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context)
    438                              : Type::getInt64Ty(*Context));
    439     const ConstantInt *Offset =
    440       ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
    441     IndexReg = PPCMaterializeInt(Offset, MVT::i64);
    442     assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
    443   }
    444 }
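        // Illustrative sketch: for an offset such as 0x12345, which does not fit in
        // the signed 16-bit displacement field, UseOffset is cleared and the offset
        // is materialized into IndexReg via PPCMaterializeInt, so callers fall back
        // to the indexed (reg+reg) form.  A frame-index base is first lowered with
        // ADDI8 so the indexed form has a plain register base.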
    445 
    446 // Emit a load instruction if possible, returning true if we succeeded,
    447 // otherwise false.  See commentary below for how the register class of
    448 // the load is determined.
    449 bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
    450                               const TargetRegisterClass *RC,
    451                               bool IsZExt, unsigned FP64LoadOpc) {
    452   unsigned Opc;
    453   bool UseOffset = true;
    454 
    455   // If ResultReg is given, it determines the register class of the load.
    456   // Otherwise, RC is the register class to use.  If the result of the
    457   // load isn't anticipated in this block, both may be zero, in which
    458   // case we must make a conservative guess.  In particular, don't assign
    459   // R0 or X0 to the result register, as the result may be used in a load,
    460   // store, add-immediate, or isel that won't permit this.  (Though
    461   // perhaps the spill and reload of live-exit values would handle this?)
    462   const TargetRegisterClass *UseRC =
    463     (ResultReg ? MRI.getRegClass(ResultReg) :
    464      (RC ? RC :
    465       (VT == MVT::f64 ? &PPC::F8RCRegClass :
    466        (VT == MVT::f32 ? &PPC::F4RCRegClass :
    467         (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
    468          &PPC::GPRC_and_GPRC_NOR0RegClass)))));
    469 
    470   bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
    471 
    472   switch (VT.SimpleTy) {
    473     default: // e.g., vector types not handled
    474       return false;
    475     case MVT::i8:
    476       Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
    477       break;
    478     case MVT::i16:
    479       Opc = (IsZExt ?
    480              (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
    481              (Is32BitInt ? PPC::LHA : PPC::LHA8));
    482       break;
    483     case MVT::i32:
    484       Opc = (IsZExt ?
    485              (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
    486              (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
    487       if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
    488         UseOffset = false;
    489       break;
    490     case MVT::i64:
    491       Opc = PPC::LD;
    492       assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
    493              "64-bit load with 32-bit target??");
    494       UseOffset = ((Addr.Offset & 3) == 0);
    495       break;
    496     case MVT::f32:
    497       Opc = PPC::LFS;
    498       break;
    499     case MVT::f64:
    500       Opc = FP64LoadOpc;
    501       break;
    502   }
    503 
    504   // If necessary, materialize the offset into a register and use
    505   // the indexed form.  Also handle stack pointers with special needs.
    506   unsigned IndexReg = 0;
    507   PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
    508 
    509   // If this is a potential VSX load with an offset of 0, a VSX indexed load can
    510   // be used.
    511   bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg);
    512   bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg);
    513   bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
    514   bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
    515   if ((Is32VSXLoad || Is64VSXLoad) &&
    516       (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
    517       (Addr.Offset == 0)) {
    518     UseOffset = false;
    519   }
    520 
    521   if (ResultReg == 0)
    522     ResultReg = createResultReg(UseRC);
    523 
    524   // Note: If we still have a frame index here, we know the offset is
    525   // in range, as otherwise PPCSimplifyAddress would have converted it
    526   // into a RegBase.
    527   if (Addr.BaseType == Address::FrameIndexBase) {
    528     // VSX only provides an indexed load.
    529     if (Is32VSXLoad || Is64VSXLoad) return false;
    530 
    531     MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
    532         MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
    533                                           Addr.Offset),
    534         MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
    535         MFI.getObjectAlignment(Addr.Base.FI));
    536 
    537     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    538       .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
    539 
    540   // Base reg with offset in range.
    541   } else if (UseOffset) {
    542     // VSX only provides an indexed load.
    543     if (Is32VSXLoad || Is64VSXLoad) return false;
    544 
    545     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    546       .addImm(Addr.Offset).addReg(Addr.Base.Reg);
    547 
    548   // Indexed form.
    549   } else {
    550     // Get the RR opcode corresponding to the RI one.  FIXME: It would be
    551     // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    552     // is hard to get at.
    553     switch (Opc) {
    554       default:        llvm_unreachable("Unexpected opcode!");
    555       case PPC::LBZ:    Opc = PPC::LBZX;    break;
    556       case PPC::LBZ8:   Opc = PPC::LBZX8;   break;
    557       case PPC::LHZ:    Opc = PPC::LHZX;    break;
    558       case PPC::LHZ8:   Opc = PPC::LHZX8;   break;
    559       case PPC::LHA:    Opc = PPC::LHAX;    break;
    560       case PPC::LHA8:   Opc = PPC::LHAX8;   break;
    561       case PPC::LWZ:    Opc = PPC::LWZX;    break;
    562       case PPC::LWZ8:   Opc = PPC::LWZX8;   break;
    563       case PPC::LWA:    Opc = PPC::LWAX;    break;
    564       case PPC::LWA_32: Opc = PPC::LWAX_32; break;
    565       case PPC::LD:     Opc = PPC::LDX;     break;
    566       case PPC::LFS:    Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
    567       case PPC::LFD:    Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
    568     }
    569     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    570       .addReg(Addr.Base.Reg).addReg(IndexReg);
    571   }
    572 
    573   return true;
    574 }
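        // Illustrative sketch of the three forms emitted above for an i32
        // zero-extending load (register names hypothetical):
        //   frame-index base:    LWZ  %r, off(<fi#n>)
        //   base reg + offset:   LWZ  %r, off(%base)      (offset fits in 16 bits)
        //   indexed fallback:    LWZX %r, %base, %index   (offset materialized by
        //                                                   PPCSimplifyAddress)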
    575 
    576 // Attempt to fast-select a load instruction.
    577 bool PPCFastISel::SelectLoad(const Instruction *I) {
    578   // FIXME: No atomic loads are supported.
    579   if (cast<LoadInst>(I)->isAtomic())
    580     return false;
    581 
    582   // Verify we have a legal type before going any further.
    583   MVT VT;
    584   if (!isLoadTypeLegal(I->getType(), VT))
    585     return false;
    586 
    587   // See if we can handle this address.
    588   Address Addr;
    589   if (!PPCComputeAddress(I->getOperand(0), Addr))
    590     return false;
    591 
    592   // Look at the currently assigned register for this instruction
    593   // to determine the required register class.  This is necessary
    594   // to constrain RA from using R0/X0 when this is not legal.
    595   unsigned AssignedReg = FuncInfo.ValueMap[I];
    596   const TargetRegisterClass *RC =
    597     AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
    598 
    599   unsigned ResultReg = 0;
    600   if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
    601     return false;
    602   updateValueMap(I, ResultReg);
    603   return true;
    604 }
    605 
    606 // Emit a store instruction to store SrcReg at Addr.
    607 bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
    608   assert(SrcReg && "Nothing to store!");
    609   unsigned Opc;
    610   bool UseOffset = true;
    611 
    612   const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
    613   bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
    614 
    615   switch (VT.SimpleTy) {
    616     default: // e.g., vector types not handled
    617       return false;
    618     case MVT::i8:
    619       Opc = Is32BitInt ? PPC::STB : PPC::STB8;
    620       break;
    621     case MVT::i16:
    622       Opc = Is32BitInt ? PPC::STH : PPC::STH8;
    623       break;
    624     case MVT::i32:
    625       assert(Is32BitInt && "Not GPRC for i32??");
    626       Opc = PPC::STW;
    627       break;
    628     case MVT::i64:
    629       Opc = PPC::STD;
    630       UseOffset = ((Addr.Offset & 3) == 0);
    631       break;
    632     case MVT::f32:
    633       Opc = PPC::STFS;
    634       break;
    635     case MVT::f64:
    636       Opc = PPC::STFD;
    637       break;
    638   }
    639 
    640   // If necessary, materialize the offset into a register and use
    641   // the indexed form.  Also handle stack pointers with special needs.
    642   unsigned IndexReg = 0;
    643   PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
    644 
    645   // If this is a potential VSX store with an offset of 0, a VSX indexed store
    646   // can be used.
    647   bool IsVSSRC = isVSSRCRegister(SrcReg);
    648   bool IsVSFRC = isVSFRCRegister(SrcReg);
    649   bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
    650   bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
    651   if ((Is32VSXStore || Is64VSXStore) &&
    652       (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
    653       (Addr.Offset == 0)) {
    654     UseOffset = false;
    655   }
    656 
    657   // Note: If we still have a frame index here, we know the offset is
    658   // in range, as otherwise PPCSimplifyAddress would have converted it
    659   // into a RegBase.
    660   if (Addr.BaseType == Address::FrameIndexBase) {
    661     // VSX only provides an indexed store.
    662     if (Is32VSXStore || Is64VSXStore) return false;
    663 
    664     MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
    665         MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
    666                                           Addr.Offset),
    667         MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
    668         MFI.getObjectAlignment(Addr.Base.FI));
    669 
    670     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    671         .addReg(SrcReg)
    672         .addImm(Addr.Offset)
    673         .addFrameIndex(Addr.Base.FI)
    674         .addMemOperand(MMO);
    675 
    676   // Base reg with offset in range.
    677   } else if (UseOffset) {
    678     // VSX only provides an indexed store.
    679     if (Is32VSXStore || Is64VSXStore) return false;
    680 
    681     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    682       .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
    683 
    684   // Indexed form.
    685   } else {
    686     // Get the RR opcode corresponding to the RI one.  FIXME: It would be
    687     // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    688     // is hard to get at.
    689     switch (Opc) {
    690       default:        llvm_unreachable("Unexpected opcode!");
    691       case PPC::STB:  Opc = PPC::STBX;  break;
    692       case PPC::STH : Opc = PPC::STHX;  break;
    693       case PPC::STW : Opc = PPC::STWX;  break;
    694       case PPC::STB8: Opc = PPC::STBX8; break;
    695       case PPC::STH8: Opc = PPC::STHX8; break;
    696       case PPC::STW8: Opc = PPC::STWX8; break;
    697       case PPC::STD:  Opc = PPC::STDX;  break;
    698       case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
    699       case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
    700     }
    701 
    702     auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
    703         .addReg(SrcReg);
    704 
    705     // If we have an index register defined, use it in the store instruction;
    706     // otherwise use X0 (ZERO8) as the base, which makes the vector
    707     // instructions use zero in the computation of the effective address
    708     // regardless of the register's contents.
    709     if (IndexReg)
    710       MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    711     else
    712       MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
    713   }
    714 
    715   return true;
    716 }
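        // Illustrative sketch: when the indexed form is chosen but no index
        // register was materialized (the VSX offset-zero case above), the store
        // uses ZERO8 as the first address operand, e.g. STXSDX %f, %zero8, %base,
        // so the hardware reads that operand as the literal value 0 and the
        // effective address is simply the contents of Addr.Base.Reg.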
    717 
    718 // Attempt to fast-select a store instruction.
    719 bool PPCFastISel::SelectStore(const Instruction *I) {
    720   Value *Op0 = I->getOperand(0);
    721   unsigned SrcReg = 0;
    722 
    723   // FIXME: No atomic stores are supported.
    724   if (cast<StoreInst>(I)->isAtomic())
    725     return false;
    726 
    727   // Verify we have a legal type before going any further.
    728   MVT VT;
    729   if (!isLoadTypeLegal(Op0->getType(), VT))
    730     return false;
    731 
    732   // Get the value to be stored into a register.
    733   SrcReg = getRegForValue(Op0);
    734   if (SrcReg == 0)
    735     return false;
    736 
    737   // See if we can handle this address.
    738   Address Addr;
    739   if (!PPCComputeAddress(I->getOperand(1), Addr))
    740     return false;
    741 
    742   if (!PPCEmitStore(VT, SrcReg, Addr))
    743     return false;
    744 
    745   return true;
    746 }
    747 
    748 // Attempt to fast-select a branch instruction.
    749 bool PPCFastISel::SelectBranch(const Instruction *I) {
    750   const BranchInst *BI = cast<BranchInst>(I);
    751   MachineBasicBlock *BrBB = FuncInfo.MBB;
    752   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
    753   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
    754 
    755   // For now, just try the simplest case where it's fed by a compare.
    756   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    757     if (isValueAvailable(CI)) {
    758       Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
    759       if (!OptPPCPred)
    760         return false;
    761 
    762       PPC::Predicate PPCPred = OptPPCPred.getValue();
    763 
    764       // Take advantage of fall-through opportunities.
    765       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    766         std::swap(TBB, FBB);
    767         PPCPred = PPC::InvertPredicate(PPCPred);
    768       }
    769 
    770       unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
    771 
    772       if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
    773                       CondReg))
    774         return false;
    775 
    776       BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
    777         .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
    778       finishCondBranch(BI->getParent(), TBB, FBB);
    779       return true;
    780     }
    781   } else if (const ConstantInt *CI =
    782              dyn_cast<ConstantInt>(BI->getCondition())) {
    783     uint64_t Imm = CI->getZExtValue();
    784     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    785     fastEmitBranch(Target, DbgLoc);
    786     return true;
    787   }
    788 
    789   // FIXME: ARM looks for a case where the block containing the compare
    790   // has been split from the block containing the branch.  If this happens,
    791   // there is a vreg available containing the result of the compare.  I'm
    792   // not sure we can do much, as we've lost the predicate information with
    793   // the compare instruction -- we have a 4-bit CR but don't know which bit
    794   // to test here.
    795   return false;
    796 }
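        // Illustrative sketch (register names hypothetical): for
        //   %c = icmp slt i32 %a, %b ; br i1 %c, label %T, label %F
        // the code above emits a CMPW into a CR register followed by a single
        //   BCC <PRED_LT>, %cr, <T>
        // inverting the predicate first when the true block is the layout
        // successor, so the fall-through edge needs no branch.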
    797 
    798 // Attempt to emit a compare of the two source values.  Signed and unsigned
    799 // comparisons are supported.  Return false if we can't handle it.
    800 bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
    801                              bool IsZExt, unsigned DestReg) {
    802   Type *Ty = SrcValue1->getType();
    803   EVT SrcEVT = TLI.getValueType(DL, Ty, true);
    804   if (!SrcEVT.isSimple())
    805     return false;
    806   MVT SrcVT = SrcEVT.getSimpleVT();
    807 
    808   if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
    809     return false;
    810 
    811   // See if operand 2 is an immediate encodeable in the compare.
    812   // FIXME: Operands are not in canonical order at -O0, so an immediate
    813   // operand in position 1 is a lost opportunity for now.  We are
    814   // similar to ARM in this regard.
    815   long Imm = 0;
    816   bool UseImm = false;
    817 
    818   // Only 16-bit integer constants can be represented in compares for
    819   // PowerPC.  Others will be materialized into a register.
    820   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    821     if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
    822         SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    823       const APInt &CIVal = ConstInt->getValue();
    824       Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
    825       if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
    826         UseImm = true;
    827     }
    828   }
    829 
    830   unsigned CmpOpc;
    831   bool NeedsExt = false;
    832   switch (SrcVT.SimpleTy) {
    833     default: return false;
    834     case MVT::f32:
    835       CmpOpc = PPC::FCMPUS;
    836       break;
    837     case MVT::f64:
    838       CmpOpc = PPC::FCMPUD;
    839       break;
    840     case MVT::i1:
    841     case MVT::i8:
    842     case MVT::i16:
    843       NeedsExt = true;
    844       // Intentional fall-through.
    845     case MVT::i32:
    846       if (!UseImm)
    847         CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
    848       else
    849         CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
    850       break;
    851     case MVT::i64:
    852       if (!UseImm)
    853         CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
    854       else
    855         CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
    856       break;
    857   }
    858 
    859   unsigned SrcReg1 = getRegForValue(SrcValue1);
    860   if (SrcReg1 == 0)
    861     return false;
    862 
    863   unsigned SrcReg2 = 0;
    864   if (!UseImm) {
    865     SrcReg2 = getRegForValue(SrcValue2);
    866     if (SrcReg2 == 0)
    867       return false;
    868   }
    869 
    870   if (NeedsExt) {
    871     unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
    872     if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
    873       return false;
    874     SrcReg1 = ExtReg;
    875 
    876     if (!UseImm) {
    877       unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
    878       if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
    879         return false;
    880       SrcReg2 = ExtReg;
    881     }
    882   }
    883 
    884   if (!UseImm)
    885     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
    886       .addReg(SrcReg1).addReg(SrcReg2);
    887   else
    888     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
    889       .addReg(SrcReg1).addImm(Imm);
    890 
    891   return true;
    892 }
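        // Illustrative sketch: an unsigned i32 compare against 42 uses the
        // immediate form (CMPLWI %dest, %src, 42), while a compare against 100000,
        // which does not fit in 16 bits, falls back to materializing the constant
        // and using the register form (CMPLW).  i1/i8/i16 operands are first
        // extended to i32 so the 32-bit compare sees well-defined upper bits.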
    893 
    894 // Attempt to fast-select a floating-point extend instruction.
    895 bool PPCFastISel::SelectFPExt(const Instruction *I) {
    896   Value *Src  = I->getOperand(0);
    897   EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
    898   EVT DestVT = TLI.getValueType(DL, I->getType(), true);
    899 
    900   if (SrcVT != MVT::f32 || DestVT != MVT::f64)
    901     return false;
    902 
    903   unsigned SrcReg = getRegForValue(Src);
    904   if (!SrcReg)
    905     return false;
    906 
    907   // No code is generated for an FP extend.
    908   updateValueMap(I, SrcReg);
    909   return true;
    910 }
    911 
    912 // Attempt to fast-select a floating-point truncate instruction.
    913 bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
    914   Value *Src  = I->getOperand(0);
    915   EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
    916   EVT DestVT = TLI.getValueType(DL, I->getType(), true);
    917 
    918   if (SrcVT != MVT::f64 || DestVT != MVT::f32)
    919     return false;
    920 
    921   unsigned SrcReg = getRegForValue(Src);
    922   if (!SrcReg)
    923     return false;
    924 
    925   // Round the result to single precision.
    926   unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
    927   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
    928     .addReg(SrcReg);
    929 
    930   updateValueMap(I, DestReg);
    931   return true;
    932 }
    933 
    934 // Move an i32 or i64 value in a GPR to an f64 value in an FPR.
    935 // FIXME: When direct register moves are implemented (see PowerISA 2.07),
    936 // those should be used instead of moving via a stack slot when the
    937 // subtarget permits.
    938 // FIXME: The code here is sloppy for the 4-byte case.  Can use a 4-byte
    939 // stack slot and 4-byte store/load sequence.  Or just sext the 4-byte
    940 // case to 8 bytes which produces tighter code but wastes stack space.
    941 unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
    942                                      bool IsSigned) {
    943 
    944   // If necessary, extend 32-bit int to 64-bit.
    945   if (SrcVT == MVT::i32) {
    946     unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    947     if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
    948       return 0;
    949     SrcReg = TmpReg;
    950   }
    951 
    952   // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
    953   Address Addr;
    954   Addr.BaseType = Address::FrameIndexBase;
    955   Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
    956 
    957   // Store the value from the GPR.
    958   if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    959     return 0;
    960 
    961   // Load the integer value into an FPR.  The kind of load used depends
    962   // on a number of conditions.
    963   unsigned LoadOpc = PPC::LFD;
    964 
    965   if (SrcVT == MVT::i32) {
    966     if (!IsSigned) {
    967       LoadOpc = PPC::LFIWZX;
    968       Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    969     } else if (PPCSubTarget->hasLFIWAX()) {
    970       LoadOpc = PPC::LFIWAX;
    971       Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    972     }
    973   }
    974 
    975   const TargetRegisterClass *RC = &PPC::F8RCRegClass;
    976   unsigned ResultReg = 0;
    977   if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    978     return 0;
    979 
    980   return ResultReg;
    981 }
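        // Illustrative sketch of the GPR-to-FPR path above: the (possibly
        // sign/zero-extended) 64-bit value is stored to an 8-byte stack slot with
        // STD, then reloaded into an FPR with LFD, or with LFIWAX/LFIWZX for i32
        // sources so only the low word is converted (the 4-byte offset on
        // big-endian targets selects that word).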
    982 
    983 // Attempt to fast-select an integer-to-floating-point conversion.
    984 // FIXME: Once fast-isel has better support for VSX, conversions using
    985 //        direct moves should be implemented.
    986 bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
    987   MVT DstVT;
    988   Type *DstTy = I->getType();
    989   if (!isTypeLegal(DstTy, DstVT))
    990     return false;
    991 
    992   if (DstVT != MVT::f32 && DstVT != MVT::f64)
    993     return false;
    994 
    995   Value *Src = I->getOperand(0);
    996   EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
    997   if (!SrcEVT.isSimple())
    998     return false;
    999 
   1000   MVT SrcVT = SrcEVT.getSimpleVT();
   1001 
   1002   if (SrcVT != MVT::i8  && SrcVT != MVT::i16 &&
   1003       SrcVT != MVT::i32 && SrcVT != MVT::i64)
   1004     return false;
   1005 
   1006   unsigned SrcReg = getRegForValue(Src);
   1007   if (SrcReg == 0)
   1008     return false;
   1009 
   1010   // We can only lower an unsigned convert if we have the newer
   1011   // floating-point conversion operations.
   1012   if (!IsSigned && !PPCSubTarget->hasFPCVT())
   1013     return false;
   1014 
   1015   // FIXME: For now we require the newer floating-point conversion operations
   1016   // (which are present only on P7 and A2 server models) when converting
   1017   // to single-precision float.  Otherwise we have to generate a lot of
   1018   // fiddly code to avoid double rounding.  If necessary, the fiddly code
   1019   // can be found in PPCTargetLowering::LowerINT_TO_FP().
   1020   if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
   1021     return false;
   1022 
   1023   // Extend the input if necessary.
   1024   if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
   1025     unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
   1026     if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
   1027       return false;
   1028     SrcVT = MVT::i64;
   1029     SrcReg = TmpReg;
   1030   }
   1031 
   1032   // Move the integer value to an FPR.
   1033   unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
   1034   if (FPReg == 0)
   1035     return false;
   1036 
   1037   // Determine the opcode for the conversion.
   1038   const TargetRegisterClass *RC = &PPC::F8RCRegClass;
   1039   unsigned DestReg = createResultReg(RC);
   1040   unsigned Opc;
   1041 
   1042   if (DstVT == MVT::f32)
   1043     Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
   1044   else
   1045     Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
   1046 
   1047   // Generate the convert.
   1048   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
   1049     .addReg(FPReg);
   1050 
   1051   updateValueMap(I, DestReg);
   1052   return true;
   1053 }
   1054 
   1055 // Move the floating-point value in SrcReg into an integer destination
   1056 // register, and return the register (or zero if we can't handle it).
   1057 // FIXME: When direct register moves are implemented (see PowerISA 2.07),
   1058 // those should be used instead of moving via a stack slot when the
   1059 // subtarget permits.
   1060 unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
   1061                                       unsigned SrcReg, bool IsSigned) {
   1062   // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
   1063   // Note that if we have STFIWX available, we could use a 4-byte stack
   1064   // slot for i32, but this being fast-isel we'll just go with the
   1065   // easiest code gen possible.
   1066   Address Addr;
   1067   Addr.BaseType = Address::FrameIndexBase;
   1068   Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
   1069 
   1070   // Store the value from the FPR.
   1071   if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
   1072     return 0;
   1073 
   1074   // Reload it into a GPR.  If we want an i32, modify the address
   1075   // to have a 4-byte offset so we load from the right place.
   1076   if (VT == MVT::i32)
   1077     Addr.Offset = 4;
   1078 
   1079   // Look at the currently assigned register for this instruction
   1080   // to determine the required register class.
   1081   unsigned AssignedReg = FuncInfo.ValueMap[I];
   1082   const TargetRegisterClass *RC =
   1083     AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
   1084 
   1085   unsigned ResultReg = 0;
   1086   if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
   1087     return 0;
   1088 
   1089   return ResultReg;
   1090 }
   1091 
   1092 // Attempt to fast-select a floating-point-to-integer conversion.
   1093 // FIXME: Once fast-isel has better support for VSX, conversions using
   1094 //        direct moves should be implemented.
   1095 bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
   1096   MVT DstVT, SrcVT;
   1097   Type *DstTy = I->getType();
   1098   if (!isTypeLegal(DstTy, DstVT))
   1099     return false;
   1100 
   1101   if (DstVT != MVT::i32 && DstVT != MVT::i64)
   1102     return false;
   1103 
   1104   // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
   1105   if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
   1106     return false;
   1107 
   1108   Value *Src = I->getOperand(0);
   1109   Type *SrcTy = Src->getType();
   1110   if (!isTypeLegal(SrcTy, SrcVT))
   1111     return false;
   1112 
   1113   if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
   1114     return false;
   1115 
   1116   unsigned SrcReg = getRegForValue(Src);
   1117   if (SrcReg == 0)
   1118     return false;
   1119 
   1120   // Convert f32 to f64 if necessary.  This is just a meaningless copy
   1121   // to get the register class right.  COPY_TO_REGCLASS is needed since
   1122   // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream.
   1123   const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
   1124   if (InRC == &PPC::F4RCRegClass) {
   1125     unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
   1126     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1127             TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
   1128       .addReg(SrcReg).addImm(PPC::F8RCRegClassID);
   1129     SrcReg = TmpReg;
   1130   }
   1131 
   1132   // Determine the opcode for the conversion, which takes place
   1133   // entirely within FPRs.
   1134   unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
   1135   unsigned Opc;
   1136 
   1137   if (DstVT == MVT::i32)
   1138     if (IsSigned)
   1139       Opc = PPC::FCTIWZ;
   1140     else
   1141       Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
   1142   else
   1143     Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
   1144 
   1145   // Generate the convert.
   1146   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
   1147     .addReg(SrcReg);
   1148 
   1149   // Now move the integer value from a float register to an integer register.
   1150   unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
   1151   if (IntReg == 0)
   1152     return false;
   1153 
   1154   updateValueMap(I, IntReg);
   1155   return true;
   1156 }
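        // Illustrative sketch: an fptosi from double to i32 becomes
        //   FCTIWZ %ftmp, %fsrc          ; convert with round-toward-zero
        // followed by PPCMoveToIntReg, which spills %ftmp with STFD and reloads
        // the integer word from the stack slot into a GPR.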
   1157 
   1158 // Attempt to fast-select a binary integer operation that isn't already
   1159 // handled automatically.
   1160 bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
   1161   EVT DestVT = TLI.getValueType(DL, I->getType(), true);
   1162 
   1163   // We can get here in the case when we have a binary operation on a non-legal
   1164   // type and the target independent selector doesn't know how to handle it.
   1165   if (DestVT != MVT::i16 && DestVT != MVT::i8)
   1166     return false;
   1167 
   1168   // Look at the currently assigned register for this instruction
   1169   // to determine the required register class.  If there is no register,
   1170   // make a conservative choice (don't assign R0).
   1171   unsigned AssignedReg = FuncInfo.ValueMap[I];
   1172   const TargetRegisterClass *RC =
   1173     (AssignedReg ? MRI.getRegClass(AssignedReg) :
   1174      &PPC::GPRC_and_GPRC_NOR0RegClass);
   1175   bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
   1176 
   1177   unsigned Opc;
   1178   switch (ISDOpcode) {
   1179     default: return false;
   1180     case ISD::ADD:
   1181       Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
   1182       break;
   1183     case ISD::OR:
   1184       Opc = IsGPRC ? PPC::OR : PPC::OR8;
   1185       break;
   1186     case ISD::SUB:
   1187       Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
   1188       break;
   1189   }
   1190 
   1191   unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
   1192   unsigned SrcReg1 = getRegForValue(I->getOperand(0));
   1193   if (SrcReg1 == 0) return false;
   1194 
   1195   // Handle case of small immediate operand.
   1196   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
   1197     const APInt &CIVal = ConstInt->getValue();
   1198     int Imm = (int)CIVal.getSExtValue();
   1199     bool UseImm = true;
   1200     if (isInt<16>(Imm)) {
   1201       switch (Opc) {
   1202         default:
   1203           llvm_unreachable("Missing case!");
   1204         case PPC::ADD4:
   1205           Opc = PPC::ADDI;
   1206           MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
   1207           break;
   1208         case PPC::ADD8:
   1209           Opc = PPC::ADDI8;
   1210           MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
   1211           break;
   1212         case PPC::OR:
   1213           Opc = PPC::ORI;
   1214           break;
   1215         case PPC::OR8:
   1216           Opc = PPC::ORI8;
   1217           break;
   1218         case PPC::SUBF:
   1219           if (Imm == -32768)
   1220             UseImm = false;
   1221           else {
   1222             Opc = PPC::ADDI;
   1223             MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
   1224             Imm = -Imm;
   1225           }
   1226           break;
   1227         case PPC::SUBF8:
   1228           if (Imm == -32768)
   1229             UseImm = false;
   1230           else {
   1231             Opc = PPC::ADDI8;
   1232             MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
   1233             Imm = -Imm;
   1234           }
   1235           break;
   1236       }
   1237 
   1238       if (UseImm) {
   1239         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
   1240                 ResultReg)
   1241             .addReg(SrcReg1)
   1242             .addImm(Imm);
   1243         updateValueMap(I, ResultReg);
   1244         return true;
   1245       }
   1246     }
   1247   }
   1248 
   1249   // Reg-reg case.
   1250   unsigned SrcReg2 = getRegForValue(I->getOperand(1));
   1251   if (SrcReg2 == 0) return false;
   1252 
   1253   // Reverse operands for subtract-from.
   1254   if (ISDOpcode == ISD::SUB)
   1255     std::swap(SrcReg1, SrcReg2);
   1256 
   1257   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
   1258     .addReg(SrcReg1).addReg(SrcReg2);
   1259   updateValueMap(I, ResultReg);
   1260   return true;
   1261 }
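        // Illustrative sketch (hypothetical values): for "add i8 %x, 7" the
        // immediate path above rewrites ADD4 to ADDI and emits
        //   ADDI %r, %x, 7
        // while "sub i8 %x, 7" becomes ADDI with -7; only -32768, whose negation
        // is not representable, falls back to the reg-reg SUBF form with the
        // operands swapped.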
   1262 
   1263 // Handle arguments to a call that we're attempting to fast-select.
   1264 // Return false if the arguments are too complex for us at the moment.
   1265 bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
   1266                                   SmallVectorImpl<unsigned> &ArgRegs,
   1267                                   SmallVectorImpl<MVT> &ArgVTs,
   1268                                   SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
   1269                                   SmallVectorImpl<unsigned> &RegArgs,
   1270                                   CallingConv::ID CC,
   1271                                   unsigned &NumBytes,
   1272                                   bool IsVarArg) {
   1273   SmallVector<CCValAssign, 16> ArgLocs;
   1274   CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
   1275 
   1276   // Reserve space for the linkage area on the stack.
   1277   unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
   1278   CCInfo.AllocateStack(LinkageSize, 8);
   1279 
   1280   CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
   1281 
   1282   // Bail out if we can't handle any of the arguments.
   1283   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
   1284     CCValAssign &VA = ArgLocs[I];
   1285     MVT ArgVT = ArgVTs[VA.getValNo()];
   1286 
   1287     // Skip vector arguments for now, as well as long double and
   1288     // uint128_t, and anything that isn't passed in a register.
   1289     if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
   1290         !VA.isRegLoc() || VA.needsCustom())
   1291       return false;
   1292 
   1293     // Skip bit-converted arguments for now.
   1294     if (VA.getLocInfo() == CCValAssign::BCvt)
   1295       return false;
   1296   }
   1297 
   1298   // Get a count of how many bytes are to be pushed onto the stack.
   1299   NumBytes = CCInfo.getNextStackOffset();
   1300 
   1301   // The prolog code of the callee may store up to 8 GPR argument registers to
   1302   // the stack, allowing va_start to index over them in memory if it is varargs.
   1303   // Because we cannot tell if this is needed on the caller side, we have to
   1304   // conservatively assume that it is needed.  As such, make sure we have at
   1305   // least enough stack space for the caller to store the 8 GPRs.
   1306   // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
   1307   NumBytes = std::max(NumBytes, LinkageSize + 64);
   1308 
   1309   // Issue CALLSEQ_START.
   1310   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1311           TII.get(TII.getCallFrameSetupOpcode()))
   1312     .addImm(NumBytes);
   1313 
   1314   // Prepare to assign register arguments.  Every argument uses up a
   1315   // GPR protocol register even if it's passed in a floating-point
   1316   // register (unless we're using the fast calling convention).
   1317   unsigned NextGPR = PPC::X3;
   1318   unsigned NextFPR = PPC::F1;
   1319 
   1320   // Process arguments.
   1321   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
   1322     CCValAssign &VA = ArgLocs[I];
   1323     unsigned Arg = ArgRegs[VA.getValNo()];
   1324     MVT ArgVT = ArgVTs[VA.getValNo()];
   1325 
   1326     // Handle argument promotion and bitcasts.
   1327     switch (VA.getLocInfo()) {
   1328       default:
   1329         llvm_unreachable("Unknown loc info!");
   1330       case CCValAssign::Full:
   1331         break;
   1332       case CCValAssign::SExt: {
   1333         MVT DestVT = VA.getLocVT();
   1334         const TargetRegisterClass *RC =
   1335           (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
   1336         unsigned TmpReg = createResultReg(RC);
   1337         if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
   1338           llvm_unreachable("Failed to emit a sext!");
   1339         ArgVT = DestVT;
   1340         Arg = TmpReg;
   1341         break;
   1342       }
   1343       case CCValAssign::AExt:
   1344       case CCValAssign::ZExt: {
   1345         MVT DestVT = VA.getLocVT();
   1346         const TargetRegisterClass *RC =
   1347           (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
   1348         unsigned TmpReg = createResultReg(RC);
   1349         if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
   1350           llvm_unreachable("Failed to emit a zext!");
   1351         ArgVT = DestVT;
   1352         Arg = TmpReg;
   1353         break;
   1354       }
   1355       case CCValAssign::BCvt: {
   1356         // FIXME: Not yet handled.
   1357         llvm_unreachable("Should have bailed before getting here!");
   1358         break;
   1359       }
   1360     }
   1361 
   1362     // Copy this argument to the appropriate register.
   1363     unsigned ArgReg;
   1364     if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
   1365       ArgReg = NextFPR++;
   1366       if (CC != CallingConv::Fast)
   1367         ++NextGPR;
   1368     } else
   1369       ArgReg = NextGPR++;
   1370 
   1371     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1372             TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
   1373     RegArgs.push_back(ArgReg);
   1374   }
   1375 
   1376   return true;
   1377 }
   1378 
   1379 // For a call that we've determined we can fast-select, finish the
   1380 // call sequence and generate a copy to obtain the return value (if any).
   1381 bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
   1382   CallingConv::ID CC = CLI.CallConv;
   1383 
   1384   // Issue CALLSEQ_END.
   1385   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1386           TII.get(TII.getCallFrameDestroyOpcode()))
   1387     .addImm(NumBytes).addImm(0);
   1388 
   1389   // Next, generate a copy to obtain the return value.
   1390   // FIXME: No multi-register return values yet, though I don't foresee
   1391   // any real difficulties there.
   1392   if (RetVT != MVT::isVoid) {
   1393     SmallVector<CCValAssign, 16> RVLocs;
   1394     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
   1395     CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
   1396     assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
   1397     CCValAssign &VA = RVLocs[0];
   1398     assert(VA.isRegLoc() && "Can only return in registers!");
   1399 
   1400     MVT DestVT = VA.getValVT();
   1401     MVT CopyVT = DestVT;
   1402 
   1403     // Ints smaller than a register still arrive in a full 64-bit
   1404     // register, so make sure we recognize this.
   1405     if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
   1406       CopyVT = MVT::i64;
   1407 
   1408     unsigned SourcePhysReg = VA.getLocReg();
   1409     unsigned ResultReg = 0;
   1410 
   1411     if (RetVT == CopyVT) {
   1412       const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
   1413       ResultReg = createResultReg(CpyRC);
   1414 
   1415       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1416               TII.get(TargetOpcode::COPY), ResultReg)
   1417         .addReg(SourcePhysReg);
   1418 
   1419     // If necessary, round the floating result to single precision.
   1420     } else if (CopyVT == MVT::f64) {
   1421       ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
   1422       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
   1423               ResultReg).addReg(SourcePhysReg);
   1424 
   1425     // If only the low half of a general register is needed, generate
   1426     // a GPRC copy instead of a G8RC copy.  EXTRACT_SUBREG is not lowered
   1427     // along the fast-isel path, and downstream logic also doesn't like
   1428     // a direct subreg copy on a physical register.
   1429     } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
   1430       ResultReg = createResultReg(&PPC::GPRCRegClass);
   1431       // Convert physical register from G8RC to GPRC.
   1432       SourcePhysReg -= PPC::X0 - PPC::R0;
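              // (This relies on the PPC register enum values being laid out so
              // that X<n> - X0 == R<n> - R0; e.g. X3 is remapped to R3.)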
   1433       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1434               TII.get(TargetOpcode::COPY), ResultReg)
   1435         .addReg(SourcePhysReg);
   1436     }
   1437 
   1438     assert(ResultReg && "ResultReg unset!");
   1439     CLI.InRegs.push_back(SourcePhysReg);
   1440     CLI.ResultReg = ResultReg;
   1441     CLI.NumResultRegs = 1;
   1442   }
   1443 
   1444   return true;
   1445 }
   1446 
   1447 bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
   1448   CallingConv::ID CC  = CLI.CallConv;
   1449   bool IsTailCall     = CLI.IsTailCall;
   1450   bool IsVarArg       = CLI.IsVarArg;
   1451   const Value *Callee = CLI.Callee;
   1452   const MCSymbol *Symbol = CLI.Symbol;
   1453 
   1454   if (!Callee && !Symbol)
   1455     return false;
   1456 
   1457   // Allow SelectionDAG isel to handle tail calls.
   1458   if (IsTailCall)
   1459     return false;
   1460 
   1461   // Let SDISel handle vararg functions.
   1462   if (IsVarArg)
   1463     return false;
   1464 
   1465   // Handle simple calls for now, with legal return types and
   1466   // those that can be extended.
   1467   Type *RetTy = CLI.RetTy;
   1468   MVT RetVT;
   1469   if (RetTy->isVoidTy())
   1470     RetVT = MVT::isVoid;
   1471   else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
   1472            RetVT != MVT::i8)
   1473     return false;
   1474   else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
   1475     // We can't handle boolean returns when CR bits are in use.
   1476     return false;
   1477 
   1478   // FIXME: No multi-register return values yet.
   1479   if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
   1480       RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
   1481       RetVT != MVT::f64) {
   1482     SmallVector<CCValAssign, 16> RVLocs;
   1483     CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
   1484     CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
   1485     if (RVLocs.size() > 1)
   1486       return false;
   1487   }
   1488 
   1489   // Bail out early if there are more than 8 arguments, as we currently
   1490   // only handle arguments passed in registers.
   1491   unsigned NumArgs = CLI.OutVals.size();
   1492   if (NumArgs > 8)
   1493     return false;
   1494 
   1495   // Set up the argument vectors.
   1496   SmallVector<Value*, 8> Args;
   1497   SmallVector<unsigned, 8> ArgRegs;
   1498   SmallVector<MVT, 8> ArgVTs;
   1499   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
   1500 
   1501   Args.reserve(NumArgs);
   1502   ArgRegs.reserve(NumArgs);
   1503   ArgVTs.reserve(NumArgs);
   1504   ArgFlags.reserve(NumArgs);
   1505 
   1506   for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
   1507     // Only handle easy calls for now.  It would be reasonably easy
   1508     // to handle <= 8-byte structures passed ByVal in registers, but we
   1509     // have to ensure they are right-justified in the register.
   1510     ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
   1511     if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
   1512       return false;
   1513 
   1514     Value *ArgValue = CLI.OutVals[i];
   1515     Type *ArgTy = ArgValue->getType();
   1516     MVT ArgVT;
   1517     if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
   1518       return false;
   1519 
   1520     if (ArgVT.isVector())
   1521       return false;
   1522 
   1523     unsigned Arg = getRegForValue(ArgValue);
   1524     if (Arg == 0)
   1525       return false;
   1526 
   1527     Args.push_back(ArgValue);
   1528     ArgRegs.push_back(Arg);
   1529     ArgVTs.push_back(ArgVT);
   1530     ArgFlags.push_back(Flags);
   1531   }
   1532 
   1533   // Process the arguments.
   1534   SmallVector<unsigned, 8> RegArgs;
   1535   unsigned NumBytes;
   1536 
   1537   if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
   1538                        RegArgs, CC, NumBytes, IsVarArg))
   1539     return false;
   1540 
   1541   MachineInstrBuilder MIB;
   1542   // FIXME: No handling for function pointers yet.  This requires
   1543   // implementing the function descriptor (OPD) setup.
   1544   const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
   1545   if (!GV) {
   1546     // Patchpoints are a special case; they always dispatch to a pointer value.
   1547     // However, we don't actually want to generate the indirect call sequence
   1548     // here (that will be generated, as necessary, during asm printing), and
   1549     // the call we generate here will be erased by FastISel::selectPatchpoint,
   1550     // so don't try very hard...
   1551     if (CLI.IsPatchPoint)
   1552       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
   1553     else
   1554       return false;
   1555   } else {
   1556     // Build direct call with NOP for TOC restore.
   1557     // FIXME: We can and should optimize away the NOP for local calls.
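            // (The emitted sequence is 'bl <callee>' followed by a nop; for a
            // call that crosses modules the linker may rewrite that nop into a
            // TOC-restore load, e.g. 'ld 2, 40(1)' under ELFv1.)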
   1558     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1559                   TII.get(PPC::BL8_NOP));
   1560     // Add callee.
   1561     MIB.addGlobalAddress(GV);
   1562   }
   1563 
   1564   // Add implicit physical register uses to the call.
   1565   for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
   1566     MIB.addReg(RegArgs[II], RegState::Implicit);
   1567 
   1568   // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
   1569   // into the call.
   1570   PPCFuncInfo->setUsesTOCBasePtr();
   1571   MIB.addReg(PPC::X2, RegState::Implicit);
   1572 
   1573   // Add a register mask with the call-preserved registers.  Proper
   1574   // defs for return values will be added by setPhysRegsDeadExcept().
   1575   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
   1576 
   1577   CLI.Call = MIB;
   1578 
   1579   // Finish off the call including any return values.
   1580   return finishCall(RetVT, CLI, NumBytes);
   1581 }
   1582 
   1583 // Attempt to fast-select a return instruction.
   1584 bool PPCFastISel::SelectRet(const Instruction *I) {
   1585 
   1586   if (!FuncInfo.CanLowerReturn)
   1587     return false;
   1588 
   1589   const ReturnInst *Ret = cast<ReturnInst>(I);
   1590   const Function &F = *I->getParent()->getParent();
   1591 
   1592   // Build a list of return value registers.
   1593   SmallVector<unsigned, 4> RetRegs;
   1594   CallingConv::ID CC = F.getCallingConv();
   1595 
   1596   if (Ret->getNumOperands() > 0) {
   1597     SmallVector<ISD::OutputArg, 4> Outs;
   1598     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
   1599 
   1600     // Analyze the operands of the return, assigning locations to each operand.
   1601     SmallVector<CCValAssign, 16> ValLocs;
   1602     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
   1603     CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
   1604     const Value *RV = Ret->getOperand(0);
   1605 
   1606     // FIXME: Only one output register for now.
   1607     if (ValLocs.size() > 1)
   1608       return false;
   1609 
   1610     // Special case for returning a constant integer of any size - materialize
   1611     // the constant as an i64 and copy it to the return register.
   1612     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
   1613       CCValAssign &VA = ValLocs[0];
   1614 
   1615       unsigned RetReg = VA.getLocReg();
   1616       // We still need to worry about properly extending the sign. For example,
   1617       // we could have only a single bit or a constant that needs zero
   1618       // extension rather than sign extension. Make sure we pass the return
   1619       // value extension property to integer materialization.
   1620       unsigned SrcReg =
   1621           PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() == CCValAssign::SExt);
   1622 
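              // (Illustrative: for 'ret i8 -1' with a signext return attribute,
              // the constant is materialized as the i64 value -1 and the copy
              // below moves it into the return register, X3 for an integer.)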
   1623       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1624             TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
   1625 
   1626       RetRegs.push_back(RetReg);
   1627 
   1628     } else {
   1629       unsigned Reg = getRegForValue(RV);
   1630 
   1631       if (Reg == 0)
   1632         return false;
   1633 
   1634       // Copy the result values into the output registers.
   1635       for (unsigned i = 0; i < ValLocs.size(); ++i) {
   1636 
   1637         CCValAssign &VA = ValLocs[i];
   1638         assert(VA.isRegLoc() && "Can only return in registers!");
   1639         RetRegs.push_back(VA.getLocReg());
   1640         unsigned SrcReg = Reg + VA.getValNo();
   1641 
   1642         EVT RVEVT = TLI.getValueType(DL, RV->getType());
   1643         if (!RVEVT.isSimple())
   1644           return false;
   1645         MVT RVVT = RVEVT.getSimpleVT();
   1646         MVT DestVT = VA.getLocVT();
   1647 
   1648         if (RVVT != DestVT && RVVT != MVT::i8 &&
   1649             RVVT != MVT::i16 && RVVT != MVT::i32)
   1650           return false;
   1651 
   1652         if (RVVT != DestVT) {
   1653           switch (VA.getLocInfo()) {
   1654             default:
   1655               llvm_unreachable("Unknown loc info!");
   1656             case CCValAssign::Full:
   1657               llvm_unreachable("Full value assign but types don't match?");
   1658             case CCValAssign::AExt:
   1659             case CCValAssign::ZExt: {
   1660               const TargetRegisterClass *RC =
   1661                 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
   1662               unsigned TmpReg = createResultReg(RC);
   1663               if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
   1664                 return false;
   1665               SrcReg = TmpReg;
   1666               break;
   1667             }
   1668             case CCValAssign::SExt: {
   1669               const TargetRegisterClass *RC =
   1670                 (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
   1671               unsigned TmpReg = createResultReg(RC);
   1672               if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
   1673                 return false;
   1674               SrcReg = TmpReg;
   1675               break;
   1676             }
   1677           }
   1678         }
   1679 
   1680         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1681                 TII.get(TargetOpcode::COPY), RetRegs[i])
   1682           .addReg(SrcReg);
   1683       }
   1684     }
   1685   }
   1686 
   1687   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1688                                     TII.get(PPC::BLR8));
   1689 
   1690   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
   1691     MIB.addReg(RetRegs[i], RegState::Implicit);
   1692 
   1693   return true;
   1694 }
   1695 
   1696 // Attempt to emit an integer extend of SrcReg into DestReg.  Both
   1697 // sign and zero extensions are supported.  Return false if we
   1698 // can't handle it.
   1699 bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
   1700                                 unsigned DestReg, bool IsZExt) {
   1701   if (DestVT != MVT::i32 && DestVT != MVT::i64)
   1702     return false;
   1703   if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
   1704     return false;
   1705 
   1706   // Signed extensions use EXTSB, EXTSH, EXTSW.
   1707   if (!IsZExt) {
   1708     unsigned Opc;
   1709     if (SrcVT == MVT::i8)
   1710       Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
   1711     else if (SrcVT == MVT::i16)
   1712       Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
   1713     else {
   1714       assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
   1715       Opc = PPC::EXTSW_32_64;
   1716     }
   1717     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
   1718       .addReg(SrcReg);
   1719 
   1720   // Unsigned 32-bit extensions use RLWINM.
   1721   } else if (DestVT == MVT::i32) {
   1722     unsigned MB;
   1723     if (SrcVT == MVT::i8)
   1724       MB = 24;
   1725     else {
   1726       assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
   1727       MB = 16;
   1728     }
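            // (RLWINM with SH=0 and ME=31 masks the 32-bit value down to bits
            // MB..31: MB=24 keeps the low 8 bits, MB=16 keeps the low 16.)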
   1729     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
   1730             DestReg)
   1731       .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
   1732 
   1733   // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
   1734   } else {
   1735     unsigned MB;
   1736     if (SrcVT == MVT::i8)
   1737       MB = 56;
   1738     else if (SrcVT == MVT::i16)
   1739       MB = 48;
   1740     else
   1741       MB = 32;
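            // (RLDICL with SH=0 clears the MB most-significant bits of the
            // 64-bit result: MB=56 keeps the low 8 bits, MB=48 the low 16,
            // and MB=32 the low 32.)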
   1742     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1743             TII.get(PPC::RLDICL_32_64), DestReg)
   1744       .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
   1745   }
   1746 
   1747   return true;
   1748 }
   1749 
   1750 // Attempt to fast-select an indirect branch instruction.
   1751 bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
   1752   unsigned AddrReg = getRegForValue(I->getOperand(0));
   1753   if (AddrReg == 0)
   1754     return false;
   1755 
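          // (This emits 'mtctr RA' followed by 'bctr': the target address is
          // moved into the count register and an unconditional branch through
          // CTR is issued.)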
   1756   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
   1757     .addReg(AddrReg);
   1758   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
   1759 
   1760   const IndirectBrInst *IB = cast<IndirectBrInst>(I);
   1761   for (const BasicBlock *SuccBB : IB->successors())
   1762     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
   1763 
   1764   return true;
   1765 }
   1766 
   1767 // Attempt to fast-select an integer truncate instruction.
   1768 bool PPCFastISel::SelectTrunc(const Instruction *I) {
   1769   Value *Src  = I->getOperand(0);
   1770   EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
   1771   EVT DestVT = TLI.getValueType(DL, I->getType(), true);
   1772 
   1773   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
   1774     return false;
   1775 
   1776   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
   1777     return false;
   1778 
   1779   unsigned SrcReg = getRegForValue(Src);
   1780   if (!SrcReg)
   1781     return false;
   1782 
   1783   // The only interesting case is when we need to switch register classes.
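          // (An integer truncate is a no-op on the bits themselves; a 64-to-32
          // bit trunc just copies the 32-bit subregister into a GPRC vreg.)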
   1784   if (SrcVT == MVT::i64) {
   1785     unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
   1786     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1787             TII.get(TargetOpcode::COPY),
   1788             ResultReg).addReg(SrcReg, 0, PPC::sub_32);
   1789     SrcReg = ResultReg;
   1790   }
   1791 
   1792   updateValueMap(I, SrcReg);
   1793   return true;
   1794 }
   1795 
   1796 // Attempt to fast-select an integer extend instruction.
   1797 bool PPCFastISel::SelectIntExt(const Instruction *I) {
   1798   Type *DestTy = I->getType();
   1799   Value *Src = I->getOperand(0);
   1800   Type *SrcTy = Src->getType();
   1801 
   1802   bool IsZExt = isa<ZExtInst>(I);
   1803   unsigned SrcReg = getRegForValue(Src);
   1804   if (!SrcReg) return false;
   1805 
   1806   EVT SrcEVT, DestEVT;
   1807   SrcEVT = TLI.getValueType(DL, SrcTy, true);
   1808   DestEVT = TLI.getValueType(DL, DestTy, true);
   1809   if (!SrcEVT.isSimple())
   1810     return false;
   1811   if (!DestEVT.isSimple())
   1812     return false;
   1813 
   1814   MVT SrcVT = SrcEVT.getSimpleVT();
   1815   MVT DestVT = DestEVT.getSimpleVT();
   1816 
   1817   // If we know the register class needed for the result of this
   1818   // instruction, use it.  Otherwise pick the register class of the
   1819   // correct size that does not contain X0/R0, since we don't know
   1820   // whether downstream uses permit that assignment.
   1821   unsigned AssignedReg = FuncInfo.ValueMap[I];
   1822   const TargetRegisterClass *RC =
   1823     (AssignedReg ? MRI.getRegClass(AssignedReg) :
   1824      (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
   1825       &PPC::GPRC_and_GPRC_NOR0RegClass));
   1826   unsigned ResultReg = createResultReg(RC);
   1827 
   1828   if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
   1829     return false;
   1830 
   1831   updateValueMap(I, ResultReg);
   1832   return true;
   1833 }
   1834 
   1835 // Attempt to fast-select an instruction that wasn't handled by
   1836 // the table-generated machinery.
   1837 bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
   1838 
   1839   switch (I->getOpcode()) {
   1840     case Instruction::Load:
   1841       return SelectLoad(I);
   1842     case Instruction::Store:
   1843       return SelectStore(I);
   1844     case Instruction::Br:
   1845       return SelectBranch(I);
   1846     case Instruction::IndirectBr:
   1847       return SelectIndirectBr(I);
   1848     case Instruction::FPExt:
   1849       return SelectFPExt(I);
   1850     case Instruction::FPTrunc:
   1851       return SelectFPTrunc(I);
   1852     case Instruction::SIToFP:
   1853       return SelectIToFP(I, /*IsSigned*/ true);
   1854     case Instruction::UIToFP:
   1855       return SelectIToFP(I, /*IsSigned*/ false);
   1856     case Instruction::FPToSI:
   1857       return SelectFPToI(I, /*IsSigned*/ true);
   1858     case Instruction::FPToUI:
   1859       return SelectFPToI(I, /*IsSigned*/ false);
   1860     case Instruction::Add:
   1861       return SelectBinaryIntOp(I, ISD::ADD);
   1862     case Instruction::Or:
   1863       return SelectBinaryIntOp(I, ISD::OR);
   1864     case Instruction::Sub:
   1865       return SelectBinaryIntOp(I, ISD::SUB);
   1866     case Instruction::Call:
   1867       return selectCall(I);
   1868     case Instruction::Ret:
   1869       return SelectRet(I);
   1870     case Instruction::Trunc:
   1871       return SelectTrunc(I);
   1872     case Instruction::ZExt:
   1873     case Instruction::SExt:
   1874       return SelectIntExt(I);
   1875     // Here add other flavors of Instruction::XXX that automated
   1876     // cases don't catch.  For example, switches are terminators
   1877     // that aren't yet handled.
   1878     default:
   1879       break;
   1880   }
   1881   return false;
   1882 }
   1883 
   1884 // Materialize a floating-point constant into a register, and return
   1885 // the register number (or zero if we failed to handle it).
   1886 unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
   1887   // No plans to handle long double here.
   1888   if (VT != MVT::f32 && VT != MVT::f64)
   1889     return 0;
   1890 
   1891   // All FP constants are loaded from the constant pool.
   1892   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
   1893   assert(Align > 0 && "Unexpectedly missing alignment information!");
   1894   unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
   1895   unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
   1896   CodeModel::Model CModel = TM.getCodeModel();
   1897 
   1898   MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
   1899       MachinePointerInfo::getConstantPool(*FuncInfo.MF),
   1900       MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
   1901 
   1902   unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
   1903   unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
   1904 
   1905   PPCFuncInfo->setUsesTOCBasePtr();
   1906   // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
   1907   if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
   1908     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
   1909             TmpReg)
   1910       .addConstantPoolIndex(Idx).addReg(PPC::X2);
   1911     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
   1912       .addImm(0).addReg(TmpReg).addMemOperand(MMO);
   1913   } else {
   1914     // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
   1915     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
   1916             TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
   1917     // But for large code model, we must generate a LDtocL followed
   1918     // by the LF[SD].
   1919     if (CModel == CodeModel::Large) {
   1920       unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
   1921       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
   1922               TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
   1923       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
   1924         .addImm(0).addReg(TmpReg2);
   1925     } else
   1926       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
   1927         .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
   1928         .addReg(TmpReg)
   1929         .addMemOperand(MMO);
   1930   }
   1931 
   1932   return DestReg;
   1933 }
   1934 
   1935 // Materialize the address of a global value into a register, and return
   1936 // the register number (or zero if we failed to handle it).
   1937 unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
   1938   assert(VT == MVT::i64 && "Non-address!");
   1939   const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
   1940   unsigned DestReg = createResultReg(RC);
   1941 
   1942   // Global values may be plain old object addresses, TLS object
   1943   // addresses, constant pool entries, or jump tables.  How we generate
   1944   // code for these may depend on small, medium, or large code model.
   1945   CodeModel::Model CModel = TM.getCodeModel();
   1946 
   1947   // FIXME: Jump tables are not yet required because fast-isel doesn't
   1948   // handle switches; if that changes, we need them as well.  For now,
   1949   // what follows assumes everything's a generic (or TLS) global address.
   1950 
   1951   // FIXME: We don't yet handle the complexity of TLS.
   1952   if (GV->isThreadLocal())
   1953     return 0;
   1954 
   1955   PPCFuncInfo->setUsesTOCBasePtr();
   1956   // For small code model, generate a simple TOC load.
   1957   if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
   1958     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
   1959             DestReg)
   1960         .addGlobalAddress(GV)
   1961         .addReg(PPC::X2);
   1962   else {
   1963     // If the address is an externally defined symbol, a symbol with common
   1964     // or externally available linkage, a non-local function address, or a
   1965     // jump table address (not yet needed), or if we are generating code
   1966     // for large code model, we generate:
   1967     //       LDtocL(GV, ADDIStocHA(%X2, GV))
   1968     // Otherwise we generate:
   1969     //       ADDItocL(ADDIStocHA(%X2, GV), GV)
   1970     // Either way, start with the ADDIStocHA:
   1971     unsigned HighPartReg = createResultReg(RC);
   1972     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
   1973             HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
   1974 
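              // (Illustrative asm, taking 'gv' as the symbol: the ADDIStocHA
              // above becomes 'addis X, 2, gv@toc@ha'; the LDtocL and ADDItocL
              // below become 'ld D, gv@toc@l(X)' and 'addi D, X, gv@toc@l'
              // respectively.)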
   1975     unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
   1976     if (GVFlags & PPCII::MO_NLP_FLAG) {
   1977       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
   1978               DestReg).addGlobalAddress(GV).addReg(HighPartReg);
   1979     } else {
   1980       // Otherwise generate the ADDItocL.
   1981       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
   1982               DestReg).addReg(HighPartReg).addGlobalAddress(GV);
   1983     }
   1984   }
   1985 
   1986   return DestReg;
   1987 }
   1988 
   1989 // Materialize a 32-bit integer constant into a register, and return
   1990 // the register number (or zero if we failed to handle it).
   1991 unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
   1992                                              const TargetRegisterClass *RC) {
   1993   unsigned Lo = Imm & 0xFFFF;
   1994   unsigned Hi = (Imm >> 16) & 0xFFFF;
   1995 
   1996   unsigned ResultReg = createResultReg(RC);
   1997   bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
   1998 
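          // (Illustrative: 0x12345678 becomes 'lis T, 0x1234' then
          // 'ori R, T, 0x5678'; a value that fits in a signed 16-bit
          // immediate needs only a single 'li'.)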
   1999   if (isInt<16>(Imm))
   2000     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2001             TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
   2002       .addImm(Imm);
   2003   else if (Lo) {
   2004     // Both Lo and Hi have nonzero bits.
   2005     unsigned TmpReg = createResultReg(RC);
   2006     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2007             TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
   2008       .addImm(Hi);
   2009     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2010             TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
   2011       .addReg(TmpReg).addImm(Lo);
   2012   } else
   2013     // Just Hi bits.
   2014     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2015             TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
   2016       .addImm(Hi);
   2017 
   2018   return ResultReg;
   2019 }
   2020 
   2021 // Materialize a 64-bit integer constant into a register, and return
   2022 // the register number (or zero if we failed to handle it).
   2023 unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
   2024                                              const TargetRegisterClass *RC) {
   2025   unsigned Remainder = 0;
   2026   unsigned Shift = 0;
   2027 
   2028   // If the value doesn't fit in 32 bits, see if we can shift it
   2029   // so that it fits in 32 bits.
   2030   if (!isInt<32>(Imm)) {
   2031     Shift = countTrailingZeros<uint64_t>(Imm);
   2032     int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
   2033 
   2034     if (isInt<32>(ImmSh))
   2035       Imm = ImmSh;
   2036     else {
   2037       Remainder = Imm;
   2038       Shift = 32;
   2039       Imm >>= 32;
   2040     }
   2041   }
   2042 
   2043   // Handle the high-order 32 bits (if shifted) or the whole 32 bits
   2044   // (if not shifted).
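          // (Illustrative: 0x1234567800000001 still doesn't fit in 32 bits
          // after stripping trailing zeros, so the high word 0x12345678 is
          // built below, shifted left 32 with RLDICR, and the low bits are
          // then OR'd back in; here that is a single 'ori D, T, 1'.)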
   2045   unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
   2046   if (!Shift)
   2047     return TmpReg1;
   2048 
   2049   // If upper 32 bits were not zero, we've built them and need to shift
   2050   // them into place.
   2051   unsigned TmpReg2;
   2052   if (Imm) {
   2053     TmpReg2 = createResultReg(RC);
   2054     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
   2055             TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
   2056   } else
   2057     TmpReg2 = TmpReg1;
   2058 
   2059   unsigned TmpReg3, Hi, Lo;
   2060   if ((Hi = (Remainder >> 16) & 0xFFFF)) {
   2061     TmpReg3 = createResultReg(RC);
   2062     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
   2063             TmpReg3).addReg(TmpReg2).addImm(Hi);
   2064   } else
   2065     TmpReg3 = TmpReg2;
   2066 
   2067   if ((Lo = Remainder & 0xFFFF)) {
   2068     unsigned ResultReg = createResultReg(RC);
   2069     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
   2070             ResultReg).addReg(TmpReg3).addImm(Lo);
   2071     return ResultReg;
   2072   }
   2073 
   2074   return TmpReg3;
   2075 }
   2076 
   2077 
   2078 // Materialize an integer constant into a register, and return
   2079 // the register number (or zero if we failed to handle it).
   2080 unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
   2081                                         bool UseSExt) {
   2082   // If we're using CR bit registers for i1 values, handle that as a special
   2083   // case first.
   2084   if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
   2085     unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
   2086     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2087             TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
   2088     return ImmReg;
   2089   }
   2090 
   2091   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
   2092       VT != MVT::i8 && VT != MVT::i1)
   2093     return 0;
   2094 
   2095   const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
   2096                                    &PPC::GPRCRegClass);
   2097 
   2098   // If the constant is in range, use a load-immediate.
   2099   if (UseSExt && isInt<16>(CI->getSExtValue())) {
   2100     unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
   2101     unsigned ImmReg = createResultReg(RC);
   2102     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
   2103         .addImm(CI->getSExtValue());
   2104     return ImmReg;
   2105   } else if (!UseSExt && isUInt<16>(CI->getZExtValue())) {
   2106     unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
   2107     unsigned ImmReg = createResultReg(RC);
   2108     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
   2109         .addImm(CI->getZExtValue());
   2110     return ImmReg;
   2111   }
   2112 
   2113   // Construct the constant piecewise.
   2114   int64_t Imm = CI->getZExtValue();
   2115 
   2116   if (VT == MVT::i64)
   2117     return PPCMaterialize64BitInt(Imm, RC);
   2118   else if (VT == MVT::i32)
   2119     return PPCMaterialize32BitInt(Imm, RC);
   2120 
   2121   return 0;
   2122 }
   2123 
   2124 // Materialize a constant into a register, and return the register
   2125 // number (or zero if we failed to handle it).
   2126 unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
   2127   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
   2128 
   2129   // Only handle simple types.
   2130   if (!CEVT.isSimple()) return 0;
   2131   MVT VT = CEVT.getSimpleVT();
   2132 
   2133   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
   2134     return PPCMaterializeFP(CFP, VT);
   2135   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
   2136     return PPCMaterializeGV(GV, VT);
   2137   else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
   2138     return PPCMaterializeInt(CI, VT, VT != MVT::i1);
   2139 
   2140   return 0;
   2141 }
   2142 
   2143 // Materialize the address created by an alloca into a register, and
   2144 // return the register number (or zero if we failed to handle it).
   2145 unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
   2146   // Don't handle dynamic allocas.
   2147   if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
   2148 
   2149   MVT VT;
   2150   if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
   2151 
   2152   DenseMap<const AllocaInst*, int>::iterator SI =
   2153     FuncInfo.StaticAllocaMap.find(AI);
   2154 
   2155   if (SI != FuncInfo.StaticAllocaMap.end()) {
   2156     unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
   2157     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
   2158             ResultReg).addFrameIndex(SI->second).addImm(0);
   2159     return ResultReg;
   2160   }
   2161 
   2162   return 0;
   2163 }
   2164 
   2165 // Fold loads into extends when possible.
   2166 // FIXME: We can have multiple redundant extend/trunc instructions
   2167 // following a load.  The folding only picks up one.  Extend this
   2168 // to check subsequent instructions for the same pattern and remove
   2169 // them.  Thus ResultReg should be the def reg for the last redundant
   2170 // instruction in a chain, and all intervening instructions can be
   2171 // removed from parent.  Change test/CodeGen/PowerPC/fast-isel-fold.ll
   2172 // to add ELF64-NOT: rldicl to the appropriate tests when this works.
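        // (Illustrative: an 'lbz' whose only use is 'rlwinm R, R, 0, 24, 31'
        // does not need the rlwinm, since lbz already zero-extends the byte;
        // in that case the extend is erased and a single load is emitted
        // directly into its result register.)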
   2173 bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   2174                                       const LoadInst *LI) {
   2175   // Verify we have a legal type before going any further.
   2176   MVT VT;
   2177   if (!isLoadTypeLegal(LI->getType(), VT))
   2178     return false;
   2179 
   2180   // Combine load followed by zero- or sign-extend.
   2181   bool IsZExt = false;
   2182   switch(MI->getOpcode()) {
   2183     default:
   2184       return false;
   2185 
   2186     case PPC::RLDICL:
   2187     case PPC::RLDICL_32_64: {
   2188       IsZExt = true;
   2189       unsigned MB = MI->getOperand(3).getImm();
   2190       if ((VT == MVT::i8 && MB <= 56) ||
   2191           (VT == MVT::i16 && MB <= 48) ||
   2192           (VT == MVT::i32 && MB <= 32))
   2193         break;
   2194       return false;
   2195     }
   2196 
   2197     case PPC::RLWINM:
   2198     case PPC::RLWINM8: {
   2199       IsZExt = true;
   2200       unsigned MB = MI->getOperand(3).getImm();
   2201       if ((VT == MVT::i8 && MB <= 24) ||
   2202           (VT == MVT::i16 && MB <= 16))
   2203         break;
   2204       return false;
   2205     }
   2206 
   2207     case PPC::EXTSB:
   2208     case PPC::EXTSB8:
   2209     case PPC::EXTSB8_32_64:
   2210       // There is no sign-extending load-byte instruction.
   2211       return false;
   2212 
   2213     case PPC::EXTSH:
   2214     case PPC::EXTSH8:
   2215     case PPC::EXTSH8_32_64: {
   2216       if (VT != MVT::i16 && VT != MVT::i8)
   2217         return false;
   2218       break;
   2219     }
   2220 
   2221     case PPC::EXTSW:
   2222     case PPC::EXTSW_32_64: {
   2223       if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
   2224         return false;
   2225       break;
   2226     }
   2227   }
   2228 
   2229   // See if we can handle this address.
   2230   Address Addr;
   2231   if (!PPCComputeAddress(LI->getOperand(0), Addr))
   2232     return false;
   2233 
   2234   unsigned ResultReg = MI->getOperand(0).getReg();
   2235 
   2236   if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
   2237     return false;
   2238 
   2239   MI->eraseFromParent();
   2240   return true;
   2241 }
   2242 
   2243 // Attempt to lower call arguments in a faster way than done by
   2244 // the selection DAG code.
   2245 bool PPCFastISel::fastLowerArguments() {
   2246   // Defer to normal argument lowering for now.  It's reasonably
   2247   // efficient.  Consider doing something like the ARM port does to
   2248   // handle the case where all args fit in registers, there are no
   2249   // varargs, and there are no float or vector args.
   2250   return false;
   2251 }
   2252 
   2253 // Handle materializing integer constants into a register.  This is not
   2254 // automatically generated for PowerPC, so must be explicitly created here.
   2255 unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
   2256 
   2257   if (Opc != ISD::Constant)
   2258     return 0;
   2259 
   2260   // If we're using CR bit registers for i1 values, handle that as a special
   2261   // case first.
   2262   if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
   2263     unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
   2264     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2265             TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
   2266     return ImmReg;
   2267   }
   2268 
   2269   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
   2270       VT != MVT::i8 && VT != MVT::i1)
   2271     return 0;
   2272 
   2273   const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
   2274                                    &PPC::GPRCRegClass);
   2275   if (VT == MVT::i64)
   2276     return PPCMaterialize64BitInt(Imm, RC);
   2277   else
   2278     return PPCMaterialize32BitInt(Imm, RC);
   2279 }
   2280 
   2281 // Override for ADDI and ADDI8 to set the correct register class
   2282 // on RHS operand 0.  The automatic infrastructure naively assumes
   2283 // GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
   2284 // for these cases.  At the moment, none of the other automatically
   2285 // generated RI instructions require special treatment.  However, once
   2286 // SelectSelect is implemented, "isel" requires similar handling.
   2287 //
   2288 // Also be conservative about the output register class.  Avoid
   2289 // assigning R0 or X0 to the output register for GPRC and G8RC
   2290 // register classes, as any such result could be used in ADDI, etc.,
   2291 // where those regs have another meaning.
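        // (Illustrative: in 'addi RT, RA, SI' an RA field of 0 encodes the
        // literal value zero rather than register R0, so letting the register
        // allocator pick R0/X0 for such an operand would change the result.)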
   2292 unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
   2293                                       const TargetRegisterClass *RC,
   2294                                       unsigned Op0, bool Op0IsKill,
   2295                                       uint64_t Imm) {
   2296   if (MachineInstOpcode == PPC::ADDI)
   2297     MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
   2298   else if (MachineInstOpcode == PPC::ADDI8)
   2299     MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
   2300 
   2301   const TargetRegisterClass *UseRC =
   2302     (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
   2303      (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
   2304 
   2305   return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
   2306                                    Op0, Op0IsKill, Imm);
   2307 }
   2308 
   2309 // Override for instructions with one register operand to avoid use of
   2310 // R0/X0.  The automatic infrastructure isn't aware of the context so
   2311 // we must be conservative.
   2312 unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
   2313                                      const TargetRegisterClass* RC,
   2314                                      unsigned Op0, bool Op0IsKill) {
   2315   const TargetRegisterClass *UseRC =
   2316     (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
   2317      (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
   2318 
   2319   return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
   2320 }
   2321 
   2322 // Override for instructions with two register operands to avoid use
   2323 // of R0/X0.  The automatic infrastructure isn't aware of the context
   2324 // so we must be conservative.
   2325 unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
   2326                                       const TargetRegisterClass* RC,
   2327                                       unsigned Op0, bool Op0IsKill,
   2328                                       unsigned Op1, bool Op1IsKill) {
   2329   const TargetRegisterClass *UseRC =
   2330     (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
   2331      (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
   2332 
   2333   return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
   2334                                    Op1, Op1IsKill);
   2335 }
   2336 
   2337 namespace llvm {
   2338   // Create the fast instruction selector for PowerPC64 ELF.
   2339   FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
   2340                                 const TargetLibraryInfo *LibInfo) {
   2341     // Only available on 64-bit ELF for now.
   2342     const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
   2343     if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
   2344       return new PPCFastISel(FuncInfo, LibInfo);
   2345     return nullptr;
   2346   }
   2347 }
   2348