Home | History | Annotate | Download | only in PowerPC
      1 //===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the PowerPC-specific support for the FastISel class. Some
     11 // of the target-specific code is generated by tablegen in the file
     12 // PPCGenFastISel.inc, which is #included here.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "PPC.h"
     17 #include "MCTargetDesc/PPCPredicates.h"
     18 #include "PPCCallingConv.h"
     19 #include "PPCCCState.h"
     20 #include "PPCISelLowering.h"
     21 #include "PPCMachineFunctionInfo.h"
     22 #include "PPCSubtarget.h"
     23 #include "PPCTargetMachine.h"
     24 #include "llvm/ADT/Optional.h"
     25 #include "llvm/CodeGen/CallingConvLower.h"
     26 #include "llvm/CodeGen/FastISel.h"
     27 #include "llvm/CodeGen/FunctionLoweringInfo.h"
     28 #include "llvm/CodeGen/MachineConstantPool.h"
     29 #include "llvm/CodeGen/MachineFrameInfo.h"
     30 #include "llvm/CodeGen/MachineInstrBuilder.h"
     31 #include "llvm/CodeGen/MachineRegisterInfo.h"
     32 #include "llvm/IR/CallingConv.h"
     33 #include "llvm/IR/GetElementPtrTypeIterator.h"
     34 #include "llvm/IR/GlobalAlias.h"
     35 #include "llvm/IR/GlobalVariable.h"
     36 #include "llvm/IR/IntrinsicInst.h"
     37 #include "llvm/IR/Operator.h"
     38 #include "llvm/Support/Debug.h"
     39 #include "llvm/Target/TargetLowering.h"
     40 #include "llvm/Target/TargetMachine.h"
     41 
     42 //===----------------------------------------------------------------------===//
     43 //
     44 // TBD:
     45 //   fastLowerArguments: Handle simple cases.
     46 //   PPCMaterializeGV: Handle TLS.
     47 //   SelectCall: Handle function pointers.
     48 //   SelectCall: Handle multi-register return values.
     49 //   SelectCall: Optimize away nops for local calls.
     50 //   processCallArgs: Handle bit-converted arguments.
     51 //   finishCall: Handle multi-register return values.
     52 //   PPCComputeAddress: Handle parameter references as FrameIndex's.
     53 //   PPCEmitCmp: Handle immediate as operand 1.
     54 //   SelectCall: Handle small byval arguments.
     55 //   SelectIntrinsicCall: Implement.
     56 //   SelectSelect: Implement.
     57 //   Consider factoring isTypeLegal into the base class.
     58 //   Implement switches and jump tables.
     59 //
     60 //===----------------------------------------------------------------------===//
     61 using namespace llvm;
     62 
     63 #define DEBUG_TYPE "ppcfastisel"
     64 
     65 namespace {
     66 
     67 typedef struct Address {
     68   enum {
     69     RegBase,
     70     FrameIndexBase
     71   } BaseType;
     72 
     73   union {
     74     unsigned Reg;
     75     int FI;
     76   } Base;
     77 
     78   long Offset;
     79 
     80   // Innocuous defaults for our address.
     81   Address()
     82    : BaseType(RegBase), Offset(0) {
     83      Base.Reg = 0;
     84    }
     85 } Address;
     86 
// Fast instruction selector for PowerPC.  Each Select*/fastLower* hook
// returns false when it cannot handle an instruction, causing the caller
// to fall back to the default (SelectionDAG-based) instruction selection.
class PPCFastISel final : public FastISel {

  // Cached target/function context, all owned elsewhere (set up once in
  // the constructor from FuncInfo).
  const TargetMachine &TM;
  const PPCSubtarget *PPCSubTarget;
  PPCFunctionInfo *PPCFuncInfo;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  LLVMContext *Context;

  public:
    explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
                         const TargetLibraryInfo *LibInfo)
        : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
          PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
          PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
          TII(*PPCSubTarget->getInstrInfo()),
          TLI(*PPCSubTarget->getTargetLowering()),
          Context(&FuncInfo.Fn->getContext()) {}

  // Backend specific FastISel code.
  private:
    bool fastSelectInstruction(const Instruction *I) override;
    unsigned fastMaterializeConstant(const Constant *C) override;
    unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;
    unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
    unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             uint64_t Imm);
    unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            unsigned Op0, bool Op0IsKill);
    unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             unsigned Op1, bool Op1IsKill);

    bool fastLowerCall(CallLoweringInfo &CLI) override;

  // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectIToFP(const Instruction *I, bool IsSigned);
    bool SelectFPToI(const Instruction *I, bool IsSigned);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);

  // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool isValueAvailable(const Value *V) const;
    // True if the virtual register was constrained to the VSX 64-bit
    // scalar FP class.
    bool isVSFRCRegister(unsigned Register) const {
      return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
    }
    // True if the virtual register was constrained to the VSX 32-bit
    // scalar FP class.
    bool isVSSRCRegister(unsigned Register) const {
      return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID;
    }
    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt, unsigned DestReg);
    bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                     const TargetRegisterClass *RC, bool IsZExt = true,
                     unsigned FP64LoadOpc = PPC::LFD);
    bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
    bool PPCComputeAddress(const Value *Obj, Address &Addr);
    void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
                            unsigned &IndexReg);
    bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                           unsigned DestReg, bool IsZExt);
    unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
    unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
    unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                               bool UseSExt = true);
    unsigned PPCMaterialize32BitInt(int64_t Imm,
                                    const TargetRegisterClass *RC);
    unsigned PPCMaterialize64BitInt(int64_t Imm,
                                    const TargetRegisterClass *RC);
    unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
                             unsigned SrcReg, bool IsSigned);
    unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);

  // Call handling routines.
  private:
    bool processCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes,
                         bool IsVarArg);
    bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
    LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag);

  private:
  // Tablegen-generated fastEmit_* helpers.
  #include "PPCGenFastISel.inc"

};
    195 
    196 } // end anonymous namespace
    197 
    198 #include "PPCGenCallingConv.inc"
    199 
    200 // Function whose sole purpose is to kill compiler warnings
    201 // stemming from unused functions included from PPCGenCallingConv.inc.
    202 CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
    203   if (Flag == 1)
    204     return CC_PPC32_SVR4;
    205   else if (Flag == 2)
    206     return CC_PPC32_SVR4_ByVal;
    207   else if (Flag == 3)
    208     return CC_PPC32_SVR4_VarArg;
    209   else
    210     return RetCC_PPC;
    211 }
    212 
    213 static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
    214   switch (Pred) {
    215     // These are not representable with any single compare.
    216     case CmpInst::FCMP_FALSE:
    217     case CmpInst::FCMP_TRUE:
    218     // Major concern about the following 6 cases is NaN result. The comparison
    219     // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
    220     // only one of which will be set. The result is generated by fcmpu
    221     // instruction. However, bc instruction only inspects one of the first 3
    222     // bits, so when un is set, bc instruction may jump to to an undesired
    223     // place.
    224     //
    225     // More specifically, if we expect an unordered comparison and un is set, we
    226     // expect to always go to true branch; in such case UEQ, UGT and ULT still
    227     // give false, which are undesired; but UNE, UGE, ULE happen to give true,
    228     // since they are tested by inspecting !eq, !lt, !gt, respectively.
    229     //
    230     // Similarly, for ordered comparison, when un is set, we always expect the
    231     // result to be false. In such case OGT, OLT and OEQ is good, since they are
    232     // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
    233     // and ONE are tested through !lt, !gt and !eq, and these are true.
    234     case CmpInst::FCMP_UEQ:
    235     case CmpInst::FCMP_UGT:
    236     case CmpInst::FCMP_ULT:
    237     case CmpInst::FCMP_OGE:
    238     case CmpInst::FCMP_OLE:
    239     case CmpInst::FCMP_ONE:
    240     default:
    241       return Optional<PPC::Predicate>();
    242 
    243     case CmpInst::FCMP_OEQ:
    244     case CmpInst::ICMP_EQ:
    245       return PPC::PRED_EQ;
    246 
    247     case CmpInst::FCMP_OGT:
    248     case CmpInst::ICMP_UGT:
    249     case CmpInst::ICMP_SGT:
    250       return PPC::PRED_GT;
    251 
    252     case CmpInst::FCMP_UGE:
    253     case CmpInst::ICMP_UGE:
    254     case CmpInst::ICMP_SGE:
    255       return PPC::PRED_GE;
    256 
    257     case CmpInst::FCMP_OLT:
    258     case CmpInst::ICMP_ULT:
    259     case CmpInst::ICMP_SLT:
    260       return PPC::PRED_LT;
    261 
    262     case CmpInst::FCMP_ULE:
    263     case CmpInst::ICMP_ULE:
    264     case CmpInst::ICMP_SLE:
    265       return PPC::PRED_LE;
    266 
    267     case CmpInst::FCMP_UNE:
    268     case CmpInst::ICMP_NE:
    269       return PPC::PRED_NE;
    270 
    271     case CmpInst::FCMP_ORD:
    272       return PPC::PRED_NU;
    273 
    274     case CmpInst::FCMP_UNO:
    275       return PPC::PRED_UN;
    276   }
    277 }
    278 
    279 // Determine whether the type Ty is simple enough to be handled by
    280 // fast-isel, and return its equivalent machine type in VT.
    281 // FIXME: Copied directly from ARM -- factor into base class?
    282 bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
    283   EVT Evt = TLI.getValueType(DL, Ty, true);
    284 
    285   // Only handle simple types.
    286   if (Evt == MVT::Other || !Evt.isSimple()) return false;
    287   VT = Evt.getSimpleVT();
    288 
    289   // Handle all legal types, i.e. a register that will directly hold this
    290   // value.
    291   return TLI.isTypeLegal(VT);
    292 }
    293 
    294 // Determine whether the type Ty is simple enough to be handled by
    295 // fast-isel as a load target, and return its equivalent machine type in VT.
    296 bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
    297   if (isTypeLegal(Ty, VT)) return true;
    298 
    299   // If this is a type than can be sign or zero-extended to a basic operation
    300   // go ahead and accept it now.
    301   if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
    302     return true;
    303   }
    304 
    305   return false;
    306 }
    307 
    308 bool PPCFastISel::isValueAvailable(const Value *V) const {
    309   if (!isa<Instruction>(V))
    310     return true;
    311 
    312   const auto *I = cast<Instruction>(V);
    313   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
    314 }
    315 
    316 // Given a value Obj, create an Address object Addr that represents its
    317 // address.  Return false if we can't handle it.
    318 bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
    319   const User *U = nullptr;
    320   unsigned Opcode = Instruction::UserOp1;
    321   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    322     // Don't walk into other basic blocks unless the object is an alloca from
    323     // another block, otherwise it may not have a virtual register assigned.
    324     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
    325         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
    326       Opcode = I->getOpcode();
    327       U = I;
    328     }
    329   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    330     Opcode = C->getOpcode();
    331     U = C;
    332   }
    333 
    334   switch (Opcode) {
    335     default:
    336       break;
    337     case Instruction::BitCast:
    338       // Look through bitcasts.
    339       return PPCComputeAddress(U->getOperand(0), Addr);
    340     case Instruction::IntToPtr:
    341       // Look past no-op inttoptrs.
    342       if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
    343           TLI.getPointerTy(DL))
    344         return PPCComputeAddress(U->getOperand(0), Addr);
    345       break;
    346     case Instruction::PtrToInt:
    347       // Look past no-op ptrtoints.
    348       if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
    349         return PPCComputeAddress(U->getOperand(0), Addr);
    350       break;
    351     case Instruction::GetElementPtr: {
    352       Address SavedAddr = Addr;
    353       long TmpOffset = Addr.Offset;
    354 
    355       // Iterate through the GEP folding the constants into offsets where
    356       // we can.
    357       gep_type_iterator GTI = gep_type_begin(U);
    358       for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
    359            II != IE; ++II, ++GTI) {
    360         const Value *Op = *II;
    361         if (StructType *STy = dyn_cast<StructType>(*GTI)) {
    362           const StructLayout *SL = DL.getStructLayout(STy);
    363           unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
    364           TmpOffset += SL->getElementOffset(Idx);
    365         } else {
    366           uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
    367           for (;;) {
    368             if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
    369               // Constant-offset addressing.
    370               TmpOffset += CI->getSExtValue() * S;
    371               break;
    372             }
    373             if (canFoldAddIntoGEP(U, Op)) {
    374               // A compatible add with a constant operand. Fold the constant.
    375               ConstantInt *CI =
    376               cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
    377               TmpOffset += CI->getSExtValue() * S;
    378               // Iterate on the other operand.
    379               Op = cast<AddOperator>(Op)->getOperand(0);
    380               continue;
    381             }
    382             // Unsupported
    383             goto unsupported_gep;
    384           }
    385         }
    386       }
    387 
    388       // Try to grab the base operand now.
    389       Addr.Offset = TmpOffset;
    390       if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
    391 
    392       // We failed, restore everything and try the other options.
    393       Addr = SavedAddr;
    394 
    395       unsupported_gep:
    396       break;
    397     }
    398     case Instruction::Alloca: {
    399       const AllocaInst *AI = cast<AllocaInst>(Obj);
    400       DenseMap<const AllocaInst*, int>::iterator SI =
    401         FuncInfo.StaticAllocaMap.find(AI);
    402       if (SI != FuncInfo.StaticAllocaMap.end()) {
    403         Addr.BaseType = Address::FrameIndexBase;
    404         Addr.Base.FI = SI->second;
    405         return true;
    406       }
    407       break;
    408     }
    409   }
    410 
    411   // FIXME: References to parameters fall through to the behavior
    412   // below.  They should be able to reference a frame index since
    413   // they are stored to the stack, so we can get "ld rx, offset(r1)"
    414   // instead of "addi ry, r1, offset / ld rx, 0(ry)".  Obj will
    415   // just contain the parameter.  Try to handle this with a FI.
    416 
    417   // Try to get this in a register if nothing else has worked.
    418   if (Addr.Base.Reg == 0)
    419     Addr.Base.Reg = getRegForValue(Obj);
    420 
    421   // Prevent assignment of base register to X0, which is inappropriate
    422   // for loads and stores alike.
    423   if (Addr.Base.Reg != 0)
    424     MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
    425 
    426   return Addr.Base.Reg != 0;
    427 }
    428 
    429 // Fix up some addresses that can't be used directly.  For example, if
    430 // an offset won't fit in an instruction field, we may need to move it
    431 // into an index register.
    432 void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
    433                                      unsigned &IndexReg) {
    434 
    435   // Check whether the offset fits in the instruction field.
    436   if (!isInt<16>(Addr.Offset))
    437     UseOffset = false;
    438 
    439   // If this is a stack pointer and the offset needs to be simplified then
    440   // put the alloca address into a register, set the base type back to
    441   // register and continue. This should almost never happen.
    442   if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
    443     unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
    444     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
    445             ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
    446     Addr.Base.Reg = ResultReg;
    447     Addr.BaseType = Address::RegBase;
    448   }
    449 
    450   if (!UseOffset) {
    451     IntegerType *OffsetTy = Type::getInt64Ty(*Context);
    452     const ConstantInt *Offset =
    453       ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
    454     IndexReg = PPCMaterializeInt(Offset, MVT::i64);
    455     assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
    456   }
    457 }
    458 
    459 // Emit a load instruction if possible, returning true if we succeeded,
    460 // otherwise false.  See commentary below for how the register class of
    461 // the load is determined.
    462 bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
    463                               const TargetRegisterClass *RC,
    464                               bool IsZExt, unsigned FP64LoadOpc) {
    465   unsigned Opc;
    466   bool UseOffset = true;
    467 
    468   // If ResultReg is given, it determines the register class of the load.
    469   // Otherwise, RC is the register class to use.  If the result of the
    470   // load isn't anticipated in this block, both may be zero, in which
    471   // case we must make a conservative guess.  In particular, don't assign
    472   // R0 or X0 to the result register, as the result may be used in a load,
    473   // store, add-immediate, or isel that won't permit this.  (Though
    474   // perhaps the spill and reload of live-exit values would handle this?)
    475   const TargetRegisterClass *UseRC =
    476     (ResultReg ? MRI.getRegClass(ResultReg) :
    477      (RC ? RC :
    478       (VT == MVT::f64 ? &PPC::F8RCRegClass :
    479        (VT == MVT::f32 ? &PPC::F4RCRegClass :
    480         (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
    481          &PPC::GPRC_and_GPRC_NOR0RegClass)))));
    482 
    483   bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
    484 
    485   switch (VT.SimpleTy) {
    486     default: // e.g., vector types not handled
    487       return false;
    488     case MVT::i8:
    489       Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
    490       break;
    491     case MVT::i16:
    492       Opc = (IsZExt ?
    493              (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
    494              (Is32BitInt ? PPC::LHA : PPC::LHA8));
    495       break;
    496     case MVT::i32:
    497       Opc = (IsZExt ?
    498              (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
    499              (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
    500       if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
    501         UseOffset = false;
    502       break;
    503     case MVT::i64:
    504       Opc = PPC::LD;
    505       assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
    506              "64-bit load with 32-bit target??");
    507       UseOffset = ((Addr.Offset & 3) == 0);
    508       break;
    509     case MVT::f32:
    510       Opc = PPC::LFS;
    511       break;
    512     case MVT::f64:
    513       Opc = FP64LoadOpc;
    514       break;
    515   }
    516 
    517   // If necessary, materialize the offset into a register and use
    518   // the indexed form.  Also handle stack pointers with special needs.
    519   unsigned IndexReg = 0;
    520   PPCSimplifyAddress(Addr, UseOffset, IndexReg);
    521 
    522   // If this is a potential VSX load with an offset of 0, a VSX indexed load can
    523   // be used.
    524   bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg);
    525   bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg);
    526   bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
    527   bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD;
    528   if ((Is32VSXLoad || Is64VSXLoad) &&
    529       (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
    530       (Addr.Offset == 0)) {
    531     UseOffset = false;
    532   }
    533 
    534   if (ResultReg == 0)
    535     ResultReg = createResultReg(UseRC);
    536 
    537   // Note: If we still have a frame index here, we know the offset is
    538   // in range, as otherwise PPCSimplifyAddress would have converted it
    539   // into a RegBase.
    540   if (Addr.BaseType == Address::FrameIndexBase) {
    541     // VSX only provides an indexed load.
    542     if (Is32VSXLoad || Is64VSXLoad) return false;
    543 
    544     MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
    545         MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
    546                                           Addr.Offset),
    547         MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
    548         MFI.getObjectAlignment(Addr.Base.FI));
    549 
    550     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    551       .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
    552 
    553   // Base reg with offset in range.
    554   } else if (UseOffset) {
    555     // VSX only provides an indexed load.
    556     if (Is32VSXLoad || Is64VSXLoad) return false;
    557 
    558     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    559       .addImm(Addr.Offset).addReg(Addr.Base.Reg);
    560 
    561   // Indexed form.
    562   } else {
    563     // Get the RR opcode corresponding to the RI one.  FIXME: It would be
    564     // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    565     // is hard to get at.
    566     switch (Opc) {
    567       default:        llvm_unreachable("Unexpected opcode!");
    568       case PPC::LBZ:    Opc = PPC::LBZX;    break;
    569       case PPC::LBZ8:   Opc = PPC::LBZX8;   break;
    570       case PPC::LHZ:    Opc = PPC::LHZX;    break;
    571       case PPC::LHZ8:   Opc = PPC::LHZX8;   break;
    572       case PPC::LHA:    Opc = PPC::LHAX;    break;
    573       case PPC::LHA8:   Opc = PPC::LHAX8;   break;
    574       case PPC::LWZ:    Opc = PPC::LWZX;    break;
    575       case PPC::LWZ8:   Opc = PPC::LWZX8;   break;
    576       case PPC::LWA:    Opc = PPC::LWAX;    break;
    577       case PPC::LWA_32: Opc = PPC::LWAX_32; break;
    578       case PPC::LD:     Opc = PPC::LDX;     break;
    579       case PPC::LFS:    Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
    580       case PPC::LFD:    Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
    581     }
    582     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    583       .addReg(Addr.Base.Reg).addReg(IndexReg);
    584   }
    585 
    586   return true;
    587 }
    588 
    589 // Attempt to fast-select a load instruction.
    590 bool PPCFastISel::SelectLoad(const Instruction *I) {
    591   // FIXME: No atomic loads are supported.
    592   if (cast<LoadInst>(I)->isAtomic())
    593     return false;
    594 
    595   // Verify we have a legal type before going any further.
    596   MVT VT;
    597   if (!isLoadTypeLegal(I->getType(), VT))
    598     return false;
    599 
    600   // See if we can handle this address.
    601   Address Addr;
    602   if (!PPCComputeAddress(I->getOperand(0), Addr))
    603     return false;
    604 
    605   // Look at the currently assigned register for this instruction
    606   // to determine the required register class.  This is necessary
    607   // to constrain RA from using R0/X0 when this is not legal.
    608   unsigned AssignedReg = FuncInfo.ValueMap[I];
    609   const TargetRegisterClass *RC =
    610     AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
    611 
    612   unsigned ResultReg = 0;
    613   if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
    614     return false;
    615   updateValueMap(I, ResultReg);
    616   return true;
    617 }
    618 
// Emit a store instruction to store SrcReg at Addr.  Returns false when
// the value type or the VSX/addressing-mode combination cannot be
// handled, so the caller falls back to SelectionDAG ISel.
bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // The source register's class decides between the 32-bit (GPRC) and
  // 64-bit (G8RC) integer store opcodes.
  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
      break;
    case MVT::i16:
      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
      break;
    case MVT::i32:
      assert(Is32BitInt && "Not GPRC for i32??");
      Opc = PPC::STW;
      break;
    case MVT::i64:
      Opc = PPC::STD;
      // STD is DS-form: its displacement must be a multiple of 4, so fall
      // back to the indexed form otherwise.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = PPC::STFS;
      break;
    case MVT::f64:
      Opc = PPC::STFD;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form.  Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegister(SrcReg);
  bool IsVSFRC = isVSFRCRegister(SrcReg);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    // Attach a memory operand describing the fixed stack slot so later
    // passes can reason about the access.
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlignment(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one.  FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default:        llvm_unreachable("Unexpected opcode!");
      case PPC::STB:  Opc = PPC::STBX;  break;
      case PPC::STH : Opc = PPC::STHX;  break;
      case PPC::STW : Opc = PPC::STWX;  break;
      case PPC::STB8: Opc = PPC::STBX8; break;
      case PPC::STH8: Opc = PPC::STHX8; break;
      // NOTE(review): STW8 is never selected above (i32 asserts GPRC and
      // uses STW), so this case appears dead -- confirm before removing.
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD:  Opc = PPC::STDX;  break;
      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
    730 
    731 // Attempt to fast-select a store instruction.
    732 bool PPCFastISel::SelectStore(const Instruction *I) {
    733   Value *Op0 = I->getOperand(0);
    734   unsigned SrcReg = 0;
    735 
    736   // FIXME: No atomics loads are supported.
    737   if (cast<StoreInst>(I)->isAtomic())
    738     return false;
    739 
    740   // Verify we have a legal type before going any further.
    741   MVT VT;
    742   if (!isLoadTypeLegal(Op0->getType(), VT))
    743     return false;
    744 
    745   // Get the value to be stored into a register.
    746   SrcReg = getRegForValue(Op0);
    747   if (SrcReg == 0)
    748     return false;
    749 
    750   // See if we can handle this address.
    751   Address Addr;
    752   if (!PPCComputeAddress(I->getOperand(1), Addr))
    753     return false;
    754 
    755   if (!PPCEmitStore(VT, SrcReg, Addr))
    756     return false;
    757 
    758   return true;
    759 }
    760 
// Attempt to fast-select a branch instruction.
bool PPCFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *BrBB = FuncInfo.MBB;
  // True/false successors of the IR branch, mapped to machine blocks.
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // For now, just try the simplest case where it's fed by a compare.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (isValueAvailable(CI)) {
      // Map the IR predicate to a PPC branch predicate; bail if there is
      // no direct equivalent.
      Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
      if (!OptPPCPred)
        return false;

      PPC::Predicate PPCPred = OptPPCPred.getValue();

      // Take advantage of fall-through opportunities: if the true block
      // immediately follows in layout, branch on the inverted predicate
      // to the false block instead.
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        PPCPred = PPC::InvertPredicate(PPCPred);
      }

      unsigned CondReg = createResultReg(&PPC::CRRCRegClass);

      // Emit the compare that defines CondReg (a CR field).
      if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
                      CondReg))
        return false;

      BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
        .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    // Constant condition: lower to an unconditional branch to whichever
    // successor is actually taken.
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(Target, DbgLoc);
    return true;
  }

  // FIXME: ARM looks for a case where the block containing the compare
  // has been split from the block containing the branch.  If this happens,
  // there is a vreg available containing the result of the compare.  I'm
  // not sure we can do much, as we've lost the predicate information with
  // the compare instruction -- we have a 4-bit CR but don't know which bit
  // to test here.
  return false;
}
    810 
    811 // Attempt to emit a compare of the two source values.  Signed and unsigned
    812 // comparisons are supported.  Return false if we can't handle it.
    813 bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
    814                              bool IsZExt, unsigned DestReg) {
    815   Type *Ty = SrcValue1->getType();
    816   EVT SrcEVT = TLI.getValueType(DL, Ty, true);
    817   if (!SrcEVT.isSimple())
    818     return false;
    819   MVT SrcVT = SrcEVT.getSimpleVT();
    820 
    821   if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
    822     return false;
    823 
    824   // See if operand 2 is an immediate encodeable in the compare.
    825   // FIXME: Operands are not in canonical order at -O0, so an immediate
    826   // operand in position 1 is a lost opportunity for now.  We are
    827   // similar to ARM in this regard.
    828   long Imm = 0;
    829   bool UseImm = false;
    830 
    831   // Only 16-bit integer constants can be represented in compares for
    832   // PowerPC.  Others will be materialized into a register.
    833   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    834     if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
    835         SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    836       const APInt &CIVal = ConstInt->getValue();
    837       Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
    838       if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
    839         UseImm = true;
    840     }
    841   }
    842 
    843   unsigned CmpOpc;
    844   bool NeedsExt = false;
    845   switch (SrcVT.SimpleTy) {
    846     default: return false;
    847     case MVT::f32:
    848       CmpOpc = PPC::FCMPUS;
    849       break;
    850     case MVT::f64:
    851       CmpOpc = PPC::FCMPUD;
    852       break;
    853     case MVT::i1:
    854     case MVT::i8:
    855     case MVT::i16:
    856       NeedsExt = true;
    857       // Intentional fall-through.
    858     case MVT::i32:
    859       if (!UseImm)
    860         CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
    861       else
    862         CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
    863       break;
    864     case MVT::i64:
    865       if (!UseImm)
    866         CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
    867       else
    868         CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
    869       break;
    870   }
    871 
    872   unsigned SrcReg1 = getRegForValue(SrcValue1);
    873   if (SrcReg1 == 0)
    874     return false;
    875 
    876   unsigned SrcReg2 = 0;
    877   if (!UseImm) {
    878     SrcReg2 = getRegForValue(SrcValue2);
    879     if (SrcReg2 == 0)
    880       return false;
    881   }
    882 
    883   if (NeedsExt) {
    884     unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
    885     if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
    886       return false;
    887     SrcReg1 = ExtReg;
    888 
    889     if (!UseImm) {
    890       unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
    891       if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
    892         return false;
    893       SrcReg2 = ExtReg;
    894     }
    895   }
    896 
    897   if (!UseImm)
    898     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
    899       .addReg(SrcReg1).addReg(SrcReg2);
    900   else
    901     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
    902       .addReg(SrcReg1).addImm(Imm);
    903 
    904   return true;
    905 }
    906 
    907 // Attempt to fast-select a floating-point extend instruction.
    908 bool PPCFastISel::SelectFPExt(const Instruction *I) {
    909   Value *Src  = I->getOperand(0);
    910   EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
    911   EVT DestVT = TLI.getValueType(DL, I->getType(), true);
    912 
    913   if (SrcVT != MVT::f32 || DestVT != MVT::f64)
    914     return false;
    915 
    916   unsigned SrcReg = getRegForValue(Src);
    917   if (!SrcReg)
    918     return false;
    919 
    920   // No code is generated for a FP extend.
    921   updateValueMap(I, SrcReg);
    922   return true;
    923 }
    924 
    925 // Attempt to fast-select a floating-point truncate instruction.
    926 bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
    927   Value *Src  = I->getOperand(0);
    928   EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
    929   EVT DestVT = TLI.getValueType(DL, I->getType(), true);
    930 
    931   if (SrcVT != MVT::f64 || DestVT != MVT::f32)
    932     return false;
    933 
    934   unsigned SrcReg = getRegForValue(Src);
    935   if (!SrcReg)
    936     return false;
    937 
    938   // Round the result to single precision.
    939   unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
    940   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
    941     .addReg(SrcReg);
    942 
    943   updateValueMap(I, DestReg);
    944   return true;
    945 }
    946 
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case.  Can use a 4-byte
// stack slot and 4-byte store/load sequence.  Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit.
  if (SrcVT == MVT::i32) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return 0;
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return 0;

  // Load the integer value into an FPR.  The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      // Zero-extending word load; on big-endian the low word of the
      // 8-byte slot sits at offset 4, hence the endian-dependent offset.
      LoadOpc = PPC::LFIWZX;
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    } else if (PPCSubTarget->hasLFIWAX()) {
      // Sign-extending word load, available only when the subtarget
      // has LFIWAX; otherwise fall back to the full 8-byte LFD.
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned ResultReg = 0;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return 0;

  return ResultReg;
}
    995 
// Attempt to fast-select an integer-to-floating-point conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
//        direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only scalar f32/f64 destinations are handled.
  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  // Only integer sources up to 64 bits are handled.
  if (SrcVT != MVT::i8  && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !PPCSubTarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float.  Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding.  If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
    return false;

  // Extend the input if necessary.  Sub-word sources are widened to i64
  // (sign- or zero-extended to match the conversion's signedness).
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR (via a stack slot; see PPCMoveToFPReg).
  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (FPReg == 0)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}
   1067 
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      unsigned SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return 0;

  // Reload it into a GPR.  If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class.  If no register has been
  // assigned yet, let PPCEmitLoad pick one (RC is null).
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  unsigned ResultReg = 0;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
    return 0;

  return ResultReg;
}
   1104 
// Attempt to fast-select a floating-point-to-integer conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
//        direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  // Only i32/i64 destinations are handled.
  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // Convert f32 to f64 if necessary.  This is just a meaningless copy
  // to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass) {
    unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), TmpReg)
      .addReg(SrcReg);
    SrcReg = TmpReg;
  }

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs.  Without FPCVT, the unsigned-i32 case falls
  // back to FCTIDZ (convert to signed i64) and keeps the low 32 bits.
  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
  unsigned Opc;

  if (DstVT == MVT::i32)
    if (IsSigned)
      Opc = PPC::FCTIWZ;
    else
      Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
  else
    Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register.
  unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
  if (IntReg == 0)
    return false;

  updateValueMap(I, IntReg);
  return true;
}
   1169 
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class.  If there is no register,
  // make a conservative choice (don't assign R0).
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the 32- or 64-bit form of the operation based on the register
  // class chosen above.
  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // Handle case of small immediate operand: fold a 16-bit constant into
  // the immediate form of the instruction where one exists.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    if (isInt<16>(Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          // ADDI treats R0 as zero, so constrain the source away from R0.
          Opc = PPC::ADDI;
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // x - imm becomes x + (-imm), except -imm overflows for -32768.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  // Reverse operands for subtract-from: SUBF computes RB - RA.
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}
   1274 
// Handle arguments to a call that we're attempting to fast-select.
// Return false if the arguments are too complex for us at the moment.
bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes,
                                  bool IsVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, 8);

  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);

  // Bail out if we can't handle any of the arguments.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Skip vector arguments for now, as well as long double and
    // uint128_t, and anything that isn't passed in a register.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
        !VA.isRegLoc() || VA.needsCustom())
      return false;

    // Skip bit-converted arguments for now.
    if (VA.getLocInfo() == CCValAssign::BCvt)
      return false;
  }

  // Get a count of how many bytes are to be pushed onto the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
  NumBytes = std::max(NumBytes, LinkageSize + 64);

  // Issue CALLSEQ_START.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TII.getCallFrameSetupOpcode()))
    .addImm(NumBytes);

  // Prepare to assign register arguments.  Every argument uses up a
  // GPR protocol register even if it's passed in a floating-point
  // register (unless we're using the fast calling convention).
  unsigned NextGPR = PPC::X3;
  unsigned NextFPR = PPC::F1;

  // Process arguments.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Handle argument promotion and bitcasts.
    switch (VA.getLocInfo()) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt: {
        // Sign-extend the value to its location type before the copy.
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        unsigned TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
          llvm_unreachable("Failed to emit a sext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::AExt:
      case CCValAssign::ZExt: {
        // Zero-extend (AExt is handled as ZExt here) before the copy.
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        unsigned TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
          llvm_unreachable("Failed to emit a zext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::BCvt: {
        // FIXME: Not yet handled.
        llvm_unreachable("Should have bailed before getting here!");
        break;
      }
    }

    // Copy this argument to the appropriate register.  An FP argument
    // also consumes a GPR protocol register except under fastcc.
    unsigned ArgReg;
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
      ArgReg = NextFPR++;
      if (CC != CallingConv::Fast)
        ++NextGPR;
    } else
      ArgReg = NextGPR++;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
    RegArgs.push_back(ArgReg);
  }

  return true;
}
   1390 
   1391 // For a call that we've determined we can fast-select, finish the
   1392 // call sequence and generate a copy to obtain the return value (if any).
   1393 bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
   1394   CallingConv::ID CC = CLI.CallConv;
   1395 
   1396   // Issue CallSEQ_END.
   1397   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1398           TII.get(TII.getCallFrameDestroyOpcode()))
   1399     .addImm(NumBytes).addImm(0);
   1400 
   1401   // Next, generate a copy to obtain the return value.
   1402   // FIXME: No multi-register return values yet, though I don't foresee
   1403   // any real difficulties there.
   1404   if (RetVT != MVT::isVoid) {
   1405     SmallVector<CCValAssign, 16> RVLocs;
   1406     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
   1407     CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
   1408     CCValAssign &VA = RVLocs[0];
   1409     assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
   1410     assert(VA.isRegLoc() && "Can only return in registers!");
   1411 
   1412     MVT DestVT = VA.getValVT();
   1413     MVT CopyVT = DestVT;
   1414 
   1415     // Ints smaller than a register still arrive in a full 64-bit
   1416     // register, so make sure we recognize this.
   1417     if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
   1418       CopyVT = MVT::i64;
   1419 
   1420     unsigned SourcePhysReg = VA.getLocReg();
   1421     unsigned ResultReg = 0;
   1422 
   1423     if (RetVT == CopyVT) {
   1424       const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
   1425       ResultReg = createResultReg(CpyRC);
   1426 
   1427       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1428               TII.get(TargetOpcode::COPY), ResultReg)
   1429         .addReg(SourcePhysReg);
   1430 
   1431     // If necessary, round the floating result to single precision.
   1432     } else if (CopyVT == MVT::f64) {
   1433       ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
   1434       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
   1435               ResultReg).addReg(SourcePhysReg);
   1436 
   1437     // If only the low half of a general register is needed, generate
   1438     // a GPRC copy instead of a G8RC copy.  (EXTRACT_SUBREG can't be
   1439     // used along the fast-isel path (not lowered), and downstream logic
   1440     // also doesn't like a direct subreg copy on a physical reg.)
   1441     } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
   1442       ResultReg = createResultReg(&PPC::GPRCRegClass);
   1443       // Convert physical register from G8RC to GPRC.
   1444       SourcePhysReg -= PPC::X0 - PPC::R0;
   1445       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1446               TII.get(TargetOpcode::COPY), ResultReg)
   1447         .addReg(SourcePhysReg);
   1448     }
   1449 
   1450     assert(ResultReg && "ResultReg unset!");
   1451     CLI.InRegs.push_back(SourcePhysReg);
   1452     CLI.ResultReg = ResultReg;
   1453     CLI.NumResultRegs = 1;
   1454   }
   1455 
   1456   return true;
   1457 }
   1458 
// Attempt to fast-select a call.  Returns true on success; returning
// false defers the call to SelectionDAG.  Only simple direct calls are
// handled here: no tail calls, no varargs, at most eight register
// arguments, and at most one return register.
bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC  = CLI.CallConv;
  bool IsTailCall     = CLI.IsTailCall;
  bool IsVarArg       = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  const MCSymbol *Symbol = CLI.Symbol;

  // We need either a callee value or an explicit symbol to call.
  if (!Callee && !Symbol)
    return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // Handle simple calls for now, with legal return types and
  // those that can be extended.
  Type *RetTy = CLI.RetTy;
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8)
    return false;
  else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
    // We can't handle boolean returns when CR bits are in use.
    return false;

  // FIXME: No multi-register return values yet.
  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
      RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
      RetVT != MVT::f64) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
    if (RVLocs.size() > 1)
      return false;
  }

  // Bail early if more than 8 arguments, as we only currently
  // handle arguments passed in registers.
  unsigned NumArgs = CLI.OutVals.size();
  if (NumArgs > 8)
    return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;

  Args.reserve(NumArgs);
  ArgRegs.reserve(NumArgs);
  ArgVTs.reserve(NumArgs);
  ArgFlags.reserve(NumArgs);

  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
    // Only handle easy calls for now.  It would be reasonably easy
    // to handle <= 8-byte structures passed ByVal in registers, but we
    // have to ensure they are right-justified in the register.
    ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
    if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
      return false;

    Value *ArgValue = CLI.OutVals[i];
    Type *ArgTy = ArgValue->getType();
    MVT ArgVT;
    // Legal types plus i8/i16 (which get extended) are allowed.
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
      return false;

    // Vector arguments are not handled along this path.
    if (ArgVT.isVector())
      return false;

    unsigned Arg = getRegForValue(ArgValue);
    if (Arg == 0)
      return false;

    Args.push_back(ArgValue);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Process the arguments.
  SmallVector<unsigned, 8> RegArgs;
  unsigned NumBytes;

  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, IsVarArg))
    return false;

  MachineInstrBuilder MIB;
  // FIXME: No handling for function pointers yet.  This requires
  // implementing the function descriptor (OPD) setup.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV) {
    // patchpoints are a special case; they always dispatch to a pointer value.
    // However, we don't actually want to generate the indirect call sequence
    // here (that will be generated, as necessary, during asm printing), and
    // the call we generate here will be erased by FastISel::selectPatchpoint,
    // so don't try very hard...
    if (CLI.IsPatchPoint)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
    else
      return false;
  } else {
    // Build direct call with NOP for TOC restore.
    // FIXME: We can and should optimize away the NOP for local calls.
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(PPC::BL8_NOP));
    // Add callee.
    MIB.addGlobalAddress(GV);
  }

  // Add implicit physical register uses to the call.
  for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
    MIB.addReg(RegArgs[II], RegState::Implicit);

  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  PPCFuncInfo->setUsesTOCBasePtr();
  MIB.addReg(PPC::X2, RegState::Implicit);

  // Add a register mask with the call-preserved registers.  Proper
  // defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(RetVT, CLI, NumBytes);
}
   1594 
// Attempt to fast-select a return instruction.  Handles void returns,
// constant-integer returns, and single-register value returns (with
// extension when the value is narrower than its assigned location).
bool PPCFastISel::SelectRet(const Instruction *I) {

  if (!FuncInfo.CanLowerReturn)
    return false;

  // Defer to SelectionDAG when callee-saved registers are split
  // across prologue/epilogue (split CSR).
  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;
  CallingConv::ID CC = F.getCallingConv();

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
    CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
    const Value *RV = Ret->getOperand(0);

    // FIXME: Only one output register for now.
    if (ValLocs.size() > 1)
      return false;

    // Special case for returning a constant integer of any size - materialize
    // the constant as an i64 and copy it to the return register.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
      CCValAssign &VA = ValLocs[0];

      unsigned RetReg = VA.getLocReg();
      // We still need to worry about properly extending the sign. For example,
      // we could have only a single bit or a constant that needs zero
      // extension rather than sign extension. Make sure we pass the return
      // value extension property to integer materialization.
      unsigned SrcReg =
          PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);

      RetRegs.push_back(RetReg);

    } else {
      unsigned Reg = getRegForValue(RV);

      if (Reg == 0)
        return false;

      // Copy the result values into the output registers.
      for (unsigned i = 0; i < ValLocs.size(); ++i) {

        CCValAssign &VA = ValLocs[i];
        assert(VA.isRegLoc() && "Can only return in registers!");
        RetRegs.push_back(VA.getLocReg());
        unsigned SrcReg = Reg + VA.getValNo();

        EVT RVEVT = TLI.getValueType(DL, RV->getType());
        if (!RVEVT.isSimple())
          return false;
        MVT RVVT = RVEVT.getSimpleVT();
        MVT DestVT = VA.getLocVT();

        // Only exact type matches, or i8/i16/i32 values that can be
        // extended to the location type, are handled.
        if (RVVT != DestVT && RVVT != MVT::i8 &&
            RVVT != MVT::i16 && RVVT != MVT::i32)
          return false;

        // The value is narrower than its location: extend it according
        // to the location's extension kind before the copy.
        if (RVVT != DestVT) {
          switch (VA.getLocInfo()) {
            default:
              llvm_unreachable("Unknown loc info!");
            case CCValAssign::Full:
              llvm_unreachable("Full value assign but types don't match?");
            case CCValAssign::AExt:
            case CCValAssign::ZExt: {
              const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
              unsigned TmpReg = createResultReg(RC);
              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
                return false;
              SrcReg = TmpReg;
              break;
            }
            case CCValAssign::SExt: {
              const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
              unsigned TmpReg = createResultReg(RC);
              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
                return false;
              SrcReg = TmpReg;
              break;
            }
          }
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(TargetOpcode::COPY), RetRegs[i])
          .addReg(SrcReg);
      }
    }
  }

  // Emit the return and mark the return-value registers as
  // implicitly used by it.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(PPC::BLR8));

  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);

  return true;
}
   1710 
   1711 // Attempt to emit an integer extend of SrcReg into DestReg.  Both
   1712 // signed and zero extensions are supported.  Return false if we
   1713 // can't handle it.
   1714 bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
   1715                                 unsigned DestReg, bool IsZExt) {
   1716   if (DestVT != MVT::i32 && DestVT != MVT::i64)
   1717     return false;
   1718   if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
   1719     return false;
   1720 
   1721   // Signed extensions use EXTSB, EXTSH, EXTSW.
   1722   if (!IsZExt) {
   1723     unsigned Opc;
   1724     if (SrcVT == MVT::i8)
   1725       Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
   1726     else if (SrcVT == MVT::i16)
   1727       Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
   1728     else {
   1729       assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
   1730       Opc = PPC::EXTSW_32_64;
   1731     }
   1732     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
   1733       .addReg(SrcReg);
   1734 
   1735   // Unsigned 32-bit extensions use RLWINM.
   1736   } else if (DestVT == MVT::i32) {
   1737     unsigned MB;
   1738     if (SrcVT == MVT::i8)
   1739       MB = 24;
   1740     else {
   1741       assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
   1742       MB = 16;
   1743     }
   1744     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
   1745             DestReg)
   1746       .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
   1747 
   1748   // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
   1749   } else {
   1750     unsigned MB;
   1751     if (SrcVT == MVT::i8)
   1752       MB = 56;
   1753     else if (SrcVT == MVT::i16)
   1754       MB = 48;
   1755     else
   1756       MB = 32;
   1757     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1758             TII.get(PPC::RLDICL_32_64), DestReg)
   1759       .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
   1760   }
   1761 
   1762   return true;
   1763 }
   1764 
   1765 // Attempt to fast-select an indirect branch instruction.
   1766 bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
   1767   unsigned AddrReg = getRegForValue(I->getOperand(0));
   1768   if (AddrReg == 0)
   1769     return false;
   1770 
   1771   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
   1772     .addReg(AddrReg);
   1773   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
   1774 
   1775   const IndirectBrInst *IB = cast<IndirectBrInst>(I);
   1776   for (const BasicBlock *SuccBB : IB->successors())
   1777     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
   1778 
   1779   return true;
   1780 }
   1781 
   1782 // Attempt to fast-select an integer truncate instruction.
   1783 bool PPCFastISel::SelectTrunc(const Instruction *I) {
   1784   Value *Src  = I->getOperand(0);
   1785   EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
   1786   EVT DestVT = TLI.getValueType(DL, I->getType(), true);
   1787 
   1788   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
   1789     return false;
   1790 
   1791   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
   1792     return false;
   1793 
   1794   unsigned SrcReg = getRegForValue(Src);
   1795   if (!SrcReg)
   1796     return false;
   1797 
   1798   // The only interesting case is when we need to switch register classes.
   1799   if (SrcVT == MVT::i64) {
   1800     unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
   1801     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1802             TII.get(TargetOpcode::COPY),
   1803             ResultReg).addReg(SrcReg, 0, PPC::sub_32);
   1804     SrcReg = ResultReg;
   1805   }
   1806 
   1807   updateValueMap(I, SrcReg);
   1808   return true;
   1809 }
   1810 
   1811 // Attempt to fast-select an integer extend instruction.
   1812 bool PPCFastISel::SelectIntExt(const Instruction *I) {
   1813   Type *DestTy = I->getType();
   1814   Value *Src = I->getOperand(0);
   1815   Type *SrcTy = Src->getType();
   1816 
   1817   bool IsZExt = isa<ZExtInst>(I);
   1818   unsigned SrcReg = getRegForValue(Src);
   1819   if (!SrcReg) return false;
   1820 
   1821   EVT SrcEVT, DestEVT;
   1822   SrcEVT = TLI.getValueType(DL, SrcTy, true);
   1823   DestEVT = TLI.getValueType(DL, DestTy, true);
   1824   if (!SrcEVT.isSimple())
   1825     return false;
   1826   if (!DestEVT.isSimple())
   1827     return false;
   1828 
   1829   MVT SrcVT = SrcEVT.getSimpleVT();
   1830   MVT DestVT = DestEVT.getSimpleVT();
   1831 
   1832   // If we know the register class needed for the result of this
   1833   // instruction, use it.  Otherwise pick the register class of the
   1834   // correct size that does not contain X0/R0, since we don't know
   1835   // whether downstream uses permit that assignment.
   1836   unsigned AssignedReg = FuncInfo.ValueMap[I];
   1837   const TargetRegisterClass *RC =
   1838     (AssignedReg ? MRI.getRegClass(AssignedReg) :
   1839      (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
   1840       &PPC::GPRC_and_GPRC_NOR0RegClass));
   1841   unsigned ResultReg = createResultReg(RC);
   1842 
   1843   if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
   1844     return false;
   1845 
   1846   updateValueMap(I, ResultReg);
   1847   return true;
   1848 }
   1849 
   1850 // Attempt to fast-select an instruction that wasn't handled by
   1851 // the table-generated machinery.
   1852 bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
   1853 
   1854   switch (I->getOpcode()) {
   1855     case Instruction::Load:
   1856       return SelectLoad(I);
   1857     case Instruction::Store:
   1858       return SelectStore(I);
   1859     case Instruction::Br:
   1860       return SelectBranch(I);
   1861     case Instruction::IndirectBr:
   1862       return SelectIndirectBr(I);
   1863     case Instruction::FPExt:
   1864       return SelectFPExt(I);
   1865     case Instruction::FPTrunc:
   1866       return SelectFPTrunc(I);
   1867     case Instruction::SIToFP:
   1868       return SelectIToFP(I, /*IsSigned*/ true);
   1869     case Instruction::UIToFP:
   1870       return SelectIToFP(I, /*IsSigned*/ false);
   1871     case Instruction::FPToSI:
   1872       return SelectFPToI(I, /*IsSigned*/ true);
   1873     case Instruction::FPToUI:
   1874       return SelectFPToI(I, /*IsSigned*/ false);
   1875     case Instruction::Add:
   1876       return SelectBinaryIntOp(I, ISD::ADD);
   1877     case Instruction::Or:
   1878       return SelectBinaryIntOp(I, ISD::OR);
   1879     case Instruction::Sub:
   1880       return SelectBinaryIntOp(I, ISD::SUB);
   1881     case Instruction::Call:
   1882       return selectCall(I);
   1883     case Instruction::Ret:
   1884       return SelectRet(I);
   1885     case Instruction::Trunc:
   1886       return SelectTrunc(I);
   1887     case Instruction::ZExt:
   1888     case Instruction::SExt:
   1889       return SelectIntExt(I);
   1890     // Here add other flavors of Instruction::XXX that automated
   1891     // cases don't catch.  For example, switches are terminators
   1892     // that aren't yet handled.
   1893     default:
   1894       break;
   1895   }
   1896   return false;
   1897 }
   1898 
// Materialize a floating-point constant into a register, and return
// the register number (or zero if we failed to handle it).  The
// constant is placed in the constant pool and loaded via a
// TOC-relative address sequence chosen by code model.
unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
  // No plans to handle long double here.
  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  // All FP constants are loaded from the constant pool.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  assert(Align > 0 && "Unexpectedly missing alignment information!");
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  CodeModel::Model CModel = TM.getCodeModel();

  // Memory operand describing the 4- or 8-byte constant-pool load.
  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
      MachinePointerInfo::getConstantPool(*FuncInfo.MF),
      MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);

  unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
  unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);

  // Every path below addresses the pool entry relative to the TOC (X2).
  PPCFuncInfo->setUsesTOCBasePtr();
  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
            TmpReg)
      .addConstantPoolIndex(Idx).addReg(PPC::X2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
      .addImm(0).addReg(TmpReg).addMemOperand(MMO);
  } else {
    // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
            TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
    // But for large code model, we must generate a LDtocL followed
    // by the LF[SD].
    if (CModel == CodeModel::Large) {
      unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
              TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
        .addImm(0).addReg(TmpReg2);
    } else
      // Medium code model: fold the low part of the TOC offset into
      // the load itself.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
        .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
        .addReg(TmpReg)
        .addMemOperand(MMO);
  }

  return DestReg;
}
   1949 
// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
  assert(VT == MVT::i64 && "Non-address!");
  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
  unsigned DestReg = createResultReg(RC);

  // Global values may be plain old object addresses, TLS object
  // addresses, constant pool entries, or jump tables.  How we generate
  // code for these may depend on small, medium, or large code model.
  CodeModel::Model CModel = TM.getCodeModel();

  // FIXME: Jump tables are not yet required because fast-isel doesn't
  // handle switches; if that changes, we need them as well.  For now,
  // what follows assumes everything's a generic (or TLS) global address.

  // FIXME: We don't yet handle the complexity of TLS.
  if (GV->isThreadLocal())
    return 0;

  // Every sequence below addresses the global relative to the TOC (X2).
  PPCFuncInfo->setUsesTOCBasePtr();
  // For small code model, generate a simple TOC load.
  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
            DestReg)
        .addGlobalAddress(GV)
        .addReg(PPC::X2);
  else {
    // If the address is an externally defined symbol, a symbol with common
    // or externally available linkage, a non-local function address, or a
    // jump table address (not yet needed), or if we are generating code
    // for large code model, we generate:
    //       LDtocL(GV, ADDIStocHA(%X2, GV))
    // Otherwise we generate:
    //       ADDItocL(ADDIStocHA(%X2, GV), GV)
    // Either way, start with the ADDIStocHA:
    unsigned HighPartReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
            HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);

    // Pick the sequence based on how the subtarget classifies the global.
    unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
    if (GVFlags & PPCII::MO_NLP_FLAG) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
              DestReg).addGlobalAddress(GV).addReg(HighPartReg);
    } else {
      // Otherwise generate the ADDItocL.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
              DestReg).addReg(HighPartReg).addGlobalAddress(GV);
    }
  }

  return DestReg;
}
   2003 
   2004 // Materialize a 32-bit integer constant into a register, and return
   2005 // the register number (or zero if we failed to handle it).
   2006 unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
   2007                                              const TargetRegisterClass *RC) {
   2008   unsigned Lo = Imm & 0xFFFF;
   2009   unsigned Hi = (Imm >> 16) & 0xFFFF;
   2010 
   2011   unsigned ResultReg = createResultReg(RC);
   2012   bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
   2013 
   2014   if (isInt<16>(Imm))
   2015     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2016             TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
   2017       .addImm(Imm);
   2018   else if (Lo) {
   2019     // Both Lo and Hi have nonzero bits.
   2020     unsigned TmpReg = createResultReg(RC);
   2021     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2022             TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
   2023       .addImm(Hi);
   2024     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2025             TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
   2026       .addReg(TmpReg).addImm(Lo);
   2027   } else
   2028     // Just Hi bits.
   2029     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2030             TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
   2031       .addImm(Hi);
   2032 
   2033   return ResultReg;
   2034 }
   2035 
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  // Remainder holds low-order bits to OR in afterwards; Shift is how
  // far the materialized portion must be shifted left into place.
  unsigned Remainder = 0;
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(Imm)) {
    Shift = countTrailingZeros<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(ImmSh))
      Imm = ImmSh;
    else {
      // No luck: materialize the high 32 bits, then OR in the low 32
      // bits (kept in Remainder) afterwards.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place.
  unsigned TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  // OR in each nonzero 16-bit half of the remainder, skipping any
  // half that is zero.
  unsigned TmpReg3, Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
            TmpReg3).addReg(TmpReg2).addImm(Hi);
  } else
    TmpReg3 = TmpReg2;

  if ((Lo = Remainder & 0xFFFF)) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
            ResultReg).addReg(TmpReg3).addImm(Lo);
    return ResultReg;
  }

  return TmpReg3;
}
   2091 
   2092 // Materialize an integer constant into a register, and return
   2093 // the register number (or zero if we failed to handle it).
   2094 unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
   2095                                         bool UseSExt) {
   2096   // If we're using CR bit registers for i1 values, handle that as a special
   2097   // case first.
   2098   if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
   2099     unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
   2100     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2101             TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
   2102     return ImmReg;
   2103   }
   2104 
   2105   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
   2106       VT != MVT::i1)
   2107     return 0;
   2108 
   2109   const TargetRegisterClass *RC =
   2110       ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
   2111   int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
   2112 
   2113   // If the constant is in range, use a load-immediate.
   2114   // Since LI will sign extend the constant we need to make sure that for
   2115   // our zeroext constants that the sign extended constant fits into 16-bits -
   2116   // a range of 0..0x7fff.
   2117   if (isInt<16>(Imm)) {
   2118     unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
   2119     unsigned ImmReg = createResultReg(RC);
   2120     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
   2121         .addImm(Imm);
   2122     return ImmReg;
   2123   }
   2124 
   2125   // Construct the constant piecewise.
   2126   if (VT == MVT::i64)
   2127     return PPCMaterialize64BitInt(Imm, RC);
   2128   else if (VT == MVT::i32)
   2129     return PPCMaterialize32BitInt(Imm, RC);
   2130 
   2131   return 0;
   2132 }
   2133 
   2134 // Materialize a constant into a register, and return the register
   2135 // number (or zero if we failed to handle it).
   2136 unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
   2137   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
   2138 
   2139   // Only handle simple types.
   2140   if (!CEVT.isSimple()) return 0;
   2141   MVT VT = CEVT.getSimpleVT();
   2142 
   2143   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
   2144     return PPCMaterializeFP(CFP, VT);
   2145   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
   2146     return PPCMaterializeGV(GV, VT);
   2147   else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
   2148     return PPCMaterializeInt(CI, VT, VT != MVT::i1);
   2149 
   2150   return 0;
   2151 }
   2152 
   2153 // Materialize the address created by an alloca into a register, and
   2154 // return the register number (or zero if we failed to handle it).
   2155 unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
   2156   // Don't handle dynamic allocas.
   2157   if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
   2158 
   2159   MVT VT;
   2160   if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
   2161 
   2162   DenseMap<const AllocaInst*, int>::iterator SI =
   2163     FuncInfo.StaticAllocaMap.find(AI);
   2164 
   2165   if (SI != FuncInfo.StaticAllocaMap.end()) {
   2166     unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
   2167     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
   2168             ResultReg).addFrameIndex(SI->second).addImm(0);
   2169     return ResultReg;
   2170   }
   2171 
   2172   return 0;
   2173 }
   2174 
// Fold loads into extends when possible.  If MI is an extend whose
// operand OpNo is defined by LI, re-emit the load directly into MI's
// destination register and erase MI.  Returns true on success.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load.  The folding only picks up one.  Extend this
// to check subsequent instructions for the same pattern and remove
// them.  Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent.  Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  bool IsZExt = false;
  switch(MI->getOpcode()) {
    default:
      return false;

    case PPC::RLDICL:
    case PPC::RLDICL_32_64: {
      IsZExt = true;
      // The fold is safe only when the rotate's mask-begin (MB) keeps
      // at least as many low-order bits as a zero-extending load of
      // this width provides.
      unsigned MB = MI->getOperand(3).getImm();
      if ((VT == MVT::i8 && MB <= 56) ||
          (VT == MVT::i16 && MB <= 48) ||
          (VT == MVT::i32 && MB <= 32))
        break;
      return false;
    }

    case PPC::RLWINM:
    case PPC::RLWINM8: {
      IsZExt = true;
      // Same mask-begin reasoning for the 32-bit rotate forms.
      unsigned MB = MI->getOperand(3).getImm();
      if ((VT == MVT::i8 && MB <= 24) ||
          (VT == MVT::i16 && MB <= 16))
        break;
      return false;
    }

    case PPC::EXTSB:
    case PPC::EXTSB8:
    case PPC::EXTSB8_32_64:
      /* There is no sign-extending load-byte instruction. */
      return false;

    case PPC::EXTSH:
    case PPC::EXTSH8:
    case PPC::EXTSH8_32_64: {
      if (VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }

    case PPC::EXTSW:
    case PPC::EXTSW_32_64: {
      if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(LI->getOperand(0), Addr))
    return false;

  // Re-emit the load (with the appropriate extension kind) straight
  // into the extend's destination register, then delete the extend.
  unsigned ResultReg = MI->getOperand(0).getReg();

  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
    return false;

  MI->eraseFromParent();
  return true;
}
   2252 
   2253 // Attempt to lower call arguments in a faster way than done by
   2254 // the selection DAG code.
   2255 bool PPCFastISel::fastLowerArguments() {
   2256   // Defer to normal argument lowering for now.  It's reasonably
   2257   // efficient.  Consider doing something like ARM to handle the
   2258   // case where all args fit in registers, no varargs, no float
   2259   // or vector args.
   2260   return false;
   2261 }
   2262 
   2263 // Handle materializing integer constants into a register.  This is not
   2264 // automatically generated for PowerPC, so must be explicitly created here.
   2265 unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
   2266 
   2267   if (Opc != ISD::Constant)
   2268     return 0;
   2269 
   2270   // If we're using CR bit registers for i1 values, handle that as a special
   2271   // case first.
   2272   if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
   2273     unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
   2274     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2275             TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
   2276     return ImmReg;
   2277   }
   2278 
   2279   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
   2280       VT != MVT::i8 && VT != MVT::i1)
   2281     return 0;
   2282 
   2283   const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
   2284                                    &PPC::GPRCRegClass);
   2285   if (VT == MVT::i64)
   2286     return PPCMaterialize64BitInt(Imm, RC);
   2287   else
   2288     return PPCMaterialize32BitInt(Imm, RC);
   2289 }
   2290 
   2291 // Override for ADDI and ADDI8 to set the correct register class
   2292 // on RHS operand 0.  The automatic infrastructure naively assumes
   2293 // GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
   2294 // for these cases.  At the moment, none of the other automatically
   2295 // generated RI instructions require special treatment.  However, once
   2296 // SelectSelect is implemented, "isel" requires similar handling.
   2297 //
   2298 // Also be conservative about the output register class.  Avoid
   2299 // assigning R0 or X0 to the output register for GPRC and G8RC
   2300 // register classes, as any such result could be used in ADDI, etc.,
   2301 // where those regs have another meaning.
   2302 unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
   2303                                       const TargetRegisterClass *RC,
   2304                                       unsigned Op0, bool Op0IsKill,
   2305                                       uint64_t Imm) {
   2306   if (MachineInstOpcode == PPC::ADDI)
   2307     MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
   2308   else if (MachineInstOpcode == PPC::ADDI8)
   2309     MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
   2310 
   2311   const TargetRegisterClass *UseRC =
   2312     (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
   2313      (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
   2314 
   2315   return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
   2316                                    Op0, Op0IsKill, Imm);
   2317 }
   2318 
   2319 // Override for instructions with one register operand to avoid use of
   2320 // R0/X0.  The automatic infrastructure isn't aware of the context so
   2321 // we must be conservative.
   2322 unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
   2323                                      const TargetRegisterClass* RC,
   2324                                      unsigned Op0, bool Op0IsKill) {
   2325   const TargetRegisterClass *UseRC =
   2326     (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
   2327      (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
   2328 
   2329   return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
   2330 }
   2331 
   2332 // Override for instructions with two register operands to avoid use
   2333 // of R0/X0.  The automatic infrastructure isn't aware of the context
   2334 // so we must be conservative.
   2335 unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
   2336                                       const TargetRegisterClass* RC,
   2337                                       unsigned Op0, bool Op0IsKill,
   2338                                       unsigned Op1, bool Op1IsKill) {
   2339   const TargetRegisterClass *UseRC =
   2340     (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
   2341      (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
   2342 
   2343   return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
   2344                                    Op1, Op1IsKill);
   2345 }
   2346 
   2347 namespace llvm {
   2348   // Create the fast instruction selector for PowerPC64 ELF.
   2349   FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
   2350                                 const TargetLibraryInfo *LibInfo) {
   2351     // Only available on 64-bit ELF for now.
   2352     const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
   2353     if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
   2354       return new PPCFastISel(FuncInfo, LibInfo);
   2355     return nullptr;
   2356   }
   2357 }
   2358