      1 //===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the AArch64-specific support for the FastISel class. Some
     11 // of the target-specific code is generated by tablegen in the file
     12 // AArch64GenFastISel.inc, which is #included here.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "AArch64.h"
     17 #include "AArch64CallingConvention.h"
     18 #include "AArch64Subtarget.h"
     19 #include "AArch64TargetMachine.h"
     20 #include "MCTargetDesc/AArch64AddressingModes.h"
     21 #include "llvm/Analysis/BranchProbabilityInfo.h"
     22 #include "llvm/CodeGen/CallingConvLower.h"
     23 #include "llvm/CodeGen/FastISel.h"
     24 #include "llvm/CodeGen/FunctionLoweringInfo.h"
     25 #include "llvm/CodeGen/MachineConstantPool.h"
     26 #include "llvm/CodeGen/MachineFrameInfo.h"
     27 #include "llvm/CodeGen/MachineInstrBuilder.h"
     28 #include "llvm/CodeGen/MachineRegisterInfo.h"
     29 #include "llvm/IR/CallingConv.h"
     30 #include "llvm/IR/DataLayout.h"
     31 #include "llvm/IR/DerivedTypes.h"
     32 #include "llvm/IR/Function.h"
     33 #include "llvm/IR/GetElementPtrTypeIterator.h"
     34 #include "llvm/IR/GlobalAlias.h"
     35 #include "llvm/IR/GlobalVariable.h"
     36 #include "llvm/IR/Instructions.h"
     37 #include "llvm/IR/IntrinsicInst.h"
     38 #include "llvm/IR/Operator.h"
     39 #include "llvm/MC/MCSymbol.h"
     40 using namespace llvm;
     41 
     42 namespace {
     43 
     44 class AArch64FastISel final : public FastISel {
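           /// \brief Describes a memory address as either a base register or a frame
           /// index, plus an optional extended/shifted offset register, an immediate
           /// offset, and an optional global value.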
     45   class Address {
     46   public:
     47     typedef enum {
     48       RegBase,
     49       FrameIndexBase
     50     } BaseKind;
     51 
     52   private:
     53     BaseKind Kind;
     54     AArch64_AM::ShiftExtendType ExtType;
     55     union {
     56       unsigned Reg;
     57       int FI;
     58     } Base;
     59     unsigned OffsetReg;
     60     unsigned Shift;
     61     int64_t Offset;
     62     const GlobalValue *GV;
     63 
     64   public:
     65     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
     66       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
     67     void setKind(BaseKind K) { Kind = K; }
     68     BaseKind getKind() const { return Kind; }
     69     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
     70     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
     71     bool isRegBase() const { return Kind == RegBase; }
     72     bool isFIBase() const { return Kind == FrameIndexBase; }
     73     void setReg(unsigned Reg) {
     74       assert(isRegBase() && "Invalid base register access!");
     75       Base.Reg = Reg;
     76     }
     77     unsigned getReg() const {
     78       assert(isRegBase() && "Invalid base register access!");
     79       return Base.Reg;
     80     }
     81     void setOffsetReg(unsigned Reg) {
     82       OffsetReg = Reg;
     83     }
     84     unsigned getOffsetReg() const {
     85       return OffsetReg;
     86     }
     87     void setFI(unsigned FI) {
      88       assert(isFIBase() && "Invalid base frame index access!");
     89       Base.FI = FI;
     90     }
     91     unsigned getFI() const {
     92       assert(isFIBase() && "Invalid base frame index access!");
     93       return Base.FI;
     94     }
     95     void setOffset(int64_t O) { Offset = O; }
      96     int64_t getOffset() const { return Offset; }
      97     void setShift(unsigned S) { Shift = S; }
      98     unsigned getShift() const { return Shift; }
     99 
    100     void setGlobalValue(const GlobalValue *G) { GV = G; }
     101     const GlobalValue *getGlobalValue() const { return GV; }
    102   };
    103 
    104   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
    105   /// make the right decision when generating code for different targets.
    106   const AArch64Subtarget *Subtarget;
    107   LLVMContext *Context;
    108 
    109   bool fastLowerArguments() override;
    110   bool fastLowerCall(CallLoweringInfo &CLI) override;
    111   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
    112 
    113 private:
    114   // Selection routines.
    115   bool selectAddSub(const Instruction *I);
    116   bool selectLogicalOp(const Instruction *I);
    117   bool selectLoad(const Instruction *I);
    118   bool selectStore(const Instruction *I);
    119   bool selectBranch(const Instruction *I);
    120   bool selectIndirectBr(const Instruction *I);
    121   bool selectCmp(const Instruction *I);
    122   bool selectSelect(const Instruction *I);
    123   bool selectFPExt(const Instruction *I);
    124   bool selectFPTrunc(const Instruction *I);
    125   bool selectFPToInt(const Instruction *I, bool Signed);
    126   bool selectIntToFP(const Instruction *I, bool Signed);
    127   bool selectRem(const Instruction *I, unsigned ISDOpcode);
    128   bool selectRet(const Instruction *I);
    129   bool selectTrunc(const Instruction *I);
    130   bool selectIntExt(const Instruction *I);
    131   bool selectMul(const Instruction *I);
    132   bool selectShift(const Instruction *I);
    133   bool selectBitCast(const Instruction *I);
    134   bool selectFRem(const Instruction *I);
    135   bool selectSDiv(const Instruction *I);
    136   bool selectGetElementPtr(const Instruction *I);
    137 
    138   // Utility helper routines.
    139   bool isTypeLegal(Type *Ty, MVT &VT);
    140   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
    141   bool isValueAvailable(const Value *V) const;
    142   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
    143   bool computeCallAddress(const Value *V, Address &Addr);
    144   bool simplifyAddress(Address &Addr, MVT VT);
    145   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
    146                             unsigned Flags, unsigned ScaleFactor,
    147                             MachineMemOperand *MMO);
    148   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
    149   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
    150                           unsigned Alignment);
    151   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
    152                          const Value *Cond);
    153   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
    154   bool optimizeSelect(const SelectInst *SI);
    155   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
    156 
    157   // Emit helper routines.
    158   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
    159                       const Value *RHS, bool SetFlags = false,
     160                       bool WantResult = true, bool IsZExt = false);
    161   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
    162                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
    163                          bool SetFlags = false, bool WantResult = true);
    164   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
    165                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
    166                          bool WantResult = true);
    167   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
    168                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
    169                          AArch64_AM::ShiftExtendType ShiftType,
    170                          uint64_t ShiftImm, bool SetFlags = false,
    171                          bool WantResult = true);
    172   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
    173                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
     174                          AArch64_AM::ShiftExtendType ExtType,
     175                          uint64_t ShiftImm, bool SetFlags = false,
    176                          bool WantResult = true);
    177 
    178   // Emit functions.
    179   bool emitCompareAndBranch(const BranchInst *BI);
    180   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
    181   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
    182   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
    183   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
    184   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
    185                     MachineMemOperand *MMO = nullptr);
    186   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
    187                  MachineMemOperand *MMO = nullptr);
    188   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
    189   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
    190   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
    191                    bool SetFlags = false, bool WantResult = true,
    192                    bool IsZExt = false);
    193   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
    194   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
    195                    bool SetFlags = false, bool WantResult = true,
    196                    bool IsZExt = false);
    197   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
    198                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
    199   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
    200                        unsigned RHSReg, bool RHSIsKill,
    201                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
    202                        bool WantResult = true);
    203   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
    204                          const Value *RHS);
    205   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
    206                             bool LHSIsKill, uint64_t Imm);
    207   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
    208                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
    209                             uint64_t ShiftImm);
    210   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
    211   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    212                       unsigned Op1, bool Op1IsKill);
    213   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    214                         unsigned Op1, bool Op1IsKill);
    215   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
    216                         unsigned Op1, bool Op1IsKill);
    217   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    218                       unsigned Op1Reg, bool Op1IsKill);
    219   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
    220                       uint64_t Imm, bool IsZExt = true);
    221   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    222                       unsigned Op1Reg, bool Op1IsKill);
    223   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
    224                       uint64_t Imm, bool IsZExt = true);
    225   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
    226                       unsigned Op1Reg, bool Op1IsKill);
    227   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
    228                       uint64_t Imm, bool IsZExt = false);
    229 
    230   unsigned materializeInt(const ConstantInt *CI, MVT VT);
    231   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
    232   unsigned materializeGV(const GlobalValue *GV);
    233 
    234   // Call handling routines.
    235 private:
    236   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
    237   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
    238                        unsigned &NumBytes);
    239   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
    240 
    241 public:
    242   // Backend specific FastISel code.
    243   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
    244   unsigned fastMaterializeConstant(const Constant *C) override;
    245   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
    246 
    247   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
    248                            const TargetLibraryInfo *LibInfo)
    249       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    250     Subtarget =
    251         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
    252     Context = &FuncInfo.Fn->getContext();
    253   }
    254 
    255   bool fastSelectInstruction(const Instruction *I) override;
    256 
    257 #include "AArch64GenFastISel.inc"
    258 };
    259 
    260 } // end anonymous namespace
    261 
    262 #include "AArch64GenCallingConv.inc"
    263 
    264 /// \brief Check if the sign-/zero-extend will be a noop.
    265 static bool isIntExtFree(const Instruction *I) {
    266   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
    267          "Unexpected integer extend instruction.");
    268   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
    269          "Unexpected value type.");
    270   bool IsZExt = isa<ZExtInst>(I);
    271 
    272   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    273     if (LI->hasOneUse())
    274       return true;
    275 
    276   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    277     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
    278       return true;
    279 
    280   return false;
    281 }
    282 
    283 /// \brief Determine the implicit scale factor that is applied by a memory
    284 /// operation for a given value type.
    285 static unsigned getImplicitScaleFactor(MVT VT) {
    286   switch (VT.SimpleTy) {
    287   default:
    288     return 0;    // invalid
    289   case MVT::i1:  // fall-through
    290   case MVT::i8:
    291     return 1;
    292   case MVT::i16:
    293     return 2;
    294   case MVT::i32: // fall-through
    295   case MVT::f32:
    296     return 4;
    297   case MVT::i64: // fall-through
    298   case MVT::f64:
    299     return 8;
    300   }
    301 }
    302 
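         /// \brief Return the calling-convention assignment function for a call:
         /// WebKit_JS and GHC use their dedicated conventions; everything else uses
         /// the Darwin PCS on Darwin targets and AAPCS otherwise.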
    303 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
    304   if (CC == CallingConv::WebKit_JS)
    305     return CC_AArch64_WebKit_JS;
    306   if (CC == CallingConv::GHC)
    307     return CC_AArch64_GHC;
    308   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
    309 }
    310 
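         /// \brief Materialize the address of a static alloca by adding its frame
         /// index to zero (ADDXri). Dynamic allocas are not handled and return 0.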
    311 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
    312   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
    313          "Alloca should always return a pointer.");
    314 
    315   // Don't handle dynamic allocas.
    316   if (!FuncInfo.StaticAllocaMap.count(AI))
    317     return 0;
    318 
    319   DenseMap<const AllocaInst *, int>::iterator SI =
    320       FuncInfo.StaticAllocaMap.find(AI);
    321 
    322   if (SI != FuncInfo.StaticAllocaMap.end()) {
    323     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    324     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
    325             ResultReg)
    326         .addFrameIndex(SI->second)
    327         .addImm(0)
    328         .addImm(0);
    329     return ResultReg;
    330   }
    331 
    332   return 0;
    333 }
    334 
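         /// \brief Materialize an integer constant. Zero becomes a copy from
         /// WZR/XZR; any other value is handled by the tablegen'd fastEmit_i path
         /// for ISD::Constant.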
    335 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
    336   if (VT > MVT::i64)
    337     return 0;
    338 
    339   if (!CI->isZero())
    340     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
    341 
    342   // Create a copy from the zero register to materialize a "0" value.
    343   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
    344                                                    : &AArch64::GPR32RegClass;
    345   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
    346   unsigned ResultReg = createResultReg(RC);
    347   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
    348           ResultReg).addReg(ZeroReg, getKillRegState(true));
    349   return ResultReg;
    350 }
    351 
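         /// \brief Materialize a floating-point constant, preferring an FMOV
         /// immediate, then (for the MachO large code model) an integer move plus a
         /// copy, and finally an ADRP + load from the constant pool.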
    352 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
    353   // Positive zero (+0.0) has to be materialized with a fmov from the zero
    354   // register, because the immediate version of fmov cannot encode zero.
    355   if (CFP->isNullValue())
    356     return fastMaterializeFloatZero(CFP);
    357 
    358   if (VT != MVT::f32 && VT != MVT::f64)
    359     return 0;
    360 
    361   const APFloat Val = CFP->getValueAPF();
    362   bool Is64Bit = (VT == MVT::f64);
    363   // This checks to see if we can use FMOV instructions to materialize
     364   // a constant; otherwise we have to materialize via the constant pool.
    365   if (TLI.isFPImmLegal(Val, VT)) {
    366     int Imm =
    367         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
    368     assert((Imm != -1) && "Cannot encode floating-point constant.");
    369     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    370     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
    371   }
    372 
    373   // For the MachO large code model materialize the FP constant in code.
    374   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
    375     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    376     const TargetRegisterClass *RC = Is64Bit ?
    377         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    378 
    379     unsigned TmpReg = createResultReg(RC);
    380     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
    381         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
    382 
    383     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    384     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    385             TII.get(TargetOpcode::COPY), ResultReg)
    386         .addReg(TmpReg, getKillRegState(true));
    387 
    388     return ResultReg;
    389   }
    390 
    391   // Materialize via constant pool.  MachineConstantPool wants an explicit
    392   // alignment.
    393   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
    394   if (Align == 0)
    395     Align = DL.getTypeAllocSize(CFP->getType());
    396 
    397   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
    398   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
    399   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
    400           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
    401 
    402   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
    403   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    404   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    405       .addReg(ADRPReg)
    406       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    407   return ResultReg;
    408 }
    409 
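         /// \brief Materialize the address of a global value, either through the
         /// GOT (ADRP + LDRXui) or directly (ADRP + ADDXri), depending on how the
         /// subtarget classifies the reference.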
    410 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
    411   // We can't handle thread-local variables quickly yet.
    412   if (GV->isThreadLocal())
    413     return 0;
    414 
    415   // MachO still uses GOT for large code-model accesses, but ELF requires
    416   // movz/movk sequences, which FastISel doesn't handle yet.
    417   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
    418     return 0;
    419 
    420   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
    421 
    422   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
    423   if (!DestEVT.isSimple())
    424     return 0;
    425 
    426   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
    427   unsigned ResultReg;
    428 
    429   if (OpFlags & AArch64II::MO_GOT) {
    430     // ADRP + LDRX
    431     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
    432             ADRPReg)
    433       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
    434 
    435     ResultReg = createResultReg(&AArch64::GPR64RegClass);
    436     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
    437             ResultReg)
    438       .addReg(ADRPReg)
    439       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
    440                         AArch64II::MO_NC);
    441   } else {
    442     // ADRP + ADDX
    443     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
    444             ADRPReg)
    445       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
    446 
    447     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    448     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
    449             ResultReg)
    450       .addReg(ADRPReg)
    451       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
    452       .addImm(0);
    453   }
    454   return ResultReg;
    455 }
    456 
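         /// \brief Dispatch constant materialization to the integer, floating-point,
         /// or global-value helper based on the constant's kind.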
    457 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
    458   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
    459 
    460   // Only handle simple types.
    461   if (!CEVT.isSimple())
    462     return 0;
    463   MVT VT = CEVT.getSimpleVT();
    464 
    465   if (const auto *CI = dyn_cast<ConstantInt>(C))
    466     return materializeInt(CI, VT);
    467   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    468     return materializeFP(CFP, VT);
    469   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    470     return materializeGV(GV);
    471 
    472   return 0;
    473 }
    474 
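         /// \brief Materialize +0.0 with an FMOV from the integer zero register
         /// (WZR/XZR); only f32 and f64 are handled.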
    475 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
    476   assert(CFP->isNullValue() &&
    477          "Floating-point constant is not a positive zero.");
    478   MVT VT;
    479   if (!isTypeLegal(CFP->getType(), VT))
    480     return 0;
    481 
    482   if (VT != MVT::f32 && VT != MVT::f64)
    483     return 0;
    484 
    485   bool Is64Bit = (VT == MVT::f64);
    486   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
    487   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
    488   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
    489 }
    490 
    491 /// \brief Check if the multiply is by a power-of-2 constant.
    492 static bool isMulPowOf2(const Value *I) {
    493   if (const auto *MI = dyn_cast<MulOperator>(I)) {
    494     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
    495       if (C->getValue().isPowerOf2())
    496         return true;
    497     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
    498       if (C->getValue().isPowerOf2())
    499         return true;
    500   }
    501   return false;
    502 }
    503 
     504 /// \brief Computes the address to get to an object.
    505 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
    506 {
    507   const User *U = nullptr;
    508   unsigned Opcode = Instruction::UserOp1;
    509   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    510     // Don't walk into other basic blocks unless the object is an alloca from
     511     // another block; otherwise it may not have a virtual register assigned.
    512     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
    513         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
    514       Opcode = I->getOpcode();
    515       U = I;
    516     }
    517   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    518     Opcode = C->getOpcode();
    519     U = C;
    520   }
    521 
    522   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    523     if (Ty->getAddressSpace() > 255)
    524       // Fast instruction selection doesn't support the special
    525       // address spaces.
    526       return false;
    527 
    528   switch (Opcode) {
    529   default:
    530     break;
    531   case Instruction::BitCast: {
    532     // Look through bitcasts.
    533     return computeAddress(U->getOperand(0), Addr, Ty);
    534   }
    535   case Instruction::IntToPtr: {
    536     // Look past no-op inttoptrs.
    537     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
    538         TLI.getPointerTy(DL))
    539       return computeAddress(U->getOperand(0), Addr, Ty);
    540     break;
    541   }
    542   case Instruction::PtrToInt: {
    543     // Look past no-op ptrtoints.
    544     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
    545       return computeAddress(U->getOperand(0), Addr, Ty);
    546     break;
    547   }
    548   case Instruction::GetElementPtr: {
    549     Address SavedAddr = Addr;
    550     uint64_t TmpOffset = Addr.getOffset();
    551 
    552     // Iterate through the GEP folding the constants into offsets where
    553     // we can.
    554     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
    555          GTI != E; ++GTI) {
    556       const Value *Op = GTI.getOperand();
    557       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
    558         const StructLayout *SL = DL.getStructLayout(STy);
    559         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
    560         TmpOffset += SL->getElementOffset(Idx);
    561       } else {
    562         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
    563         for (;;) {
    564           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
    565             // Constant-offset addressing.
    566             TmpOffset += CI->getSExtValue() * S;
    567             break;
    568           }
    569           if (canFoldAddIntoGEP(U, Op)) {
    570             // A compatible add with a constant operand. Fold the constant.
    571             ConstantInt *CI =
    572                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
    573             TmpOffset += CI->getSExtValue() * S;
    574             // Iterate on the other operand.
    575             Op = cast<AddOperator>(Op)->getOperand(0);
    576             continue;
    577           }
    578           // Unsupported
    579           goto unsupported_gep;
    580         }
    581       }
    582     }
    583 
    584     // Try to grab the base operand now.
    585     Addr.setOffset(TmpOffset);
    586     if (computeAddress(U->getOperand(0), Addr, Ty))
    587       return true;
    588 
    589     // We failed, restore everything and try the other options.
    590     Addr = SavedAddr;
    591 
    592   unsupported_gep:
    593     break;
    594   }
    595   case Instruction::Alloca: {
    596     const AllocaInst *AI = cast<AllocaInst>(Obj);
    597     DenseMap<const AllocaInst *, int>::iterator SI =
    598         FuncInfo.StaticAllocaMap.find(AI);
    599     if (SI != FuncInfo.StaticAllocaMap.end()) {
    600       Addr.setKind(Address::FrameIndexBase);
    601       Addr.setFI(SI->second);
    602       return true;
    603     }
    604     break;
    605   }
    606   case Instruction::Add: {
    607     // Adds of constants are common and easy enough.
    608     const Value *LHS = U->getOperand(0);
    609     const Value *RHS = U->getOperand(1);
    610 
    611     if (isa<ConstantInt>(LHS))
    612       std::swap(LHS, RHS);
    613 
    614     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
    615       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
    616       return computeAddress(LHS, Addr, Ty);
    617     }
    618 
    619     Address Backup = Addr;
    620     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
    621       return true;
    622     Addr = Backup;
    623 
    624     break;
    625   }
    626   case Instruction::Sub: {
    627     // Subs of constants are common and easy enough.
    628     const Value *LHS = U->getOperand(0);
    629     const Value *RHS = U->getOperand(1);
    630 
    631     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
    632       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
    633       return computeAddress(LHS, Addr, Ty);
    634     }
    635     break;
    636   }
    637   case Instruction::Shl: {
    638     if (Addr.getOffsetReg())
    639       break;
    640 
    641     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    642     if (!CI)
    643       break;
    644 
    645     unsigned Val = CI->getZExtValue();
    646     if (Val < 1 || Val > 3)
    647       break;
    648 
    649     uint64_t NumBytes = 0;
    650     if (Ty && Ty->isSized()) {
    651       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
    652       NumBytes = NumBits / 8;
    653       if (!isPowerOf2_64(NumBits))
    654         NumBytes = 0;
    655     }
    656 
    657     if (NumBytes != (1ULL << Val))
    658       break;
    659 
    660     Addr.setShift(Val);
    661     Addr.setExtendType(AArch64_AM::LSL);
    662 
    663     const Value *Src = U->getOperand(0);
    664     if (const auto *I = dyn_cast<Instruction>(Src)) {
    665       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
    666         // Fold the zext or sext when it won't become a noop.
    667         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
    668           if (!isIntExtFree(ZE) &&
    669               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
    670             Addr.setExtendType(AArch64_AM::UXTW);
    671             Src = ZE->getOperand(0);
    672           }
    673         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
    674           if (!isIntExtFree(SE) &&
    675               SE->getOperand(0)->getType()->isIntegerTy(32)) {
    676             Addr.setExtendType(AArch64_AM::SXTW);
    677             Src = SE->getOperand(0);
    678           }
    679         }
    680       }
    681     }
    682 
    683     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
    684       if (AI->getOpcode() == Instruction::And) {
    685         const Value *LHS = AI->getOperand(0);
    686         const Value *RHS = AI->getOperand(1);
    687 
    688         if (const auto *C = dyn_cast<ConstantInt>(LHS))
    689           if (C->getValue() == 0xffffffff)
    690             std::swap(LHS, RHS);
    691 
    692         if (const auto *C = dyn_cast<ConstantInt>(RHS))
    693           if (C->getValue() == 0xffffffff) {
    694             Addr.setExtendType(AArch64_AM::UXTW);
    695             unsigned Reg = getRegForValue(LHS);
    696             if (!Reg)
    697               return false;
    698             bool RegIsKill = hasTrivialKill(LHS);
    699             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
    700                                              AArch64::sub_32);
    701             Addr.setOffsetReg(Reg);
    702             return true;
    703           }
    704       }
    705 
    706     unsigned Reg = getRegForValue(Src);
    707     if (!Reg)
    708       return false;
    709     Addr.setOffsetReg(Reg);
    710     return true;
    711   }
    712   case Instruction::Mul: {
    713     if (Addr.getOffsetReg())
    714       break;
    715 
    716     if (!isMulPowOf2(U))
    717       break;
    718 
    719     const Value *LHS = U->getOperand(0);
    720     const Value *RHS = U->getOperand(1);
    721 
    722     // Canonicalize power-of-2 value to the RHS.
    723     if (const auto *C = dyn_cast<ConstantInt>(LHS))
    724       if (C->getValue().isPowerOf2())
    725         std::swap(LHS, RHS);
    726 
     727     assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    728     const auto *C = cast<ConstantInt>(RHS);
    729     unsigned Val = C->getValue().logBase2();
    730     if (Val < 1 || Val > 3)
    731       break;
    732 
    733     uint64_t NumBytes = 0;
    734     if (Ty && Ty->isSized()) {
    735       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
    736       NumBytes = NumBits / 8;
    737       if (!isPowerOf2_64(NumBits))
    738         NumBytes = 0;
    739     }
    740 
    741     if (NumBytes != (1ULL << Val))
    742       break;
    743 
    744     Addr.setShift(Val);
    745     Addr.setExtendType(AArch64_AM::LSL);
    746 
    747     const Value *Src = LHS;
    748     if (const auto *I = dyn_cast<Instruction>(Src)) {
    749       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
    750         // Fold the zext or sext when it won't become a noop.
    751         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
    752           if (!isIntExtFree(ZE) &&
    753               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
    754             Addr.setExtendType(AArch64_AM::UXTW);
    755             Src = ZE->getOperand(0);
    756           }
    757         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
    758           if (!isIntExtFree(SE) &&
    759               SE->getOperand(0)->getType()->isIntegerTy(32)) {
    760             Addr.setExtendType(AArch64_AM::SXTW);
    761             Src = SE->getOperand(0);
    762           }
    763         }
    764       }
    765     }
    766 
    767     unsigned Reg = getRegForValue(Src);
    768     if (!Reg)
    769       return false;
    770     Addr.setOffsetReg(Reg);
    771     return true;
    772   }
    773   case Instruction::And: {
    774     if (Addr.getOffsetReg())
    775       break;
    776 
    777     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
    778       break;
    779 
    780     const Value *LHS = U->getOperand(0);
    781     const Value *RHS = U->getOperand(1);
    782 
    783     if (const auto *C = dyn_cast<ConstantInt>(LHS))
    784       if (C->getValue() == 0xffffffff)
    785         std::swap(LHS, RHS);
    786 
    787     if (const auto *C = dyn_cast<ConstantInt>(RHS))
    788       if (C->getValue() == 0xffffffff) {
    789         Addr.setShift(0);
    790         Addr.setExtendType(AArch64_AM::LSL);
    791         Addr.setExtendType(AArch64_AM::UXTW);
    792 
    793         unsigned Reg = getRegForValue(LHS);
    794         if (!Reg)
    795           return false;
    796         bool RegIsKill = hasTrivialKill(LHS);
    797         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
    798                                          AArch64::sub_32);
    799         Addr.setOffsetReg(Reg);
    800         return true;
    801       }
    802     break;
    803   }
    804   case Instruction::SExt:
    805   case Instruction::ZExt: {
    806     if (!Addr.getReg() || Addr.getOffsetReg())
    807       break;
    808 
    809     const Value *Src = nullptr;
    810     // Fold the zext or sext when it won't become a noop.
    811     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
    812       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
    813         Addr.setExtendType(AArch64_AM::UXTW);
    814         Src = ZE->getOperand(0);
    815       }
    816     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
    817       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
    818         Addr.setExtendType(AArch64_AM::SXTW);
    819         Src = SE->getOperand(0);
    820       }
    821     }
    822 
    823     if (!Src)
    824       break;
    825 
    826     Addr.setShift(0);
    827     unsigned Reg = getRegForValue(Src);
    828     if (!Reg)
    829       return false;
    830     Addr.setOffsetReg(Reg);
    831     return true;
    832   }
    833   } // end switch
    834 
    835   if (Addr.isRegBase() && !Addr.getReg()) {
    836     unsigned Reg = getRegForValue(Obj);
    837     if (!Reg)
    838       return false;
    839     Addr.setReg(Reg);
    840     return true;
    841   }
    842 
    843   if (!Addr.getOffsetReg()) {
    844     unsigned Reg = getRegForValue(Obj);
    845     if (!Reg)
    846       return false;
    847     Addr.setOffsetReg(Reg);
    848     return true;
    849   }
    850 
    851   return false;
    852 }
    853 
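         /// \brief Compute the address of a call target. Bitcasts and no-op int/ptr
         /// conversions are only looked through when their operand is defined in the
         /// current block; a GlobalValue callee is recorded directly, otherwise the
         /// value is materialized into a register.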
    854 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
    855   const User *U = nullptr;
    856   unsigned Opcode = Instruction::UserOp1;
    857   bool InMBB = true;
    858 
    859   if (const auto *I = dyn_cast<Instruction>(V)) {
    860     Opcode = I->getOpcode();
    861     U = I;
    862     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
    863   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    864     Opcode = C->getOpcode();
    865     U = C;
    866   }
    867 
    868   switch (Opcode) {
    869   default: break;
    870   case Instruction::BitCast:
     871     // Look past bitcasts if the operand is in the same BB.
    872     if (InMBB)
    873       return computeCallAddress(U->getOperand(0), Addr);
    874     break;
    875   case Instruction::IntToPtr:
     876     // Look past no-op inttoptrs if the operand is in the same BB.
    877     if (InMBB &&
    878         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
    879             TLI.getPointerTy(DL))
    880       return computeCallAddress(U->getOperand(0), Addr);
    881     break;
    882   case Instruction::PtrToInt:
     883     // Look past no-op ptrtoints if the operand is in the same BB.
    884     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
    885       return computeCallAddress(U->getOperand(0), Addr);
    886     break;
    887   }
    888 
    889   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    890     Addr.setGlobalValue(GV);
    891     return true;
    892   }
    893 
    894   // If all else fails, try to materialize the value in a register.
    895   if (!Addr.getGlobalValue()) {
    896     Addr.setReg(getRegForValue(V));
    897     return Addr.getReg() != 0;
    898   }
    899 
    900   return false;
    901 }
    902 
    903 
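         /// \brief Return true if \p Ty maps to a simple MVT that a register can
         /// hold directly; f128 is explicitly rejected.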
    904 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
    905   EVT evt = TLI.getValueType(DL, Ty, true);
    906 
    907   // Only handle simple types.
    908   if (evt == MVT::Other || !evt.isSimple())
    909     return false;
    910   VT = evt.getSimpleVT();
    911 
    912   // This is a legal type, but it's not something we handle in fast-isel.
    913   if (VT == MVT::f128)
    914     return false;
    915 
    916   // Handle all other legal types, i.e. a register that will directly hold this
    917   // value.
    918   return TLI.isTypeLegal(VT);
    919 }
    920 
    921 /// \brief Determine if the value type is supported by FastISel.
    922 ///
    923 /// FastISel for AArch64 can handle more value types than are legal. This adds
     924 /// simple value types such as i1, i8, and i16.
    925 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
    926   if (Ty->isVectorTy() && !IsVectorAllowed)
    927     return false;
    928 
    929   if (isTypeLegal(Ty, VT))
    930     return true;
    931 
     932   // If this is a type that can be sign- or zero-extended to a basic operation,
     933   // go ahead and accept it now.
    934   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    935     return true;
    936 
    937   return false;
    938 }
    939 
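         /// \brief Return true if \p V is not an instruction, or is defined in the
         /// current basic block, so its value can safely be folded into the
         /// instruction being selected.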
    940 bool AArch64FastISel::isValueAvailable(const Value *V) const {
    941   if (!isa<Instruction>(V))
    942     return true;
    943 
    944   const auto *I = cast<Instruction>(V);
    945   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
    946 }
    947 
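         /// \brief Rewrite \p Addr into a form the load/store can encode: frame
         /// indices that need lowering become an explicit ADDXri, unencodable offset
         /// registers are folded with an add or shift, and out-of-range immediate
         /// offsets are materialized separately.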
    948 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
    949   unsigned ScaleFactor = getImplicitScaleFactor(VT);
    950   if (!ScaleFactor)
    951     return false;
    952 
    953   bool ImmediateOffsetNeedsLowering = false;
    954   bool RegisterOffsetNeedsLowering = false;
    955   int64_t Offset = Addr.getOffset();
    956   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    957     ImmediateOffsetNeedsLowering = true;
    958   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
    959            !isUInt<12>(Offset / ScaleFactor))
    960     ImmediateOffsetNeedsLowering = true;
    961 
    962   // Cannot encode an offset register and an immediate offset in the same
    963   // instruction. Fold the immediate offset into the load/store instruction and
    964   // emit an additional add to take care of the offset register.
    965   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    966     RegisterOffsetNeedsLowering = true;
    967 
    968   // Cannot encode zero register as base.
    969   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    970     RegisterOffsetNeedsLowering = true;
    971 
    972   // If this is a stack pointer and the offset needs to be simplified then put
    973   // the alloca address into a register, set the base type back to register and
    974   // continue. This should almost never happen.
    975   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
    976   {
    977     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    978     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
    979             ResultReg)
    980       .addFrameIndex(Addr.getFI())
    981       .addImm(0)
    982       .addImm(0);
    983     Addr.setKind(Address::RegBase);
    984     Addr.setReg(ResultReg);
    985   }
    986 
    987   if (RegisterOffsetNeedsLowering) {
    988     unsigned ResultReg = 0;
    989     if (Addr.getReg()) {
    990       if (Addr.getExtendType() == AArch64_AM::SXTW ||
    991           Addr.getExtendType() == AArch64_AM::UXTW   )
    992         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
    993                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
    994                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
    995                                   Addr.getShift());
    996       else
    997         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
    998                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
    999                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
   1000                                   Addr.getShift());
   1001     } else {
   1002       if (Addr.getExtendType() == AArch64_AM::UXTW)
   1003         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
   1004                                /*Op0IsKill=*/false, Addr.getShift(),
   1005                                /*IsZExt=*/true);
   1006       else if (Addr.getExtendType() == AArch64_AM::SXTW)
   1007         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
   1008                                /*Op0IsKill=*/false, Addr.getShift(),
   1009                                /*IsZExt=*/false);
   1010       else
   1011         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
   1012                                /*Op0IsKill=*/false, Addr.getShift());
   1013     }
   1014     if (!ResultReg)
   1015       return false;
   1016 
   1017     Addr.setReg(ResultReg);
   1018     Addr.setOffsetReg(0);
   1019     Addr.setShift(0);
   1020     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
   1021   }
   1022 
    1023   // Since the offset is too large for the load/store instruction, get the
   1024   // reg+offset into a register.
   1025   if (ImmediateOffsetNeedsLowering) {
   1026     unsigned ResultReg;
   1027     if (Addr.getReg())
   1028       // Try to fold the immediate into the add instruction.
   1029       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
   1030     else
   1031       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
   1032 
   1033     if (!ResultReg)
   1034       return false;
   1035     Addr.setReg(ResultReg);
   1036     Addr.setOffset(0);
   1037   }
   1038   return true;
   1039 }
   1040 
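         /// \brief Add the address operands and memory operand to a partially built
         /// load/store: a frame index plus immediate, or a base register plus either
         /// an (extended, shifted) offset register or a scaled immediate offset.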
   1041 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
   1042                                            const MachineInstrBuilder &MIB,
   1043                                            unsigned Flags,
   1044                                            unsigned ScaleFactor,
   1045                                            MachineMemOperand *MMO) {
   1046   int64_t Offset = Addr.getOffset() / ScaleFactor;
   1047   // Frame base works a bit differently. Handle it separately.
   1048   if (Addr.isFIBase()) {
   1049     int FI = Addr.getFI();
   1050     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
   1051     // and alignment should be based on the VT.
   1052     MMO = FuncInfo.MF->getMachineMemOperand(
   1053         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
   1054         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
   1055     // Now add the rest of the operands.
   1056     MIB.addFrameIndex(FI).addImm(Offset);
   1057   } else {
   1058     assert(Addr.isRegBase() && "Unexpected address kind.");
   1059     const MCInstrDesc &II = MIB->getDesc();
   1060     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
   1061     Addr.setReg(
   1062       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
   1063     Addr.setOffsetReg(
   1064       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
   1065     if (Addr.getOffsetReg()) {
   1066       assert(Addr.getOffset() == 0 && "Unexpected offset");
   1067       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
   1068                       Addr.getExtendType() == AArch64_AM::SXTX;
   1069       MIB.addReg(Addr.getReg());
   1070       MIB.addReg(Addr.getOffsetReg());
   1071       MIB.addImm(IsSigned);
   1072       MIB.addImm(Addr.getShift() != 0);
   1073     } else
   1074       MIB.addReg(Addr.getReg()).addImm(Offset);
   1075   }
   1076 
   1077   if (MMO)
   1078     MIB.addMemOperand(MMO);
   1079 }
   1080 
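         /// \brief Emit an add or subtract, sign-/zero-extending sub-32-bit operands
         /// as needed. Constants, power-of-2 multiplies, and shifts are canonicalized
         /// to the RHS (for adds) and folded into the immediate, extended-register,
         /// or shifted-register forms where possible; the register-register form is
         /// the fallback.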
   1081 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
   1082                                      const Value *RHS, bool SetFlags,
    1083                                      bool WantResult, bool IsZExt) {
   1084   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
   1085   bool NeedExtend = false;
   1086   switch (RetVT.SimpleTy) {
   1087   default:
   1088     return 0;
   1089   case MVT::i1:
   1090     NeedExtend = true;
   1091     break;
   1092   case MVT::i8:
   1093     NeedExtend = true;
   1094     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
   1095     break;
   1096   case MVT::i16:
   1097     NeedExtend = true;
   1098     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
   1099     break;
   1100   case MVT::i32:  // fall-through
   1101   case MVT::i64:
   1102     break;
   1103   }
   1104   MVT SrcVT = RetVT;
   1105   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
   1106 
   1107   // Canonicalize immediates to the RHS first.
   1108   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
   1109     std::swap(LHS, RHS);
   1110 
   1111   // Canonicalize mul by power of 2 to the RHS.
   1112   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
   1113     if (isMulPowOf2(LHS))
   1114       std::swap(LHS, RHS);
   1115 
   1116   // Canonicalize shift immediate to the RHS.
   1117   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
   1118     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
   1119       if (isa<ConstantInt>(SI->getOperand(1)))
   1120         if (SI->getOpcode() == Instruction::Shl  ||
   1121             SI->getOpcode() == Instruction::LShr ||
   1122             SI->getOpcode() == Instruction::AShr   )
   1123           std::swap(LHS, RHS);
   1124 
   1125   unsigned LHSReg = getRegForValue(LHS);
   1126   if (!LHSReg)
   1127     return 0;
   1128   bool LHSIsKill = hasTrivialKill(LHS);
   1129 
   1130   if (NeedExtend)
   1131     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
   1132 
   1133   unsigned ResultReg = 0;
   1134   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
   1135     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
   1136     if (C->isNegative())
   1137       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
   1138                                 SetFlags, WantResult);
   1139     else
   1140       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
   1141                                 WantResult);
   1142   } else if (const auto *C = dyn_cast<Constant>(RHS))
   1143     if (C->isNullValue())
   1144       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
   1145                                 WantResult);
   1146 
   1147   if (ResultReg)
   1148     return ResultReg;
   1149 
   1150   // Only extend the RHS within the instruction if there is a valid extend type.
   1151   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
   1152       isValueAvailable(RHS)) {
   1153     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
   1154       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
   1155         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
   1156           unsigned RHSReg = getRegForValue(SI->getOperand(0));
   1157           if (!RHSReg)
   1158             return 0;
   1159           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
   1160           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
   1161                                RHSIsKill, ExtendType, C->getZExtValue(),
   1162                                SetFlags, WantResult);
   1163         }
   1164     unsigned RHSReg = getRegForValue(RHS);
   1165     if (!RHSReg)
   1166       return 0;
   1167     bool RHSIsKill = hasTrivialKill(RHS);
   1168     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
   1169                          ExtendType, 0, SetFlags, WantResult);
   1170   }
   1171 
   1172   // Check if the mul can be folded into the instruction.
   1173   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
   1174     if (isMulPowOf2(RHS)) {
   1175       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
   1176       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
   1177 
   1178       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
   1179         if (C->getValue().isPowerOf2())
   1180           std::swap(MulLHS, MulRHS);
   1181 
   1182       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
   1183       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
   1184       unsigned RHSReg = getRegForValue(MulLHS);
   1185       if (!RHSReg)
   1186         return 0;
   1187       bool RHSIsKill = hasTrivialKill(MulLHS);
   1188       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
   1189                                 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
   1190                                 WantResult);
   1191       if (ResultReg)
   1192         return ResultReg;
   1193     }
   1194   }
   1195 
   1196   // Check if the shift can be folded into the instruction.
   1197   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
   1198     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
   1199       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
   1200         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
   1201         switch (SI->getOpcode()) {
   1202         default: break;
   1203         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
   1204         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
   1205         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
   1206         }
   1207         uint64_t ShiftVal = C->getZExtValue();
   1208         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
   1209           unsigned RHSReg = getRegForValue(SI->getOperand(0));
   1210           if (!RHSReg)
   1211             return 0;
   1212           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
   1213           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
   1214                                     RHSIsKill, ShiftType, ShiftVal, SetFlags,
   1215                                     WantResult);
   1216           if (ResultReg)
   1217             return ResultReg;
   1218         }
   1219       }
   1220     }
   1221   }
   1222 
   1223   unsigned RHSReg = getRegForValue(RHS);
   1224   if (!RHSReg)
   1225     return 0;
   1226   bool RHSIsKill = hasTrivialKill(RHS);
   1227 
   1228   if (NeedExtend)
   1229     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
   1230 
   1231   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
   1232                        SetFlags, WantResult);
   1233 }
   1234 
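         /// \brief Emit the register-register form of add/sub. When the result is
         /// not wanted, the zero register is used as the destination so only the
         /// flags are produced.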
   1235 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
   1236                                         bool LHSIsKill, unsigned RHSReg,
   1237                                         bool RHSIsKill, bool SetFlags,
   1238                                         bool WantResult) {
   1239   assert(LHSReg && RHSReg && "Invalid register number.");
   1240 
   1241   if (RetVT != MVT::i32 && RetVT != MVT::i64)
   1242     return 0;
   1243 
   1244   static const unsigned OpcTable[2][2][2] = {
   1245     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
   1246       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
   1247     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
   1248       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
   1249   };
   1250   bool Is64Bit = RetVT == MVT::i64;
   1251   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
   1252   const TargetRegisterClass *RC =
   1253       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   1254   unsigned ResultReg;
   1255   if (WantResult)
   1256     ResultReg = createResultReg(RC);
   1257   else
   1258     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
   1259 
   1260   const MCInstrDesc &II = TII.get(Opc);
   1261   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
   1262   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
   1263   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
   1264       .addReg(LHSReg, getKillRegState(LHSIsKill))
   1265       .addReg(RHSReg, getKillRegState(RHSIsKill));
   1266   return ResultReg;
   1267 }
   1268 
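         /// \brief Emit the immediate form of add/sub. The immediate must be a
         /// 12-bit value, optionally shifted left by 12 (e.g. 0x123000 is encoded
         /// as imm 0x123 with LSL #12); anything else returns 0.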
   1269 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
   1270                                         bool LHSIsKill, uint64_t Imm,
   1271                                         bool SetFlags, bool WantResult) {
   1272   assert(LHSReg && "Invalid register number.");
   1273 
   1274   if (RetVT != MVT::i32 && RetVT != MVT::i64)
   1275     return 0;
   1276 
   1277   unsigned ShiftImm;
   1278   if (isUInt<12>(Imm))
   1279     ShiftImm = 0;
   1280   else if ((Imm & 0xfff000) == Imm) {
   1281     ShiftImm = 12;
   1282     Imm >>= 12;
   1283   } else
   1284     return 0;
   1285 
   1286   static const unsigned OpcTable[2][2][2] = {
   1287     { { AArch64::SUBWri,  AArch64::SUBXri  },
   1288       { AArch64::ADDWri,  AArch64::ADDXri  }  },
   1289     { { AArch64::SUBSWri, AArch64::SUBSXri },
   1290       { AArch64::ADDSWri, AArch64::ADDSXri }  }
   1291   };
   1292   bool Is64Bit = RetVT == MVT::i64;
   1293   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
   1294   const TargetRegisterClass *RC;
   1295   if (SetFlags)
   1296     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   1297   else
   1298     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
   1299   unsigned ResultReg;
   1300   if (WantResult)
   1301     ResultReg = createResultReg(RC);
   1302   else
   1303     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
   1304 
   1305   const MCInstrDesc &II = TII.get(Opc);
   1306   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
   1307   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
   1308       .addReg(LHSReg, getKillRegState(LHSIsKill))
   1309       .addImm(Imm)
   1310       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
   1311   return ResultReg;
   1312 }
   1313 
   1314 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
   1315                                         bool LHSIsKill, unsigned RHSReg,
   1316                                         bool RHSIsKill,
   1317                                         AArch64_AM::ShiftExtendType ShiftType,
   1318                                         uint64_t ShiftImm, bool SetFlags,
   1319                                         bool WantResult) {
   1320   assert(LHSReg && RHSReg && "Invalid register number.");
   1321 
   1322   if (RetVT != MVT::i32 && RetVT != MVT::i64)
   1323     return 0;
   1324 
   1325   // Don't deal with undefined shifts.
   1326   if (ShiftImm >= RetVT.getSizeInBits())
   1327     return 0;
   1328 
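          // Shifted-register form: the RHS is shifted by ShiftType/ShiftImm before the
          // add/sub (e.g. ADDXrs computes LHS + (RHS << ShiftImm) for an LSL shift),
          // which lets callers fold a shift or a multiply by a power of two on the RHS
          // into a single instruction.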
   1329   static const unsigned OpcTable[2][2][2] = {
   1330     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
   1331       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
   1332     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
   1333       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
   1334   };
   1335   bool Is64Bit = RetVT == MVT::i64;
   1336   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
   1337   const TargetRegisterClass *RC =
   1338       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   1339   unsigned ResultReg;
   1340   if (WantResult)
   1341     ResultReg = createResultReg(RC);
   1342   else
   1343     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
   1344 
   1345   const MCInstrDesc &II = TII.get(Opc);
   1346   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
   1347   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
   1348   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
   1349       .addReg(LHSReg, getKillRegState(LHSIsKill))
   1350       .addReg(RHSReg, getKillRegState(RHSIsKill))
   1351       .addImm(getShifterImm(ShiftType, ShiftImm));
   1352   return ResultReg;
   1353 }
   1354 
   1355 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
   1356                                         bool LHSIsKill, unsigned RHSReg,
   1357                                         bool RHSIsKill,
   1358                                         AArch64_AM::ShiftExtendType ExtType,
   1359                                         uint64_t ShiftImm, bool SetFlags,
   1360                                         bool WantResult) {
   1361   assert(LHSReg && RHSReg && "Invalid register number.");
   1362 
   1363   if (RetVT != MVT::i32 && RetVT != MVT::i64)
   1364     return 0;
   1365 
   1366   if (ShiftImm >= 4)
   1367     return 0;
   1368 
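          // Extended-register form: the RHS is sign- or zero-extended from 8, 16, or 32
          // bits (per ExtType) and then shifted left by ShiftImm (only 0-3 is handled
          // here), so an extend of the RHS can be folded into the add/sub. The
          // non-flag-setting variants use the GPR*sp classes because ADD/SUB (extended
          // register) may have the stack pointer as destination or first source.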
   1369   static const unsigned OpcTable[2][2][2] = {
   1370     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
   1371       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
   1372     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
   1373       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
   1374   };
   1375   bool Is64Bit = RetVT == MVT::i64;
   1376   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
   1377   const TargetRegisterClass *RC = nullptr;
   1378   if (SetFlags)
   1379     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   1380   else
   1381     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
   1382   unsigned ResultReg;
   1383   if (WantResult)
   1384     ResultReg = createResultReg(RC);
   1385   else
   1386     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
   1387 
   1388   const MCInstrDesc &II = TII.get(Opc);
   1389   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
   1390   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
   1391   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
   1392       .addReg(LHSReg, getKillRegState(LHSIsKill))
   1393       .addReg(RHSReg, getKillRegState(RHSIsKill))
   1394       .addImm(getArithExtendImm(ExtType, ShiftImm));
   1395   return ResultReg;
   1396 }
   1397 
   1398 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
   1399   Type *Ty = LHS->getType();
   1400   EVT EVTy = TLI.getValueType(DL, Ty, true);
   1401   if (!EVTy.isSimple())
   1402     return false;
   1403   MVT VT = EVTy.getSimpleVT();
   1404 
   1405   switch (VT.SimpleTy) {
   1406   default:
   1407     return false;
   1408   case MVT::i1:
   1409   case MVT::i8:
   1410   case MVT::i16:
   1411   case MVT::i32:
   1412   case MVT::i64:
   1413     return emitICmp(VT, LHS, RHS, IsZExt);
   1414   case MVT::f32:
   1415   case MVT::f64:
   1416     return emitFCmp(VT, LHS, RHS);
   1417   }
   1418 }
   1419 
   1420 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
   1421                                bool IsZExt) {
   1422   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
   1423                  IsZExt) != 0;
   1424 }
   1425 
   1426 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
   1427                                   uint64_t Imm) {
   1428   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
   1429                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
   1430 }
   1431 
   1432 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
   1433   if (RetVT != MVT::f32 && RetVT != MVT::f64)
   1434     return false;
   1435 
   1436   // Check to see if the 2nd operand is a constant that we can encode directly
   1437   // in the compare.
   1438   bool UseImm = false;
   1439   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
   1440     if (CFP->isZero() && !CFP->isNegative())
   1441       UseImm = true;
   1442 
   1443   unsigned LHSReg = getRegForValue(LHS);
   1444   if (!LHSReg)
   1445     return false;
   1446   bool LHSIsKill = hasTrivialKill(LHS);
   1447 
   1448   if (UseImm) {
   1449     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
   1450     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
   1451         .addReg(LHSReg, getKillRegState(LHSIsKill));
   1452     return true;
   1453   }
   1454 
   1455   unsigned RHSReg = getRegForValue(RHS);
   1456   if (!RHSReg)
   1457     return false;
   1458   bool RHSIsKill = hasTrivialKill(RHS);
   1459 
   1460   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
   1461   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
   1462       .addReg(LHSReg, getKillRegState(LHSIsKill))
   1463       .addReg(RHSReg, getKillRegState(RHSIsKill));
   1464   return true;
   1465 }
   1466 
   1467 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
   1468                                   bool SetFlags, bool WantResult, bool IsZExt) {
   1469   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
   1470                     IsZExt);
   1471 }
   1472 
   1473 /// \brief This method is a wrapper to simplify add emission.
   1474 ///
   1475 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
   1476 /// that fails, then try to materialize the immediate into a register and use
   1477 /// emitAddSub_rr instead.
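        ///
        /// For example, an immediate such as 0x123456 cannot be encoded by ADD/SUB
        /// (immediate), so emitAddSub_ri returns 0; the constant is then materialized
        /// into a register via fastEmit_i and added with the register-register form.
        /// Negative immediates are handled by emitting the corresponding subtract of
        /// -Imm instead.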
   1478 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
   1479                                       int64_t Imm) {
   1480   unsigned ResultReg;
   1481   if (Imm < 0)
   1482     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
   1483   else
   1484     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
   1485 
   1486   if (ResultReg)
   1487     return ResultReg;
   1488 
   1489   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
   1490   if (!CReg)
   1491     return 0;
   1492 
   1493   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
   1494   return ResultReg;
   1495 }
   1496 
   1497 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
   1498                                   bool SetFlags, bool WantResult, bool IsZExt) {
   1499   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
   1500                     IsZExt);
   1501 }
   1502 
   1503 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
   1504                                       bool LHSIsKill, unsigned RHSReg,
   1505                                       bool RHSIsKill, bool WantResult) {
   1506   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
   1507                        RHSIsKill, /*SetFlags=*/true, WantResult);
   1508 }
   1509 
   1510 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
   1511                                       bool LHSIsKill, unsigned RHSReg,
   1512                                       bool RHSIsKill,
   1513                                       AArch64_AM::ShiftExtendType ShiftType,
   1514                                       uint64_t ShiftImm, bool WantResult) {
   1515   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
   1516                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
   1517                        WantResult);
   1518 }
   1519 
   1520 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
   1521                                         const Value *LHS, const Value *RHS) {
   1522   // Canonicalize immediates to the RHS first.
   1523   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
   1524     std::swap(LHS, RHS);
   1525 
   1526   // Canonicalize mul by power-of-2 to the RHS.
   1527   if (LHS->hasOneUse() && isValueAvailable(LHS))
   1528     if (isMulPowOf2(LHS))
   1529       std::swap(LHS, RHS);
   1530 
   1531   // Canonicalize shift immediate to the RHS.
   1532   if (LHS->hasOneUse() && isValueAvailable(LHS))
   1533     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
   1534       if (isa<ConstantInt>(SI->getOperand(1)))
   1535         std::swap(LHS, RHS);
   1536 
   1537   unsigned LHSReg = getRegForValue(LHS);
   1538   if (!LHSReg)
   1539     return 0;
   1540   bool LHSIsKill = hasTrivialKill(LHS);
   1541 
   1542   unsigned ResultReg = 0;
   1543   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
   1544     uint64_t Imm = C->getZExtValue();
   1545     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
   1546   }
   1547   if (ResultReg)
   1548     return ResultReg;
   1549 
   1550   // Check if the mul can be folded into the instruction.
   1551   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
   1552     if (isMulPowOf2(RHS)) {
   1553       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
   1554       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
   1555 
   1556       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
   1557         if (C->getValue().isPowerOf2())
   1558           std::swap(MulLHS, MulRHS);
   1559 
   1560       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
   1561       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
   1562 
   1563       unsigned RHSReg = getRegForValue(MulLHS);
   1564       if (!RHSReg)
   1565         return 0;
   1566       bool RHSIsKill = hasTrivialKill(MulLHS);
   1567       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
   1568                                    RHSIsKill, ShiftVal);
   1569       if (ResultReg)
   1570         return ResultReg;
   1571     }
   1572   }
   1573 
   1574   // Check if the shift can be folded into the instruction.
   1575   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
   1576     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
   1577       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
   1578         uint64_t ShiftVal = C->getZExtValue();
   1579         unsigned RHSReg = getRegForValue(SI->getOperand(0));
   1580         if (!RHSReg)
   1581           return 0;
   1582         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
   1583         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
   1584                                      RHSIsKill, ShiftVal);
   1585         if (ResultReg)
   1586           return ResultReg;
   1587       }
   1588   }
   1589 
   1590   unsigned RHSReg = getRegForValue(RHS);
   1591   if (!RHSReg)
   1592     return 0;
   1593   bool RHSIsKill = hasTrivialKill(RHS);
   1594 
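          // i8/i16 operations are performed in a 32-bit register; if the requested type
          // is i8 or i16, mask the result afterwards so the unused high bits are zero.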
   1595   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
   1596   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
   1597   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
   1598     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
   1599     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
   1600   }
   1601   return ResultReg;
   1602 }
   1603 
   1604 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
   1605                                            unsigned LHSReg, bool LHSIsKill,
   1606                                            uint64_t Imm) {
   1607   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
   1608                 "ISD nodes are not consecutive!");
   1609   static const unsigned OpcTable[3][2] = {
   1610     { AArch64::ANDWri, AArch64::ANDXri },
   1611     { AArch64::ORRWri, AArch64::ORRXri },
   1612     { AArch64::EORWri, AArch64::EORXri }
   1613   };
   1614   const TargetRegisterClass *RC;
   1615   unsigned Opc;
   1616   unsigned RegSize;
   1617   switch (RetVT.SimpleTy) {
   1618   default:
   1619     return 0;
   1620   case MVT::i1:
   1621   case MVT::i8:
   1622   case MVT::i16:
   1623   case MVT::i32: {
   1624     unsigned Idx = ISDOpc - ISD::AND;
   1625     Opc = OpcTable[Idx][0];
   1626     RC = &AArch64::GPR32spRegClass;
   1627     RegSize = 32;
   1628     break;
   1629   }
   1630   case MVT::i64:
   1631     Opc = OpcTable[ISDOpc - ISD::AND][1];
   1632     RC = &AArch64::GPR64spRegClass;
   1633     RegSize = 64;
   1634     break;
   1635   }
   1636 
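          // AND/ORR/EOR (immediate) can only encode "logical immediates": a rotated run
          // of contiguous set bits replicated across the register. For example, 0xff and
          // 0x7fffffff are encodable, while 0 and the all-ones value are not; in that
          // case return 0 so the caller falls back to the register-register form.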
   1637   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
   1638     return 0;
   1639 
   1640   unsigned ResultReg =
   1641       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
   1642                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
   1643   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
   1644     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
   1645     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
   1646   }
   1647   return ResultReg;
   1648 }
   1649 
   1650 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
   1651                                            unsigned LHSReg, bool LHSIsKill,
   1652                                            unsigned RHSReg, bool RHSIsKill,
   1653                                            uint64_t ShiftImm) {
   1654   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
   1655                 "ISD nodes are not consecutive!");
   1656   static const unsigned OpcTable[3][2] = {
   1657     { AArch64::ANDWrs, AArch64::ANDXrs },
   1658     { AArch64::ORRWrs, AArch64::ORRXrs },
   1659     { AArch64::EORWrs, AArch64::EORXrs }
   1660   };
   1661 
   1662   // Don't deal with undefined shifts.
   1663   if (ShiftImm >= RetVT.getSizeInBits())
   1664     return 0;
   1665 
   1666   const TargetRegisterClass *RC;
   1667   unsigned Opc;
   1668   switch (RetVT.SimpleTy) {
   1669   default:
   1670     return 0;
   1671   case MVT::i1:
   1672   case MVT::i8:
   1673   case MVT::i16:
   1674   case MVT::i32:
   1675     Opc = OpcTable[ISDOpc - ISD::AND][0];
   1676     RC = &AArch64::GPR32RegClass;
   1677     break;
   1678   case MVT::i64:
   1679     Opc = OpcTable[ISDOpc - ISD::AND][1];
   1680     RC = &AArch64::GPR64RegClass;
   1681     break;
   1682   }
   1683   unsigned ResultReg =
   1684       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
   1685                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
   1686   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
   1687     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
   1688     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
   1689   }
   1690   return ResultReg;
   1691 }
   1692 
   1693 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
   1694                                      uint64_t Imm) {
   1695   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
   1696 }
   1697 
   1698 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
   1699                                    bool WantZExt, MachineMemOperand *MMO) {
   1700   if (!TLI.allowsMisalignedMemoryAccesses(VT))
   1701     return 0;
   1702 
   1703   // Simplify this down to something we can handle.
   1704   if (!simplifyAddress(Addr, VT))
   1705     return 0;
   1706 
   1707   unsigned ScaleFactor = getImplicitScaleFactor(VT);
   1708   if (!ScaleFactor)
   1709     llvm_unreachable("Unexpected value type.");
   1710 
   1711   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
   1712   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
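          // For example, an i32 load (ScaleFactor == 4) at offset 8 can use the scaled
          // LDRWui form (encoded immediate 8 / 4 == 2), while an offset of -4, or an
          // offset such as 6 that is not a multiple of 4, must use the unscaled LDURWi
          // form with ScaleFactor reset to 1.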
   1713   bool UseScaled = true;
   1714   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
   1715     UseScaled = false;
   1716     ScaleFactor = 1;
   1717   }
   1718 
   1719   static const unsigned GPOpcTable[2][8][4] = {
   1720     // Sign-extend.
   1721     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
   1722         AArch64::LDURXi  },
   1723       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
   1724         AArch64::LDURXi  },
   1725       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
   1726         AArch64::LDRXui  },
   1727       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
   1728         AArch64::LDRXui  },
   1729       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
   1730         AArch64::LDRXroX },
   1731       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
   1732         AArch64::LDRXroX },
   1733       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
   1734         AArch64::LDRXroW },
   1735       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
   1736         AArch64::LDRXroW }
   1737     },
   1738     // Zero-extend.
   1739     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
   1740         AArch64::LDURXi  },
   1741       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
   1742         AArch64::LDURXi  },
   1743       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
   1744         AArch64::LDRXui  },
   1745       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
   1746         AArch64::LDRXui  },
   1747       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
   1748         AArch64::LDRXroX },
   1749       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
   1750         AArch64::LDRXroX },
   1751       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
   1752         AArch64::LDRXroW },
   1753       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
   1754         AArch64::LDRXroW }
   1755     }
   1756   };
   1757 
   1758   static const unsigned FPOpcTable[4][2] = {
   1759     { AArch64::LDURSi,  AArch64::LDURDi  },
   1760     { AArch64::LDRSui,  AArch64::LDRDui  },
   1761     { AArch64::LDRSroX, AArch64::LDRDroX },
   1762     { AArch64::LDRSroW, AArch64::LDRDroW }
   1763   };
   1764 
   1765   unsigned Opc;
   1766   const TargetRegisterClass *RC;
   1767   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
   1768                       Addr.getOffsetReg();
   1769   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
   1770   if (Addr.getExtendType() == AArch64_AM::UXTW ||
   1771       Addr.getExtendType() == AArch64_AM::SXTW)
   1772     Idx++;
   1773 
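          // GPOpcTable is indexed as [WantZExt][2 * Idx + IsRet64Bit][size]: the outer
          // index selects sign- (0) vs. zero-extending (1) loads, each row pair
          // corresponds to the addressing mode (unscaled, scaled, register-offset X,
          // register-offset W), and the column is the memory size (i8, i16, i32, i64).
          // Within a pair the second row is used when the result is wanted as an i64;
          // for zero-extending loads both rows are the 32-bit opcodes, and the widening
          // to 64 bits is done below with SUBREG_TO_REG.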
   1774   bool IsRet64Bit = RetVT == MVT::i64;
   1775   switch (VT.SimpleTy) {
   1776   default:
   1777     llvm_unreachable("Unexpected value type.");
   1778   case MVT::i1: // Intentional fall-through.
   1779   case MVT::i8:
   1780     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
   1781     RC = (IsRet64Bit && !WantZExt) ?
   1782              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
   1783     break;
   1784   case MVT::i16:
   1785     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
   1786     RC = (IsRet64Bit && !WantZExt) ?
   1787              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
   1788     break;
   1789   case MVT::i32:
   1790     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
   1791     RC = (IsRet64Bit && !WantZExt) ?
   1792              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
   1793     break;
   1794   case MVT::i64:
   1795     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
   1796     RC = &AArch64::GPR64RegClass;
   1797     break;
   1798   case MVT::f32:
   1799     Opc = FPOpcTable[Idx][0];
   1800     RC = &AArch64::FPR32RegClass;
   1801     break;
   1802   case MVT::f64:
   1803     Opc = FPOpcTable[Idx][1];
   1804     RC = &AArch64::FPR64RegClass;
   1805     break;
   1806   }
   1807 
   1808   // Create the base instruction, then add the operands.
   1809   unsigned ResultReg = createResultReg(RC);
   1810   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1811                                     TII.get(Opc), ResultReg);
   1812   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
   1813 
   1814   // Loading an i1 requires special handling.
   1815   if (VT == MVT::i1) {
   1816     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
   1817     assert(ANDReg && "Unexpected AND instruction emission failure.");
   1818     ResultReg = ANDReg;
   1819   }
   1820 
   1821   // For zero-extending loads to 64 bits we emit a load into a 32-bit register
   1822   // and then widen it to a 64-bit register with SUBREG_TO_REG.
   1823   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
   1824     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
   1825     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1826             TII.get(AArch64::SUBREG_TO_REG), Reg64)
   1827         .addImm(0)
   1828         .addReg(ResultReg, getKillRegState(true))
   1829         .addImm(AArch64::sub_32);
   1830     ResultReg = Reg64;
   1831   }
   1832   return ResultReg;
   1833 }
   1834 
   1835 bool AArch64FastISel::selectAddSub(const Instruction *I) {
   1836   MVT VT;
   1837   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
   1838     return false;
   1839 
   1840   if (VT.isVector())
   1841     return selectOperator(I, I->getOpcode());
   1842 
   1843   unsigned ResultReg;
   1844   switch (I->getOpcode()) {
   1845   default:
   1846     llvm_unreachable("Unexpected instruction.");
   1847   case Instruction::Add:
   1848     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
   1849     break;
   1850   case Instruction::Sub:
   1851     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
   1852     break;
   1853   }
   1854   if (!ResultReg)
   1855     return false;
   1856 
   1857   updateValueMap(I, ResultReg);
   1858   return true;
   1859 }
   1860 
   1861 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
   1862   MVT VT;
   1863   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
   1864     return false;
   1865 
   1866   if (VT.isVector())
   1867     return selectOperator(I, I->getOpcode());
   1868 
   1869   unsigned ResultReg;
   1870   switch (I->getOpcode()) {
   1871   default:
   1872     llvm_unreachable("Unexpected instruction.");
   1873   case Instruction::And:
   1874     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
   1875     break;
   1876   case Instruction::Or:
   1877     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
   1878     break;
   1879   case Instruction::Xor:
   1880     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
   1881     break;
   1882   }
   1883   if (!ResultReg)
   1884     return false;
   1885 
   1886   updateValueMap(I, ResultReg);
   1887   return true;
   1888 }
   1889 
   1890 bool AArch64FastISel::selectLoad(const Instruction *I) {
   1891   MVT VT;
   1892   // Verify we have a legal type before going any further.  Currently, we handle
   1893   // simple types that will directly fit in a register (i32/f32/i64/f64) or
   1894   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
   1895   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
   1896       cast<LoadInst>(I)->isAtomic())
   1897     return false;
   1898 
   1899   const Value *SV = I->getOperand(0);
   1900   if (TLI.supportSwiftError()) {
   1901     // Swifterror values can come from either a function parameter with
   1902     // swifterror attribute or an alloca with swifterror attribute.
   1903     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
   1904       if (Arg->hasSwiftErrorAttr())
   1905         return false;
   1906     }
   1907 
   1908     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
   1909       if (Alloca->isSwiftError())
   1910         return false;
   1911     }
   1912   }
   1913 
   1914   // See if we can handle this address.
   1915   Address Addr;
   1916   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
   1917     return false;
   1918 
   1919   // Fold the following sign-/zero-extend into the load instruction.
   1920   bool WantZExt = true;
   1921   MVT RetVT = VT;
   1922   const Value *IntExtVal = nullptr;
   1923   if (I->hasOneUse()) {
   1924     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
   1925       if (isTypeSupported(ZE->getType(), RetVT))
   1926         IntExtVal = ZE;
   1927       else
   1928         RetVT = VT;
   1929     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
   1930       if (isTypeSupported(SE->getType(), RetVT))
   1931         IntExtVal = SE;
   1932       else
   1933         RetVT = VT;
   1934       WantZExt = false;
   1935     }
   1936   }
   1937 
   1938   unsigned ResultReg =
   1939       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
   1940   if (!ResultReg)
   1941     return false;
   1942 
   1943   // There are a few different cases we have to handle, because the load or the
   1944   // sign-/zero-extend might not be selected by FastISel if we fall-back to
   1945   // SelectionDAG. There is also an ordering issue when both instructions are in
   1946   // different basic blocks.
   1947   // 1.) The load instruction is selected by FastISel, but the integer extend is
   1948   //     not. This usually happens when the integer extend is in a different
   1949   //     basic block and SelectionDAG took over for that basic block.
   1950   // 2.) The load instruction is selected before the integer extend. This only
   1951   //     happens when the integer extend is in a different basic block.
   1952   // 3.) The load instruction is selected by SelectionDAG and the integer extend
   1953   //     by FastISel. This happens if there are instructions between the load
   1954   //     and the integer extend that couldn't be selected by FastISel.
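          // For example, given
          //   %1 = load i32, i32* %p
          //   %2 = zext i32 %1 to i64
          // emitLoad above produced a 32-bit load plus a SUBREG_TO_REG into a 64-bit
          // register. If the zext has already been selected (FastISel works bottom-up
          // within a block), its instructions are deleted and %2 is mapped directly to
          // the 64-bit load result; if it has not, the widening is undone and the load
          // keeps only its natural 32-bit result.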
   1955   if (IntExtVal) {
   1956     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
   1957     // could select it. Emit a copy to subreg if necessary. FastISel will remove
   1958     // it when it selects the integer extend.
   1959     unsigned Reg = lookUpRegForValue(IntExtVal);
   1960     auto *MI = MRI.getUniqueVRegDef(Reg);
   1961     if (!MI) {
   1962       if (RetVT == MVT::i64 && VT <= MVT::i32) {
   1963         if (WantZExt) {
   1964           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
   1965           std::prev(FuncInfo.InsertPt)->eraseFromParent();
   1966           ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
   1967         } else
   1968           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
   1969                                                  /*IsKill=*/true,
   1970                                                  AArch64::sub_32);
   1971       }
   1972       updateValueMap(I, ResultReg);
   1973       return true;
   1974     }
   1975 
   1976     // The integer extend has already been emitted - delete all the instructions
   1977     // that have been emitted by the integer extend lowering code and use the
   1978     // result from the load instruction directly.
   1979     while (MI) {
   1980       Reg = 0;
   1981       for (auto &Opnd : MI->uses()) {
   1982         if (Opnd.isReg()) {
   1983           Reg = Opnd.getReg();
   1984           break;
   1985         }
   1986       }
   1987       MI->eraseFromParent();
   1988       MI = nullptr;
   1989       if (Reg)
   1990         MI = MRI.getUniqueVRegDef(Reg);
   1991     }
   1992     updateValueMap(IntExtVal, ResultReg);
   1993     return true;
   1994   }
   1995 
   1996   updateValueMap(I, ResultReg);
   1997   return true;
   1998 }
   1999 
   2000 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
   2001                                 MachineMemOperand *MMO) {
   2002   if (!TLI.allowsMisalignedMemoryAccesses(VT))
   2003     return false;
   2004 
   2005   // Simplify this down to something we can handle.
   2006   if (!simplifyAddress(Addr, VT))
   2007     return false;
   2008 
   2009   unsigned ScaleFactor = getImplicitScaleFactor(VT);
   2010   if (!ScaleFactor)
   2011     llvm_unreachable("Unexpected value type.");
   2012 
   2013   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
   2014   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
   2015   bool UseScaled = true;
   2016   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
   2017     UseScaled = false;
   2018     ScaleFactor = 1;
   2019   }
   2020 
   2021   static const unsigned OpcTable[4][6] = {
   2022     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
   2023       AArch64::STURSi,   AArch64::STURDi },
   2024     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
   2025       AArch64::STRSui,   AArch64::STRDui },
   2026     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
   2027       AArch64::STRSroX,  AArch64::STRDroX },
   2028     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
   2029       AArch64::STRSroW,  AArch64::STRDroW }
   2030   };
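          // The rows of OpcTable correspond to the addressing mode chosen below:
          // 0 = unscaled 9-bit signed offset (STUR*), 1 = scaled 12-bit unsigned offset
          // (STR*ui), 2 = register offset with X extend (STR*roX), 3 = register offset
          // with W extend (STR*roW). Columns are i8, i16, i32, i64, f32, f64.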
   2031 
   2032   unsigned Opc;
   2033   bool VTIsi1 = false;
   2034   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
   2035                       Addr.getOffsetReg();
   2036   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
   2037   if (Addr.getExtendType() == AArch64_AM::UXTW ||
   2038       Addr.getExtendType() == AArch64_AM::SXTW)
   2039     Idx++;
   2040 
   2041   switch (VT.SimpleTy) {
   2042   default: llvm_unreachable("Unexpected value type.");
   2043   case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
   2044   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
   2045   case MVT::i16: Opc = OpcTable[Idx][1]; break;
   2046   case MVT::i32: Opc = OpcTable[Idx][2]; break;
   2047   case MVT::i64: Opc = OpcTable[Idx][3]; break;
   2048   case MVT::f32: Opc = OpcTable[Idx][4]; break;
   2049   case MVT::f64: Opc = OpcTable[Idx][5]; break;
   2050   }
   2051 
   2052   // Storing an i1 requires special handling.
   2053   if (VTIsi1 && SrcReg != AArch64::WZR) {
   2054     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
   2055     assert(ANDReg && "Unexpected AND instruction emission failure.");
   2056     SrcReg = ANDReg;
   2057   }
   2058   // Create the base instruction, then add the operands.
   2059   const MCInstrDesc &II = TII.get(Opc);
   2060   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
   2061   MachineInstrBuilder MIB =
   2062       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
   2063   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
   2064 
   2065   return true;
   2066 }
   2067 
   2068 bool AArch64FastISel::selectStore(const Instruction *I) {
   2069   MVT VT;
   2070   const Value *Op0 = I->getOperand(0);
   2071   // Verify we have a legal type before going any further.  Currently, we handle
   2072   // simple types that will directly fit in a register (i32/f32/i64/f64) or
   2073   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
   2074   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
   2075       cast<StoreInst>(I)->isAtomic())
   2076     return false;
   2077 
   2078   const Value *PtrV = I->getOperand(1);
   2079   if (TLI.supportSwiftError()) {
   2080     // Swifterror values can come from either a function parameter with
   2081     // swifterror attribute or an alloca with swifterror attribute.
   2082     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
   2083       if (Arg->hasSwiftErrorAttr())
   2084         return false;
   2085     }
   2086 
   2087     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
   2088       if (Alloca->isSwiftError())
   2089         return false;
   2090     }
   2091   }
   2092 
   2093   // Get the value to be stored into a register. Use the zero register directly
   2094   // when possible to avoid an unnecessary copy and a wasted register.
   2095   unsigned SrcReg = 0;
   2096   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
   2097     if (CI->isZero())
   2098       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
   2099   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
   2100     if (CF->isZero() && !CF->isNegative()) {
   2101       VT = MVT::getIntegerVT(VT.getSizeInBits());
   2102       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
   2103     }
   2104   }
   2105 
   2106   if (!SrcReg)
   2107     SrcReg = getRegForValue(Op0);
   2108 
   2109   if (!SrcReg)
   2110     return false;
   2111 
   2112   // See if we can handle this address.
   2113   Address Addr;
   2114   if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
   2115     return false;
   2116 
   2117   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
   2118     return false;
   2119   return true;
   2120 }
   2121 
   2122 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
   2123   switch (Pred) {
   2124   case CmpInst::FCMP_ONE:
   2125   case CmpInst::FCMP_UEQ:
   2126   default:
   2127     // AL is our "false" for now. The other two need more compares.
   2128     return AArch64CC::AL;
   2129   case CmpInst::ICMP_EQ:
   2130   case CmpInst::FCMP_OEQ:
   2131     return AArch64CC::EQ;
   2132   case CmpInst::ICMP_SGT:
   2133   case CmpInst::FCMP_OGT:
   2134     return AArch64CC::GT;
   2135   case CmpInst::ICMP_SGE:
   2136   case CmpInst::FCMP_OGE:
   2137     return AArch64CC::GE;
   2138   case CmpInst::ICMP_UGT:
   2139   case CmpInst::FCMP_UGT:
   2140     return AArch64CC::HI;
   2141   case CmpInst::FCMP_OLT:
   2142     return AArch64CC::MI;
   2143   case CmpInst::ICMP_ULE:
   2144   case CmpInst::FCMP_OLE:
   2145     return AArch64CC::LS;
   2146   case CmpInst::FCMP_ORD:
   2147     return AArch64CC::VC;
   2148   case CmpInst::FCMP_UNO:
   2149     return AArch64CC::VS;
   2150   case CmpInst::FCMP_UGE:
   2151     return AArch64CC::PL;
   2152   case CmpInst::ICMP_SLT:
   2153   case CmpInst::FCMP_ULT:
   2154     return AArch64CC::LT;
   2155   case CmpInst::ICMP_SLE:
   2156   case CmpInst::FCMP_ULE:
   2157     return AArch64CC::LE;
   2158   case CmpInst::FCMP_UNE:
   2159   case CmpInst::ICMP_NE:
   2160     return AArch64CC::NE;
   2161   case CmpInst::ICMP_UGE:
   2162     return AArch64CC::HS;
   2163   case CmpInst::ICMP_ULT:
   2164     return AArch64CC::LO;
   2165   }
   2166 }
   2167 
   2168 /// \brief Try to emit a combined compare-and-branch instruction.
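        ///
        /// Compares of a value against zero become CBZ/CBNZ, a branch on a single-bit
        /// test such as `icmp ne (and %x, 8), 0` becomes TBZ/TBNZ on that bit, and
        /// signed comparisons against 0 or -1 are turned into a test of the sign bit.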
   2169 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
   2170   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
   2171   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
   2172   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
   2173 
   2174   const Value *LHS = CI->getOperand(0);
   2175   const Value *RHS = CI->getOperand(1);
   2176 
   2177   MVT VT;
   2178   if (!isTypeSupported(LHS->getType(), VT))
   2179     return false;
   2180 
   2181   unsigned BW = VT.getSizeInBits();
   2182   if (BW > 64)
   2183     return false;
   2184 
   2185   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   2186   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
   2187 
   2188   // Try to take advantage of fallthrough opportunities.
   2189   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
   2190     std::swap(TBB, FBB);
   2191     Predicate = CmpInst::getInversePredicate(Predicate);
   2192   }
   2193 
   2194   int TestBit = -1;
   2195   bool IsCmpNE;
   2196   switch (Predicate) {
   2197   default:
   2198     return false;
   2199   case CmpInst::ICMP_EQ:
   2200   case CmpInst::ICMP_NE:
   2201     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
   2202       std::swap(LHS, RHS);
   2203 
   2204     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
   2205       return false;
   2206 
   2207     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
   2208       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
   2209         const Value *AndLHS = AI->getOperand(0);
   2210         const Value *AndRHS = AI->getOperand(1);
   2211 
   2212         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
   2213           if (C->getValue().isPowerOf2())
   2214             std::swap(AndLHS, AndRHS);
   2215 
   2216         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
   2217           if (C->getValue().isPowerOf2()) {
   2218             TestBit = C->getValue().logBase2();
   2219             LHS = AndLHS;
   2220           }
   2221       }
   2222 
   2223     if (VT == MVT::i1)
   2224       TestBit = 0;
   2225 
   2226     IsCmpNE = Predicate == CmpInst::ICMP_NE;
   2227     break;
   2228   case CmpInst::ICMP_SLT:
   2229   case CmpInst::ICMP_SGE:
   2230     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
   2231       return false;
   2232 
   2233     TestBit = BW - 1;
   2234     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
   2235     break;
   2236   case CmpInst::ICMP_SGT:
   2237   case CmpInst::ICMP_SLE:
   2238     if (!isa<ConstantInt>(RHS))
   2239       return false;
   2240 
   2241     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
   2242       return false;
   2243 
   2244     TestBit = BW - 1;
   2245     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
   2246     break;
   2247   } // end switch
   2248 
   2249   static const unsigned OpcTable[2][2][2] = {
   2250     { {AArch64::CBZW,  AArch64::CBZX },
   2251       {AArch64::CBNZW, AArch64::CBNZX} },
   2252     { {AArch64::TBZW,  AArch64::TBZX },
   2253       {AArch64::TBNZW, AArch64::TBNZX} }
   2254   };
   2255 
   2256   bool IsBitTest = TestBit != -1;
   2257   bool Is64Bit = BW == 64;
   2258   if (TestBit < 32 && TestBit >= 0)
   2259     Is64Bit = false;
   2260 
   2261   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
   2262   const MCInstrDesc &II = TII.get(Opc);
   2263 
   2264   unsigned SrcReg = getRegForValue(LHS);
   2265   if (!SrcReg)
   2266     return false;
   2267   bool SrcIsKill = hasTrivialKill(LHS);
   2268 
   2269   if (BW == 64 && !Is64Bit)
   2270     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
   2271                                         AArch64::sub_32);
   2272 
   2273   if ((BW < 32) && !IsBitTest)
   2274     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
   2275 
   2276   // Emit the combined compare and branch instruction.
   2277   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
   2278   MachineInstrBuilder MIB =
   2279       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
   2280           .addReg(SrcReg, getKillRegState(SrcIsKill));
   2281   if (IsBitTest)
   2282     MIB.addImm(TestBit);
   2283   MIB.addMBB(TBB);
   2284 
   2285   finishCondBranch(BI->getParent(), TBB, FBB);
   2286   return true;
   2287 }
   2288 
   2289 bool AArch64FastISel::selectBranch(const Instruction *I) {
   2290   const BranchInst *BI = cast<BranchInst>(I);
   2291   if (BI->isUnconditional()) {
   2292     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
   2293     fastEmitBranch(MSucc, BI->getDebugLoc());
   2294     return true;
   2295   }
   2296 
   2297   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   2298   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
   2299 
   2300   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
   2301     if (CI->hasOneUse() && isValueAvailable(CI)) {
   2302       // Try to optimize or fold the cmp.
   2303       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
   2304       switch (Predicate) {
   2305       default:
   2306         break;
   2307       case CmpInst::FCMP_FALSE:
   2308         fastEmitBranch(FBB, DbgLoc);
   2309         return true;
   2310       case CmpInst::FCMP_TRUE:
   2311         fastEmitBranch(TBB, DbgLoc);
   2312         return true;
   2313       }
   2314 
   2315       // Try to emit a combined compare-and-branch first.
   2316       if (emitCompareAndBranch(BI))
   2317         return true;
   2318 
   2319       // Try to take advantage of fallthrough opportunities.
   2320       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
   2321         std::swap(TBB, FBB);
   2322         Predicate = CmpInst::getInversePredicate(Predicate);
   2323       }
   2324 
   2325       // Emit the cmp.
   2326       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
   2327         return false;
   2328 
   2329       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
   2330       // instruction.
   2331       AArch64CC::CondCode CC = getCompareCC(Predicate);
   2332       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
   2333       switch (Predicate) {
   2334       default:
   2335         break;
   2336       case CmpInst::FCMP_UEQ:
   2337         ExtraCC = AArch64CC::EQ;
   2338         CC = AArch64CC::VS;
   2339         break;
   2340       case CmpInst::FCMP_ONE:
   2341         ExtraCC = AArch64CC::MI;
   2342         CC = AArch64CC::GT;
   2343         break;
   2344       }
   2345       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
   2346 
   2347       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
   2348       if (ExtraCC != AArch64CC::AL) {
   2349         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
   2350             .addImm(ExtraCC)
   2351             .addMBB(TBB);
   2352       }
   2353 
   2354       // Emit the branch.
   2355       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
   2356           .addImm(CC)
   2357           .addMBB(TBB);
   2358 
   2359       finishCondBranch(BI->getParent(), TBB, FBB);
   2360       return true;
   2361     }
   2362   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
   2363     uint64_t Imm = CI->getZExtValue();
   2364     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
   2365     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
   2366         .addMBB(Target);
   2367 
   2368     // Obtain the branch probability and add the target to the successor list.
   2369     if (FuncInfo.BPI) {
   2370       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
   2371           BI->getParent(), Target->getBasicBlock());
   2372       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
   2373     } else
   2374       FuncInfo.MBB->addSuccessorWithoutProb(Target);
   2375     return true;
   2376   } else {
   2377     AArch64CC::CondCode CC = AArch64CC::NE;
   2378     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
   2379       // Fake-request the condition; otherwise the intrinsic might be completely
   2380       // optimized away.
   2381       unsigned CondReg = getRegForValue(BI->getCondition());
   2382       if (!CondReg)
   2383         return false;
   2384 
   2385       // Emit the branch.
   2386       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
   2387         .addImm(CC)
   2388         .addMBB(TBB);
   2389 
   2390       finishCondBranch(BI->getParent(), TBB, FBB);
   2391       return true;
   2392     }
   2393   }
   2394 
   2395   unsigned CondReg = getRegForValue(BI->getCondition());
   2396   if (CondReg == 0)
   2397     return false;
   2398   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
   2399 
   2400   // i1 conditions come as i32 values; test the lowest bit with tb(n)z.
   2401   unsigned Opcode = AArch64::TBNZW;
   2402   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
   2403     std::swap(TBB, FBB);
   2404     Opcode = AArch64::TBZW;
   2405   }
   2406 
   2407   const MCInstrDesc &II = TII.get(Opcode);
   2408   unsigned ConstrainedCondReg
   2409     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
   2410   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
   2411       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
   2412       .addImm(0)
   2413       .addMBB(TBB);
   2414 
   2415   finishCondBranch(BI->getParent(), TBB, FBB);
   2416   return true;
   2417 }
   2418 
   2419 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
   2420   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
   2421   unsigned AddrReg = getRegForValue(BI->getOperand(0));
   2422   if (AddrReg == 0)
   2423     return false;
   2424 
   2425   // Emit the indirect branch.
   2426   const MCInstrDesc &II = TII.get(AArch64::BR);
   2427   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
   2428   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
   2429 
   2430   // Make sure the CFG is up-to-date.
   2431   for (auto *Succ : BI->successors())
   2432     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
   2433 
   2434   return true;
   2435 }
   2436 
   2437 bool AArch64FastISel::selectCmp(const Instruction *I) {
   2438   const CmpInst *CI = cast<CmpInst>(I);
   2439 
   2440   // Vectors of i1 are weird: bail out.
   2441   if (CI->getType()->isVectorTy())
   2442     return false;
   2443 
   2444   // Try to optimize or fold the cmp.
   2445   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
   2446   unsigned ResultReg = 0;
   2447   switch (Predicate) {
   2448   default:
   2449     break;
   2450   case CmpInst::FCMP_FALSE:
   2451     ResultReg = createResultReg(&AArch64::GPR32RegClass);
   2452     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2453             TII.get(TargetOpcode::COPY), ResultReg)
   2454         .addReg(AArch64::WZR, getKillRegState(true));
   2455     break;
   2456   case CmpInst::FCMP_TRUE:
   2457     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
   2458     break;
   2459   }
   2460 
   2461   if (ResultReg) {
   2462     updateValueMap(I, ResultReg);
   2463     return true;
   2464   }
   2465 
   2466   // Emit the cmp.
   2467   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
   2468     return false;
   2469 
   2470   ResultReg = createResultReg(&AArch64::GPR32RegClass);
   2471 
   2472   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
   2473   // condition codes are inverted, because they are used by CSINC.
   2474   static unsigned CondCodeTable[2][2] = {
   2475     { AArch64CC::NE, AArch64CC::VC },
   2476     { AArch64CC::PL, AArch64CC::LE }
   2477   };
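          // CSINC Rd, Rn, Rm, cc sets Rd to Rn when cc holds and to Rm + 1 otherwise.
          // The first CSINC (WZR, WZR, cc0) therefore materializes !cc0, and the second
          // (Tmp, WZR, cc1) yields Tmp when cc1 holds and 1 otherwise, i.e. !cc0 || !cc1.
          // Storing the inverted codes makes the result 1 exactly when the original
          // predicate holds: {NE, VC} gives EQ || VS (unordered or equal) for FCMP_UEQ,
          // and {PL, LE} gives MI || GT (less or greater) for FCMP_ONE.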
   2478   unsigned *CondCodes = nullptr;
   2479   switch (Predicate) {
   2480   default:
   2481     break;
   2482   case CmpInst::FCMP_UEQ:
   2483     CondCodes = &CondCodeTable[0][0];
   2484     break;
   2485   case CmpInst::FCMP_ONE:
   2486     CondCodes = &CondCodeTable[1][0];
   2487     break;
   2488   }
   2489 
   2490   if (CondCodes) {
   2491     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
   2492     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
   2493             TmpReg1)
   2494         .addReg(AArch64::WZR, getKillRegState(true))
   2495         .addReg(AArch64::WZR, getKillRegState(true))
   2496         .addImm(CondCodes[0]);
   2497     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
   2498             ResultReg)
   2499         .addReg(TmpReg1, getKillRegState(true))
   2500         .addReg(AArch64::WZR, getKillRegState(true))
   2501         .addImm(CondCodes[1]);
   2502 
   2503     updateValueMap(I, ResultReg);
   2504     return true;
   2505   }
   2506 
   2507   // Now set a register based on the comparison.
   2508   AArch64CC::CondCode CC = getCompareCC(Predicate);
   2509   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
   2510   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
   2511   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
   2512           ResultReg)
   2513       .addReg(AArch64::WZR, getKillRegState(true))
   2514       .addReg(AArch64::WZR, getKillRegState(true))
   2515       .addImm(invertedCC);
   2516 
   2517   updateValueMap(I, ResultReg);
   2518   return true;
   2519 }
   2520 
   2521 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
   2522 /// value.
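        ///
        /// The handled patterns reduce to a single logical instruction:
        ///   select c, true,  b   ->  ORR c, b
        ///   select c, a, false   ->  AND c, a
        ///   select c, false, b   ->  BIC b, c   (i.e. b & ~c)
        ///   select c, a, true    ->  ORR (c ^ 1), a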
   2523 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
   2524   if (!SI->getType()->isIntegerTy(1))
   2525     return false;
   2526 
   2527   const Value *Src1Val, *Src2Val;
   2528   unsigned Opc = 0;
   2529   bool NeedExtraOp = false;
   2530   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
   2531     if (CI->isOne()) {
   2532       Src1Val = SI->getCondition();
   2533       Src2Val = SI->getFalseValue();
   2534       Opc = AArch64::ORRWrr;
   2535     } else {
   2536       assert(CI->isZero());
   2537       Src1Val = SI->getFalseValue();
   2538       Src2Val = SI->getCondition();
   2539       Opc = AArch64::BICWrr;
   2540     }
   2541   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
   2542     if (CI->isOne()) {
   2543       Src1Val = SI->getCondition();
   2544       Src2Val = SI->getTrueValue();
   2545       Opc = AArch64::ORRWrr;
   2546       NeedExtraOp = true;
   2547     } else {
   2548       assert(CI->isZero());
   2549       Src1Val = SI->getCondition();
   2550       Src2Val = SI->getTrueValue();
   2551       Opc = AArch64::ANDWrr;
   2552     }
   2553   }
   2554 
   2555   if (!Opc)
   2556     return false;
   2557 
   2558   unsigned Src1Reg = getRegForValue(Src1Val);
   2559   if (!Src1Reg)
   2560     return false;
   2561   bool Src1IsKill = hasTrivialKill(Src1Val);
   2562 
   2563   unsigned Src2Reg = getRegForValue(Src2Val);
   2564   if (!Src2Reg)
   2565     return false;
   2566   bool Src2IsKill = hasTrivialKill(Src2Val);
   2567 
   2568   if (NeedExtraOp) {
   2569     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
   2570     Src1IsKill = true;
   2571   }
   2572   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
   2573                                        Src1IsKill, Src2Reg, Src2IsKill);
   2574   updateValueMap(SI, ResultReg);
   2575   return true;
   2576 }
   2577 
   2578 bool AArch64FastISel::selectSelect(const Instruction *I) {
   2579   assert(isa<SelectInst>(I) && "Expected a select instruction.");
   2580   MVT VT;
   2581   if (!isTypeSupported(I->getType(), VT))
   2582     return false;
   2583 
   2584   unsigned Opc;
   2585   const TargetRegisterClass *RC;
   2586   switch (VT.SimpleTy) {
   2587   default:
   2588     return false;
   2589   case MVT::i1:
   2590   case MVT::i8:
   2591   case MVT::i16:
   2592   case MVT::i32:
   2593     Opc = AArch64::CSELWr;
   2594     RC = &AArch64::GPR32RegClass;
   2595     break;
   2596   case MVT::i64:
   2597     Opc = AArch64::CSELXr;
   2598     RC = &AArch64::GPR64RegClass;
   2599     break;
   2600   case MVT::f32:
   2601     Opc = AArch64::FCSELSrrr;
   2602     RC = &AArch64::FPR32RegClass;
   2603     break;
   2604   case MVT::f64:
   2605     Opc = AArch64::FCSELDrrr;
   2606     RC = &AArch64::FPR64RegClass;
   2607     break;
   2608   }
   2609 
   2610   const SelectInst *SI = cast<SelectInst>(I);
   2611   const Value *Cond = SI->getCondition();
   2612   AArch64CC::CondCode CC = AArch64CC::NE;
   2613   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
   2614 
   2615   if (optimizeSelect(SI))
   2616     return true;
   2617 
   2618   // Try to pick up the flags, so we don't have to emit another compare.
   2619   if (foldXALUIntrinsic(CC, I, Cond)) {
   2620     // Fake-request the condition to force emission of the XALU intrinsic.
   2621     unsigned CondReg = getRegForValue(Cond);
   2622     if (!CondReg)
   2623       return false;
   2624   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
   2625              isValueAvailable(Cond)) {
   2626     const auto *Cmp = cast<CmpInst>(Cond);
   2627     // Try to optimize or fold the cmp.
   2628     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
   2629     const Value *FoldSelect = nullptr;
   2630     switch (Predicate) {
   2631     default:
   2632       break;
   2633     case CmpInst::FCMP_FALSE:
   2634       FoldSelect = SI->getFalseValue();
   2635       break;
   2636     case CmpInst::FCMP_TRUE:
   2637       FoldSelect = SI->getTrueValue();
   2638       break;
   2639     }
   2640 
   2641     if (FoldSelect) {
   2642       unsigned SrcReg = getRegForValue(FoldSelect);
   2643       if (!SrcReg)
   2644         return false;
   2645       unsigned UseReg = lookUpRegForValue(SI);
   2646       if (UseReg)
   2647         MRI.clearKillFlags(UseReg);
   2648 
   2649       updateValueMap(I, SrcReg);
   2650       return true;
   2651     }
   2652 
   2653     // Emit the cmp.
   2654     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
   2655       return false;
   2656 
   2657     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
   2658     CC = getCompareCC(Predicate);
   2659     switch (Predicate) {
   2660     default:
   2661       break;
   2662     case CmpInst::FCMP_UEQ:
   2663       ExtraCC = AArch64CC::EQ;
   2664       CC = AArch64CC::VS;
   2665       break;
   2666     case CmpInst::FCMP_ONE:
   2667       ExtraCC = AArch64CC::MI;
   2668       CC = AArch64CC::GT;
   2669       break;
   2670     }
   2671     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
   2672   } else {
   2673     unsigned CondReg = getRegForValue(Cond);
   2674     if (!CondReg)
   2675       return false;
   2676     bool CondIsKill = hasTrivialKill(Cond);
   2677 
   2678     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
   2679     CondReg = constrainOperandRegClass(II, CondReg, 1);
   2680 
   2681     // Emit a TST instruction (ANDS wzr, reg, #imm).
   2682     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
   2683             AArch64::WZR)
   2684         .addReg(CondReg, getKillRegState(CondIsKill))
   2685         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
   2686   }
   2687 
   2688   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
   2689   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
   2690 
   2691   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
   2692   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
   2693 
   2694   if (!Src1Reg || !Src2Reg)
   2695     return false;
   2696 
   2697   if (ExtraCC != AArch64CC::AL) {
   2698     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
   2699                                Src2IsKill, ExtraCC);
   2700     Src2IsKill = true;
   2701   }
   2702   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
   2703                                         Src2IsKill, CC);
   2704   updateValueMap(I, ResultReg);
   2705   return true;
   2706 }
   2707 
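        /// Lower an fpext from float to double using FCVTDSr.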
   2708 bool AArch64FastISel::selectFPExt(const Instruction *I) {
   2709   Value *V = I->getOperand(0);
   2710   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
   2711     return false;
   2712 
   2713   unsigned Op = getRegForValue(V);
   2714   if (Op == 0)
   2715     return false;
   2716 
   2717   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
   2718   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
   2719           ResultReg).addReg(Op);
   2720   updateValueMap(I, ResultReg);
   2721   return true;
   2722 }
   2723 
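        /// Lower an fptrunc from double to float using FCVTSDr.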
   2724 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
   2725   Value *V = I->getOperand(0);
   2726   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
   2727     return false;
   2728 
   2729   unsigned Op = getRegForValue(V);
   2730   if (Op == 0)
   2731     return false;
   2732 
   2733   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
   2734   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
   2735           ResultReg).addReg(Op);
   2736   updateValueMap(I, ResultReg);
   2737   return true;
   2738 }
   2739 
   2740 // FPToUI and FPToSI: the Signed flag selects FCVTZS vs. FCVTZU.
   2741 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
   2742   MVT DestVT;
   2743   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
   2744     return false;
   2745 
   2746   unsigned SrcReg = getRegForValue(I->getOperand(0));
   2747   if (SrcReg == 0)
   2748     return false;
   2749 
   2750   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
   2751   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
   2752     return false;
   2753 
   2754   unsigned Opc;
   2755   if (SrcVT == MVT::f64) {
   2756     if (Signed)
   2757       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
   2758     else
   2759       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
   2760   } else {
   2761     if (Signed)
   2762       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
   2763     else
   2764       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
   2765   }
   2766   unsigned ResultReg = createResultReg(
   2767       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
   2768   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
   2769       .addReg(SrcReg);
   2770   updateValueMap(I, ResultReg);
   2771   return true;
   2772 }
   2773 
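        /// Lower sitofp/uitofp using SCVTF/UCVTF; integer sources narrower than
        /// 32 bits are extended first, and f16 results are left to SelectionDAG.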
   2774 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
   2775   MVT DestVT;
   2776   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
   2777     return false;
   2778   // Let regular ISEL handle FP16
   2779   if (DestVT == MVT::f16)
   2780     return false;
   2781 
   2782   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
   2783          "Unexpected value type.");
   2784 
   2785   unsigned SrcReg = getRegForValue(I->getOperand(0));
   2786   if (!SrcReg)
   2787     return false;
   2788   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
   2789 
   2790   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
   2791 
   2792   // Handle sign-extension.
   2793   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
   2794     SrcReg =
   2795         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
   2796     if (!SrcReg)
   2797       return false;
   2798     SrcIsKill = true;
   2799   }
   2800 
   2801   unsigned Opc;
   2802   if (SrcVT == MVT::i64) {
   2803     if (Signed)
   2804       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
   2805     else
   2806       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
   2807   } else {
   2808     if (Signed)
   2809       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
   2810     else
   2811       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
   2812   }
   2813 
   2814   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
   2815                                       SrcIsKill);
   2816   updateValueMap(I, ResultReg);
   2817   return true;
   2818 }
   2819 
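        /// Try to lower the formal arguments entirely in FastISel. Only the plain C
        /// calling convention with at most 8 GPR and 8 FPR/vector arguments is handled.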
   2820 bool AArch64FastISel::fastLowerArguments() {
   2821   if (!FuncInfo.CanLowerReturn)
   2822     return false;
   2823 
   2824   const Function *F = FuncInfo.Fn;
   2825   if (F->isVarArg())
   2826     return false;
   2827 
   2828   CallingConv::ID CC = F->getCallingConv();
   2829   if (CC != CallingConv::C)
   2830     return false;
   2831 
   2832   // Only handle simple cases of up to 8 GPR and FPR each.
   2833   unsigned GPRCnt = 0;
   2834   unsigned FPRCnt = 0;
   2835   unsigned Idx = 0;
   2836   for (auto const &Arg : F->args()) {
   2837     // Argument attribute indices start at 1; index 0 is the return value.
   2838     ++Idx;
   2839     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
   2840         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
   2841         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
   2842         F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
   2843         F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
   2844         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
   2845       return false;
   2846 
   2847     Type *ArgTy = Arg.getType();
   2848     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
   2849       return false;
   2850 
   2851     EVT ArgVT = TLI.getValueType(DL, ArgTy);
   2852     if (!ArgVT.isSimple())
   2853       return false;
   2854 
   2855     MVT VT = ArgVT.getSimpleVT().SimpleTy;
   2856     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
   2857       return false;
   2858 
   2859     if (VT.isVector() &&
   2860         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
   2861       return false;
   2862 
   2863     if (VT >= MVT::i1 && VT <= MVT::i64)
   2864       ++GPRCnt;
   2865     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
   2866              VT.is128BitVector())
   2867       ++FPRCnt;
   2868     else
   2869       return false;
   2870 
   2871     if (GPRCnt > 8 || FPRCnt > 8)
   2872       return false;
   2873   }
   2874 
   2875   static const MCPhysReg Registers[6][8] = {
   2876     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
   2877       AArch64::W5, AArch64::W6, AArch64::W7 },
   2878     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
   2879       AArch64::X5, AArch64::X6, AArch64::X7 },
   2880     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
   2881       AArch64::H5, AArch64::H6, AArch64::H7 },
   2882     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
   2883       AArch64::S5, AArch64::S6, AArch64::S7 },
   2884     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
   2885       AArch64::D5, AArch64::D6, AArch64::D7 },
   2886     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
   2887       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
   2888   };
   2889 
   2890   unsigned GPRIdx = 0;
   2891   unsigned FPRIdx = 0;
   2892   for (auto const &Arg : F->args()) {
   2893     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
   2894     unsigned SrcReg;
   2895     const TargetRegisterClass *RC;
   2896     if (VT >= MVT::i1 && VT <= MVT::i32) {
   2897       SrcReg = Registers[0][GPRIdx++];
   2898       RC = &AArch64::GPR32RegClass;
   2899       VT = MVT::i32;
   2900     } else if (VT == MVT::i64) {
   2901       SrcReg = Registers[1][GPRIdx++];
   2902       RC = &AArch64::GPR64RegClass;
   2903     } else if (VT == MVT::f16) {
   2904       SrcReg = Registers[2][FPRIdx++];
   2905       RC = &AArch64::FPR16RegClass;
   2906     } else if (VT == MVT::f32) {
   2907       SrcReg = Registers[3][FPRIdx++];
   2908       RC = &AArch64::FPR32RegClass;
   2909     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
   2910       SrcReg = Registers[4][FPRIdx++];
   2911       RC = &AArch64::FPR64RegClass;
   2912     } else if (VT.is128BitVector()) {
   2913       SrcReg = Registers[5][FPRIdx++];
   2914       RC = &AArch64::FPR128RegClass;
   2915     } else
   2916       llvm_unreachable("Unexpected value type.");
   2917 
   2918     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
   2919     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
   2920     // Without this, EmitLiveInCopies may eliminate the livein if its only
   2921     // use is a bitcast (which isn't turned into an instruction).
   2922     unsigned ResultReg = createResultReg(RC);
   2923     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2924             TII.get(TargetOpcode::COPY), ResultReg)
   2925         .addReg(DstReg, getKillRegState(true));
   2926     updateValueMap(&Arg, ResultReg);
   2927   }
   2928   return true;
   2929 }
   2930 
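        /// Analyze the outgoing call operands, emit CALLSEQ_START, and copy or store
        /// each argument into its assigned register or stack slot. NumBytes receives
        /// the amount of stack space the call needs.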
   2931 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
   2932                                       SmallVectorImpl<MVT> &OutVTs,
   2933                                       unsigned &NumBytes) {
   2934   CallingConv::ID CC = CLI.CallConv;
   2935   SmallVector<CCValAssign, 16> ArgLocs;
   2936   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
   2937   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
   2938 
   2939   // Get a count of how many bytes are to be pushed on the stack.
   2940   NumBytes = CCInfo.getNextStackOffset();
   2941 
   2942   // Issue CALLSEQ_START
   2943   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
   2944   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
   2945     .addImm(NumBytes);
   2946 
   2947   // Process the args.
   2948   for (CCValAssign &VA : ArgLocs) {
   2949     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
   2950     MVT ArgVT = OutVTs[VA.getValNo()];
   2951 
   2952     unsigned ArgReg = getRegForValue(ArgVal);
   2953     if (!ArgReg)
   2954       return false;
   2955 
   2956     // Handle arg promotion: SExt, ZExt, AExt.
   2957     switch (VA.getLocInfo()) {
   2958     case CCValAssign::Full:
   2959       break;
   2960     case CCValAssign::SExt: {
   2961       MVT DestVT = VA.getLocVT();
   2962       MVT SrcVT = ArgVT;
   2963       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
   2964       if (!ArgReg)
   2965         return false;
   2966       break;
   2967     }
   2968     case CCValAssign::AExt:
   2969     // Intentional fall-through.
   2970     case CCValAssign::ZExt: {
   2971       MVT DestVT = VA.getLocVT();
   2972       MVT SrcVT = ArgVT;
   2973       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
   2974       if (!ArgReg)
   2975         return false;
   2976       break;
   2977     }
   2978     default:
   2979       llvm_unreachable("Unknown arg promotion!");
   2980     }
   2981 
   2982     // Now copy/store arg to correct locations.
   2983     if (VA.isRegLoc() && !VA.needsCustom()) {
   2984       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2985               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
   2986       CLI.OutRegs.push_back(VA.getLocReg());
   2987     } else if (VA.needsCustom()) {
   2988       // FIXME: Handle custom args.
   2989       return false;
   2990     } else {
   2991       assert(VA.isMemLoc() && "Assuming store on stack.");
   2992 
   2993       // Don't emit stores for undef values.
   2994       if (isa<UndefValue>(ArgVal))
   2995         continue;
   2996 
   2997       // Need to store on the stack.
   2998       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
   2999 
   3000       unsigned BEAlign = 0;
   3001       if (ArgSize < 8 && !Subtarget->isLittleEndian())
   3002         BEAlign = 8 - ArgSize;
   3003 
   3004       Address Addr;
   3005       Addr.setKind(Address::RegBase);
   3006       Addr.setReg(AArch64::SP);
   3007       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
   3008 
   3009       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
   3010       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
   3011           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
   3012           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
   3013 
   3014       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
   3015         return false;
   3016     }
   3017   }
   3018   return true;
   3019 }
   3020 
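        /// Emit CALLSEQ_END and, for non-void calls, copy the single supported return
        /// value out of its physical register into CLI.ResultReg.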
   3021 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
   3022                                  unsigned NumBytes) {
   3023   CallingConv::ID CC = CLI.CallConv;
   3024 
   3025   // Issue CALLSEQ_END
   3026   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   3027   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
   3028     .addImm(NumBytes).addImm(0);
   3029 
   3030   // Now the return value.
   3031   if (RetVT != MVT::isVoid) {
   3032     SmallVector<CCValAssign, 16> RVLocs;
   3033     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
   3034     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
   3035 
   3036     // Only handle a single return value.
   3037     if (RVLocs.size() != 1)
   3038       return false;
   3039 
   3040     // Copy all of the result registers out of their specified physreg.
   3041     MVT CopyVT = RVLocs[0].getValVT();
   3042 
   3043     // TODO: Handle big-endian results
   3044     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
   3045       return false;
   3046 
   3047     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
   3048     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3049             TII.get(TargetOpcode::COPY), ResultReg)
   3050         .addReg(RVLocs[0].getLocReg());
   3051     CLI.InRegs.push_back(RVLocs[0].getLocReg());
   3052 
   3053     CLI.ResultReg = ResultReg;
   3054     CLI.NumResultRegs = 1;
   3055   }
   3056 
   3057   return true;
   3058 }
   3059 
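        /// Lower a call in FastISel. Tail calls, varargs, and code models other than
        /// small (or large on MachO) are left to SelectionDAG.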
   3060 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   3061   CallingConv::ID CC  = CLI.CallConv;
   3062   bool IsTailCall     = CLI.IsTailCall;
   3063   bool IsVarArg       = CLI.IsVarArg;
   3064   const Value *Callee = CLI.Callee;
   3065   MCSymbol *Symbol = CLI.Symbol;
   3066 
   3067   if (!Callee && !Symbol)
   3068     return false;
   3069 
   3070   // Allow SelectionDAG isel to handle tail calls.
   3071   if (IsTailCall)
   3072     return false;
   3073 
   3074   CodeModel::Model CM = TM.getCodeModel();
   3075   // Only support the small and large code model.
   3076   if (CM != CodeModel::Small && CM != CodeModel::Large)
   3077     return false;
   3078 
   3079   // FIXME: Add large code model support for ELF.
   3080   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
   3081     return false;
   3082 
   3083   // Let SDISel handle vararg functions.
   3084   if (IsVarArg)
   3085     return false;
   3086 
   3087   // FIXME: Only handle *simple* calls for now.
   3088   MVT RetVT;
   3089   if (CLI.RetTy->isVoidTy())
   3090     RetVT = MVT::isVoid;
   3091   else if (!isTypeLegal(CLI.RetTy, RetVT))
   3092     return false;
   3093 
   3094   for (auto Flag : CLI.OutFlags)
   3095     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
   3096         Flag.isSwiftSelf() || Flag.isSwiftError())
   3097       return false;
   3098 
   3099   // Set up the argument vectors.
   3100   SmallVector<MVT, 16> OutVTs;
   3101   OutVTs.reserve(CLI.OutVals.size());
   3102 
   3103   for (auto *Val : CLI.OutVals) {
   3104     MVT VT;
   3105     if (!isTypeLegal(Val->getType(), VT) &&
   3106         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
   3107       return false;
   3108 
   3109     // We don't handle vector parameters yet.
   3110     if (VT.isVector() || VT.getSizeInBits() > 64)
   3111       return false;
   3112 
   3113     OutVTs.push_back(VT);
   3114   }
   3115 
   3116   Address Addr;
   3117   if (Callee && !computeCallAddress(Callee, Addr))
   3118     return false;
   3119 
   3120   // Handle the arguments now that we've gotten them.
   3121   unsigned NumBytes;
   3122   if (!processCallArgs(CLI, OutVTs, NumBytes))
   3123     return false;
   3124 
   3125   // Issue the call.
   3126   MachineInstrBuilder MIB;
   3127   if (CM == CodeModel::Small) {
   3128     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
   3129     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
   3130     if (Symbol)
   3131       MIB.addSym(Symbol, 0);
   3132     else if (Addr.getGlobalValue())
   3133       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
   3134     else if (Addr.getReg()) {
   3135       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
   3136       MIB.addReg(Reg);
   3137     } else
   3138       return false;
   3139   } else {
   3140     unsigned CallReg = 0;
   3141     if (Symbol) {
   3142       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
   3143       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
   3144               ADRPReg)
   3145           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
   3146 
   3147       CallReg = createResultReg(&AArch64::GPR64RegClass);
   3148       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3149               TII.get(AArch64::LDRXui), CallReg)
   3150           .addReg(ADRPReg)
   3151           .addSym(Symbol,
   3152                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
   3153     } else if (Addr.getGlobalValue())
   3154       CallReg = materializeGV(Addr.getGlobalValue());
   3155     else if (Addr.getReg())
   3156       CallReg = Addr.getReg();
   3157 
   3158     if (!CallReg)
   3159       return false;
   3160 
   3161     const MCInstrDesc &II = TII.get(AArch64::BLR);
   3162     CallReg = constrainOperandRegClass(II, CallReg, 0);
   3163     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
   3164   }
   3165 
   3166   // Add implicit physical register uses to the call.
   3167   for (auto Reg : CLI.OutRegs)
   3168     MIB.addReg(Reg, RegState::Implicit);
   3169 
   3170   // Add a register mask with the call-preserved registers.
   3171   // Proper defs for return values will be added by setPhysRegsDeadExcept().
   3172   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
   3173 
   3174   CLI.Call = MIB;
   3175 
   3176   // Finish off the call including any return values.
   3177   return finishCall(CLI, RetVT, NumBytes);
   3178 }
   3179 
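        /// Return true if a memcpy of Len bytes with the given alignment is small
        /// enough to be expanded inline.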
   3180 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
   3181   if (Alignment)
   3182     return Len / Alignment <= 4;
   3183   else
   3184     return Len < 32;
   3185 }
   3186 
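        /// Expand a small memcpy inline as a sequence of loads and stores, using the
        /// widest access that the remaining length and alignment allow.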
   3187 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
   3188                                          uint64_t Len, unsigned Alignment) {
   3189   // Make sure we don't bloat code by inlining very large memcpys.
   3190   if (!isMemCpySmall(Len, Alignment))
   3191     return false;
   3192 
   3193   int64_t UnscaledOffset = 0;
   3194   Address OrigDest = Dest;
   3195   Address OrigSrc = Src;
   3196 
   3197   while (Len) {
   3198     MVT VT;
   3199     if (!Alignment || Alignment >= 8) {
   3200       if (Len >= 8)
   3201         VT = MVT::i64;
   3202       else if (Len >= 4)
   3203         VT = MVT::i32;
   3204       else if (Len >= 2)
   3205         VT = MVT::i16;
   3206       else {
   3207         VT = MVT::i8;
   3208       }
   3209     } else {
   3210       // Bound based on alignment.
   3211       if (Len >= 4 && Alignment == 4)
   3212         VT = MVT::i32;
   3213       else if (Len >= 2 && Alignment == 2)
   3214         VT = MVT::i16;
   3215       else {
   3216         VT = MVT::i8;
   3217       }
   3218     }
   3219 
   3220     unsigned ResultReg = emitLoad(VT, VT, Src);
   3221     if (!ResultReg)
   3222       return false;
   3223 
   3224     if (!emitStore(VT, ResultReg, Dest))
   3225       return false;
   3226 
   3227     int64_t Size = VT.getSizeInBits() / 8;
   3228     Len -= Size;
   3229     UnscaledOffset += Size;
   3230 
   3231     // We need to recompute the unscaled offset for each iteration.
   3232     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
   3233     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
   3234   }
   3235 
   3236   return true;
   3237 }
   3238 
   3239 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
   3240 /// into the user. The condition code will only be updated on success.
   3241 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
   3242                                         const Instruction *I,
   3243                                         const Value *Cond) {
   3244   if (!isa<ExtractValueInst>(Cond))
   3245     return false;
   3246 
   3247   const auto *EV = cast<ExtractValueInst>(Cond);
   3248   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
   3249     return false;
   3250 
   3251   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
   3252   MVT RetVT;
   3253   const Function *Callee = II->getCalledFunction();
   3254   Type *RetTy =
   3255   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
   3256   if (!isTypeLegal(RetTy, RetVT))
   3257     return false;
   3258 
   3259   if (RetVT != MVT::i32 && RetVT != MVT::i64)
   3260     return false;
   3261 
   3262   const Value *LHS = II->getArgOperand(0);
   3263   const Value *RHS = II->getArgOperand(1);
   3264 
   3265   // Canonicalize immediate to the RHS.
   3266   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
   3267       isCommutativeIntrinsic(II))
   3268     std::swap(LHS, RHS);
   3269 
   3270   // Simplify multiplies.
   3271   Intrinsic::ID IID = II->getIntrinsicID();
   3272   switch (IID) {
   3273   default:
   3274     break;
   3275   case Intrinsic::smul_with_overflow:
   3276     if (const auto *C = dyn_cast<ConstantInt>(RHS))
   3277       if (C->getValue() == 2)
   3278         IID = Intrinsic::sadd_with_overflow;
   3279     break;
   3280   case Intrinsic::umul_with_overflow:
   3281     if (const auto *C = dyn_cast<ConstantInt>(RHS))
   3282       if (C->getValue() == 2)
   3283         IID = Intrinsic::uadd_with_overflow;
   3284     break;
   3285   }
   3286 
   3287   AArch64CC::CondCode TmpCC;
   3288   switch (IID) {
   3289   default:
   3290     return false;
   3291   case Intrinsic::sadd_with_overflow:
   3292   case Intrinsic::ssub_with_overflow:
   3293     TmpCC = AArch64CC::VS;
   3294     break;
   3295   case Intrinsic::uadd_with_overflow:
   3296     TmpCC = AArch64CC::HS;
   3297     break;
   3298   case Intrinsic::usub_with_overflow:
   3299     TmpCC = AArch64CC::LO;
   3300     break;
   3301   case Intrinsic::smul_with_overflow:
   3302   case Intrinsic::umul_with_overflow:
   3303     TmpCC = AArch64CC::NE;
   3304     break;
   3305   }
   3306 
   3307   // Check if both instructions are in the same basic block.
   3308   if (!isValueAvailable(II))
   3309     return false;
   3310 
   3311   // Make sure nothing that could clobber the flags is in the way.
   3312   BasicBlock::const_iterator Start(I);
   3313   BasicBlock::const_iterator End(II);
   3314   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
   3315     // We only expect extractvalue instructions between the intrinsic and the
   3316     // instruction to be selected.
   3317     if (!isa<ExtractValueInst>(Itr))
   3318       return false;
   3319 
   3320     // Check that the extractvalue operand comes from the intrinsic.
   3321     const auto *EVI = cast<ExtractValueInst>(Itr);
   3322     if (EVI->getAggregateOperand() != II)
   3323       return false;
   3324   }
   3325 
   3326   CC = TmpCC;
   3327   return true;
   3328 }
   3329 
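        /// Lower the few intrinsics FastISel handles directly (frameaddress, the mem*
        /// family, sin/cos/pow libcalls, fabs, trap, sqrt, and the *.with.overflow
        /// family).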
   3330 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
   3331   // FIXME: Handle more intrinsics.
   3332   switch (II->getIntrinsicID()) {
   3333   default: return false;
   3334   case Intrinsic::frameaddress: {
   3335     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
   3336     MFI->setFrameAddressIsTaken(true);
   3337 
   3338     const AArch64RegisterInfo *RegInfo =
   3339         static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
   3340     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
   3341     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
   3342     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3343             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
   3344     // Recursively load frame address
   3345     // ldr x0, [fp]
   3346     // ldr x0, [x0]
   3347     // ldr x0, [x0]
   3348     // ...
   3349     unsigned DestReg;
   3350     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
   3351     while (Depth--) {
   3352       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
   3353                                 SrcReg, /*IsKill=*/true, 0);
   3354       assert(DestReg && "Unexpected LDR instruction emission failure.");
   3355       SrcReg = DestReg;
   3356     }
   3357 
   3358     updateValueMap(II, SrcReg);
   3359     return true;
   3360   }
   3361   case Intrinsic::memcpy:
   3362   case Intrinsic::memmove: {
   3363     const auto *MTI = cast<MemTransferInst>(II);
   3364     // Don't handle volatile.
   3365     if (MTI->isVolatile())
   3366       return false;
   3367 
   3368     // Disable inlining for memmove before calls to computeAddress.  Otherwise,
   3369     // we would emit dead code because we don't currently handle memmoves.
   3370     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
   3371     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
   3372       // Small memcpys are common enough that we want to do them without a call
   3373       // if possible.
   3374       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
   3375       unsigned Alignment = MTI->getAlignment();
   3376       if (isMemCpySmall(Len, Alignment)) {
   3377         Address Dest, Src;
   3378         if (!computeAddress(MTI->getRawDest(), Dest) ||
   3379             !computeAddress(MTI->getRawSource(), Src))
   3380           return false;
   3381         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
   3382           return true;
   3383       }
   3384     }
   3385 
   3386     if (!MTI->getLength()->getType()->isIntegerTy(64))
   3387       return false;
   3388 
   3389     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
   3390       // Fast instruction selection doesn't support the special
   3391       // address spaces.
   3392       return false;
   3393 
   3394     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
   3395     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
   3396   }
   3397   case Intrinsic::memset: {
   3398     const MemSetInst *MSI = cast<MemSetInst>(II);
   3399     // Don't handle volatile.
   3400     if (MSI->isVolatile())
   3401       return false;
   3402 
   3403     if (!MSI->getLength()->getType()->isIntegerTy(64))
   3404       return false;
   3405 
   3406     if (MSI->getDestAddressSpace() > 255)
   3407       // Fast instruction selection doesn't support the special
   3408       // address spaces.
   3409       return false;
   3410 
   3411     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
   3412   }
   3413   case Intrinsic::sin:
   3414   case Intrinsic::cos:
   3415   case Intrinsic::pow: {
   3416     MVT RetVT;
   3417     if (!isTypeLegal(II->getType(), RetVT))
   3418       return false;
   3419 
   3420     if (RetVT != MVT::f32 && RetVT != MVT::f64)
   3421       return false;
   3422 
   3423     static const RTLIB::Libcall LibCallTable[3][2] = {
   3424       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
   3425       { RTLIB::COS_F32, RTLIB::COS_F64 },
   3426       { RTLIB::POW_F32, RTLIB::POW_F64 }
   3427     };
   3428     RTLIB::Libcall LC;
   3429     bool Is64Bit = RetVT == MVT::f64;
   3430     switch (II->getIntrinsicID()) {
   3431     default:
   3432       llvm_unreachable("Unexpected intrinsic.");
   3433     case Intrinsic::sin:
   3434       LC = LibCallTable[0][Is64Bit];
   3435       break;
   3436     case Intrinsic::cos:
   3437       LC = LibCallTable[1][Is64Bit];
   3438       break;
   3439     case Intrinsic::pow:
   3440       LC = LibCallTable[2][Is64Bit];
   3441       break;
   3442     }
   3443 
   3444     ArgListTy Args;
   3445     Args.reserve(II->getNumArgOperands());
   3446 
   3447     // Populate the argument list.
   3448     for (auto &Arg : II->arg_operands()) {
   3449       ArgListEntry Entry;
   3450       Entry.Val = Arg;
   3451       Entry.Ty = Arg->getType();
   3452       Args.push_back(Entry);
   3453     }
   3454 
   3455     CallLoweringInfo CLI;
   3456     MCContext &Ctx = MF->getContext();
   3457     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
   3458                   TLI.getLibcallName(LC), std::move(Args));
   3459     if (!lowerCallTo(CLI))
   3460       return false;
   3461     updateValueMap(II, CLI.ResultReg);
   3462     return true;
   3463   }
   3464   case Intrinsic::fabs: {
   3465     MVT VT;
   3466     if (!isTypeLegal(II->getType(), VT))
   3467       return false;
   3468 
   3469     unsigned Opc;
   3470     switch (VT.SimpleTy) {
   3471     default:
   3472       return false;
   3473     case MVT::f32:
   3474       Opc = AArch64::FABSSr;
   3475       break;
   3476     case MVT::f64:
   3477       Opc = AArch64::FABSDr;
   3478       break;
   3479     }
   3480     unsigned SrcReg = getRegForValue(II->getOperand(0));
   3481     if (!SrcReg)
   3482       return false;
   3483     bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
   3484     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
   3485     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
   3486       .addReg(SrcReg, getKillRegState(SrcRegIsKill));
   3487     updateValueMap(II, ResultReg);
   3488     return true;
   3489   }
   3490   case Intrinsic::trap: {
   3491     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
   3492         .addImm(1);
   3493     return true;
   3494   }
   3495   case Intrinsic::sqrt: {
   3496     Type *RetTy = II->getCalledFunction()->getReturnType();
   3497 
   3498     MVT VT;
   3499     if (!isTypeLegal(RetTy, VT))
   3500       return false;
   3501 
   3502     unsigned Op0Reg = getRegForValue(II->getOperand(0));
   3503     if (!Op0Reg)
   3504       return false;
   3505     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
   3506 
   3507     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
   3508     if (!ResultReg)
   3509       return false;
   3510 
   3511     updateValueMap(II, ResultReg);
   3512     return true;
   3513   }
   3514   case Intrinsic::sadd_with_overflow:
   3515   case Intrinsic::uadd_with_overflow:
   3516   case Intrinsic::ssub_with_overflow:
   3517   case Intrinsic::usub_with_overflow:
   3518   case Intrinsic::smul_with_overflow:
   3519   case Intrinsic::umul_with_overflow: {
   3520     // This implements the basic lowering of the xalu with overflow intrinsics.
   3521     const Function *Callee = II->getCalledFunction();
   3522     auto *Ty = cast<StructType>(Callee->getReturnType());
   3523     Type *RetTy = Ty->getTypeAtIndex(0U);
   3524 
   3525     MVT VT;
   3526     if (!isTypeLegal(RetTy, VT))
   3527       return false;
   3528 
   3529     if (VT != MVT::i32 && VT != MVT::i64)
   3530       return false;
   3531 
   3532     const Value *LHS = II->getArgOperand(0);
   3533     const Value *RHS = II->getArgOperand(1);
   3534     // Canonicalize immediate to the RHS.
   3535     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
   3536         isCommutativeIntrinsic(II))
   3537       std::swap(LHS, RHS);
   3538 
   3539     // Simplify multiplies.
   3540     Intrinsic::ID IID = II->getIntrinsicID();
   3541     switch (IID) {
   3542     default:
   3543       break;
   3544     case Intrinsic::smul_with_overflow:
   3545       if (const auto *C = dyn_cast<ConstantInt>(RHS))
   3546         if (C->getValue() == 2) {
   3547           IID = Intrinsic::sadd_with_overflow;
   3548           RHS = LHS;
   3549         }
   3550       break;
   3551     case Intrinsic::umul_with_overflow:
   3552       if (const auto *C = dyn_cast<ConstantInt>(RHS))
   3553         if (C->getValue() == 2) {
   3554           IID = Intrinsic::uadd_with_overflow;
   3555           RHS = LHS;
   3556         }
   3557       break;
   3558     }
   3559 
   3560     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
   3561     AArch64CC::CondCode CC = AArch64CC::Invalid;
   3562     switch (IID) {
   3563     default: llvm_unreachable("Unexpected intrinsic!");
   3564     case Intrinsic::sadd_with_overflow:
   3565       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
   3566       CC = AArch64CC::VS;
   3567       break;
   3568     case Intrinsic::uadd_with_overflow:
   3569       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
   3570       CC = AArch64CC::HS;
   3571       break;
   3572     case Intrinsic::ssub_with_overflow:
   3573       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
   3574       CC = AArch64CC::VS;
   3575       break;
   3576     case Intrinsic::usub_with_overflow:
   3577       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
   3578       CC = AArch64CC::LO;
   3579       break;
   3580     case Intrinsic::smul_with_overflow: {
   3581       CC = AArch64CC::NE;
   3582       unsigned LHSReg = getRegForValue(LHS);
   3583       if (!LHSReg)
   3584         return false;
   3585       bool LHSIsKill = hasTrivialKill(LHS);
   3586 
   3587       unsigned RHSReg = getRegForValue(RHS);
   3588       if (!RHSReg)
   3589         return false;
   3590       bool RHSIsKill = hasTrivialKill(RHS);
   3591 
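              // Overflow check: for i32, compute the full product with SMULL and
              // compare its high word against the sign bits of the low word; for i64,
              // compare MULHS against the sign bits of the 64-bit product.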
   3592       if (VT == MVT::i32) {
   3593         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
   3594         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
   3595                                        /*IsKill=*/false, 32);
   3596         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
   3597                                             AArch64::sub_32);
   3598         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
   3599                                               AArch64::sub_32);
   3600         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
   3601                     AArch64_AM::ASR, 31, /*WantResult=*/false);
   3602       } else {
   3603         assert(VT == MVT::i64 && "Unexpected value type.");
   3604         // LHSReg and RHSReg cannot be killed by this Mul, since they are
   3605         // reused in the next instruction.
   3606         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
   3607                             /*IsKill=*/false);
   3608         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
   3609                                         RHSReg, RHSIsKill);
   3610         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
   3611                     AArch64_AM::ASR, 63, /*WantResult=*/false);
   3612       }
   3613       break;
   3614     }
   3615     case Intrinsic::umul_with_overflow: {
   3616       CC = AArch64CC::NE;
   3617       unsigned LHSReg = getRegForValue(LHS);
   3618       if (!LHSReg)
   3619         return false;
   3620       bool LHSIsKill = hasTrivialKill(LHS);
   3621 
   3622       unsigned RHSReg = getRegForValue(RHS);
   3623       if (!RHSReg)
   3624         return false;
   3625       bool RHSIsKill = hasTrivialKill(RHS);
   3626 
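              // Overflow check: for i32, any set bit in the high word of the UMULL
              // result means overflow; for i64, overflow occurred iff MULHU is nonzero.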
   3627       if (VT == MVT::i32) {
   3628         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
   3629         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
   3630                     /*IsKill=*/false, AArch64_AM::LSR, 32,
   3631                     /*WantResult=*/false);
   3632         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
   3633                                             AArch64::sub_32);
   3634       } else {
   3635         assert(VT == MVT::i64 && "Unexpected value type.");
   3636         // LHSReg and RHSReg cannot be killed by this Mul, since they are
   3637         // reused in the next instruction.
   3638         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
   3639                             /*IsKill=*/false);
   3640         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
   3641                                         RHSReg, RHSIsKill);
   3642         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
   3643                     /*IsKill=*/false, /*WantResult=*/false);
   3644       }
   3645       break;
   3646     }
   3647     }
   3648 
   3649     if (MulReg) {
   3650       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
   3651       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3652               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
   3653     }
   3654 
   3655     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
   3656                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
   3657                                   /*IsKill=*/true, getInvertedCondCode(CC));
   3658     (void)ResultReg2;
   3659     assert((ResultReg1 + 1) == ResultReg2 &&
   3660            "Nonconsecutive result registers.");
   3661     updateValueMap(II, ResultReg1, 2);
   3662     return true;
   3663   }
   3664   }
   3665   return false;
   3666 }
   3667 
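        /// Lower a return instruction. Only a single register return value is
        /// supported; small integers are extended according to their zeroext/signext
        /// attribute first.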
   3668 bool AArch64FastISel::selectRet(const Instruction *I) {
   3669   const ReturnInst *Ret = cast<ReturnInst>(I);
   3670   const Function &F = *I->getParent()->getParent();
   3671 
   3672   if (!FuncInfo.CanLowerReturn)
   3673     return false;
   3674 
   3675   if (F.isVarArg())
   3676     return false;
   3677 
   3678   if (TLI.supportSwiftError() &&
   3679       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
   3680     return false;
   3681 
   3682   if (TLI.supportSplitCSR(FuncInfo.MF))
   3683     return false;
   3684 
   3685   // Build a list of return value registers.
   3686   SmallVector<unsigned, 4> RetRegs;
   3687 
   3688   if (Ret->getNumOperands() > 0) {
   3689     CallingConv::ID CC = F.getCallingConv();
   3690     SmallVector<ISD::OutputArg, 4> Outs;
   3691     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
   3692 
   3693     // Analyze operands of the call, assigning locations to each operand.
   3694     SmallVector<CCValAssign, 16> ValLocs;
   3695     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
   3696     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
   3697                                                      : RetCC_AArch64_AAPCS;
   3698     CCInfo.AnalyzeReturn(Outs, RetCC);
   3699 
   3700     // Only handle a single return value for now.
   3701     if (ValLocs.size() != 1)
   3702       return false;
   3703 
   3704     CCValAssign &VA = ValLocs[0];
   3705     const Value *RV = Ret->getOperand(0);
   3706 
   3707     // Don't bother handling odd stuff for now.
   3708     if ((VA.getLocInfo() != CCValAssign::Full) &&
   3709         (VA.getLocInfo() != CCValAssign::BCvt))
   3710       return false;
   3711 
   3712     // Only handle register returns for now.
   3713     if (!VA.isRegLoc())
   3714       return false;
   3715 
   3716     unsigned Reg = getRegForValue(RV);
   3717     if (Reg == 0)
   3718       return false;
   3719 
   3720     unsigned SrcReg = Reg + VA.getValNo();
   3721     unsigned DestReg = VA.getLocReg();
   3722     // Avoid a cross-class copy. This is very unlikely.
   3723     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
   3724       return false;
   3725 
   3726     EVT RVEVT = TLI.getValueType(DL, RV->getType());
   3727     if (!RVEVT.isSimple())
   3728       return false;
   3729 
   3730     // Vectors (of > 1 lane) in big endian need tricky handling.
   3731     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
   3732         !Subtarget->isLittleEndian())
   3733       return false;
   3734 
   3735     MVT RVVT = RVEVT.getSimpleVT();
   3736     if (RVVT == MVT::f128)
   3737       return false;
   3738 
   3739     MVT DestVT = VA.getValVT();
   3740     // Special handling for extended integers.
   3741     if (RVVT != DestVT) {
   3742       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
   3743         return false;
   3744 
   3745       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
   3746         return false;
   3747 
   3748       bool IsZExt = Outs[0].Flags.isZExt();
   3749       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
   3750       if (SrcReg == 0)
   3751         return false;
   3752     }
   3753 
   3754     // Make the copy.
   3755     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3756             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
   3757 
   3758     // Add register to return instruction.
   3759     RetRegs.push_back(VA.getLocReg());
   3760   }
   3761 
   3762   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3763                                     TII.get(AArch64::RET_ReallyLR));
   3764   for (unsigned RetReg : RetRegs)
   3765     MIB.addReg(RetReg, RegState::Implicit);
   3766   return true;
   3767 }
   3768 
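        /// Lower a trunc to either an AND of the low 32 bits (for i64 sources) or a
        /// plain COPY.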
   3769 bool AArch64FastISel::selectTrunc(const Instruction *I) {
   3770   Type *DestTy = I->getType();
   3771   Value *Op = I->getOperand(0);
   3772   Type *SrcTy = Op->getType();
   3773 
   3774   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
   3775   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
   3776   if (!SrcEVT.isSimple())
   3777     return false;
   3778   if (!DestEVT.isSimple())
   3779     return false;
   3780 
   3781   MVT SrcVT = SrcEVT.getSimpleVT();
   3782   MVT DestVT = DestEVT.getSimpleVT();
   3783 
   3784   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
   3785       SrcVT != MVT::i8)
   3786     return false;
   3787   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
   3788       DestVT != MVT::i1)
   3789     return false;
   3790 
   3791   unsigned SrcReg = getRegForValue(Op);
   3792   if (!SrcReg)
   3793     return false;
   3794   bool SrcIsKill = hasTrivialKill(Op);
   3795 
   3796   // If we're truncating from i64 to a smaller non-legal type then generate an
   3797   // AND. Otherwise, we know the high bits are undefined and a truncate only
   3798   // generates a COPY. We cannot reuse the source register as the result
   3799   // register, because that can incorrectly transfer the kill flag onto the
   3800   // source register.
   3801   unsigned ResultReg;
   3802   if (SrcVT == MVT::i64) {
   3803     uint64_t Mask = 0;
   3804     switch (DestVT.SimpleTy) {
   3805     default:
   3806       // Trunc i64 to i32 is handled by the target-independent fast-isel.
   3807       return false;
   3808     case MVT::i1:
   3809       Mask = 0x1;
   3810       break;
   3811     case MVT::i8:
   3812       Mask = 0xff;
   3813       break;
   3814     case MVT::i16:
   3815       Mask = 0xffff;
   3816       break;
   3817     }
   3818     // Issue an extract_subreg to get the lower 32 bits.
   3819     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
   3820                                                 AArch64::sub_32);
   3821     // Create the AND instruction which performs the actual truncation.
   3822     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
   3823     assert(ResultReg && "Unexpected AND instruction emission failure.");
   3824   } else {
   3825     ResultReg = createResultReg(&AArch64::GPR32RegClass);
   3826     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3827             TII.get(TargetOpcode::COPY), ResultReg)
   3828         .addReg(SrcReg, getKillRegState(SrcIsKill));
   3829   }
   3830 
   3831   updateValueMap(I, ResultReg);
   3832   return true;
   3833 }
   3834 
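        /// Extend an i1 value to DestVT. Zero-extension is an AND with 1 (plus a
        /// SUBREG_TO_REG for i64 results); sign-extension uses SBFM and is not
        /// implemented for i64.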
   3835 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
   3836   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
   3837           DestVT == MVT::i64) &&
   3838          "Unexpected value type.");
   3839   // Handle i8 and i16 as i32.
   3840   if (DestVT == MVT::i8 || DestVT == MVT::i16)
   3841     DestVT = MVT::i32;
   3842 
   3843   if (IsZExt) {
   3844     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
   3845     assert(ResultReg && "Unexpected AND instruction emission failure.");
   3846     if (DestVT == MVT::i64) {
   3847       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
   3848       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
   3849       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
   3850       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3851               TII.get(AArch64::SUBREG_TO_REG), Reg64)
   3852           .addImm(0)
   3853           .addReg(ResultReg)
   3854           .addImm(AArch64::sub_32);
   3855       ResultReg = Reg64;
   3856     }
   3857     return ResultReg;
   3858   } else {
   3859     if (DestVT == MVT::i64) {
   3860       // FIXME: We're SExt i1 to i64.
   3861       return 0;
   3862     }
   3863     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
   3864                             /*TODO:IsKill=*/false, 0, 0);
   3865   }
   3866 }
   3867 
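        /// Emit an integer multiply as MADD with the zero register as the addend.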
   3868 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
   3869                                       unsigned Op1, bool Op1IsKill) {
   3870   unsigned Opc, ZReg;
   3871   switch (RetVT.SimpleTy) {
   3872   default: return 0;
   3873   case MVT::i8:
   3874   case MVT::i16:
   3875   case MVT::i32:
   3876     RetVT = MVT::i32;
   3877     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
   3878   case MVT::i64:
   3879     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
   3880   }
   3881 
   3882   const TargetRegisterClass *RC =
   3883       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   3884   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
   3885                           ZReg, /*IsKill=*/true);
   3886 }
   3887 
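        /// Emit a 32 x 32 -> 64 bit signed widening multiply as SMADDL with XZR.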
   3888 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
   3889                                         unsigned Op1, bool Op1IsKill) {
   3890   if (RetVT != MVT::i64)
   3891     return 0;
   3892 
   3893   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
   3894                           Op0, Op0IsKill, Op1, Op1IsKill,
   3895                           AArch64::XZR, /*IsKill=*/true);
   3896 }
   3897 
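        /// Emit a 32 x 32 -> 64 bit unsigned widening multiply as UMADDL with XZR.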
   3898 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
   3899                                         unsigned Op1, bool Op1IsKill) {
   3900   if (RetVT != MVT::i64)
   3901     return 0;
   3902 
   3903   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
   3904                           Op0, Op0IsKill, Op1, Op1IsKill,
   3905                           AArch64::XZR, /*IsKill=*/true);
   3906 }
   3907 
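        /// Emit a variable shift left (LSLV). For i8/i16 the shift amount is masked
        /// and the result is truncated back to the narrow width.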
   3908 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
   3909                                      unsigned Op1Reg, bool Op1IsKill) {
   3910   unsigned Opc = 0;
   3911   bool NeedTrunc = false;
   3912   uint64_t Mask = 0;
   3913   switch (RetVT.SimpleTy) {
   3914   default: return 0;
   3915   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
   3916   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
   3917   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
   3918   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
   3919   }
   3920 
   3921   const TargetRegisterClass *RC =
   3922       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   3923   if (NeedTrunc) {
   3924     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
   3925     Op1IsKill = true;
   3926   }
   3927   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
   3928                                        Op1IsKill);
   3929   if (NeedTrunc)
   3930     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
   3931   return ResultReg;
   3932 }
   3933 
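        /// Emit an immediate shift left as a bitfield move (UBFM/SBFM), folding any
        /// pending zero-/sign-extension of the source into the same instruction.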
   3934 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
   3935                                      bool Op0IsKill, uint64_t Shift,
   3936                                      bool IsZExt) {
   3937   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
   3938          "Unexpected source/return type pair.");
   3939   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
   3940           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
   3941          "Unexpected source value type.");
   3942   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
   3943           RetVT == MVT::i64) && "Unexpected return value type.");
   3944 
   3945   bool Is64Bit = (RetVT == MVT::i64);
   3946   unsigned RegSize = Is64Bit ? 64 : 32;
   3947   unsigned DstBits = RetVT.getSizeInBits();
   3948   unsigned SrcBits = SrcVT.getSizeInBits();
   3949   const TargetRegisterClass *RC =
   3950       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   3951 
   3952   // Just emit a copy for "zero" shifts.
   3953   if (Shift == 0) {
   3954     if (RetVT == SrcVT) {
   3955       unsigned ResultReg = createResultReg(RC);
   3956       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3957               TII.get(TargetOpcode::COPY), ResultReg)
   3958           .addReg(Op0, getKillRegState(Op0IsKill));
   3959       return ResultReg;
   3960     } else
   3961       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
   3962   }
   3963 
   3964   // Don't deal with undefined shifts.
   3965   if (Shift >= DstBits)
   3966     return 0;
   3967 
   3968   // For immediate shifts we can fold the zero-/sign-extension into the shift.
   3969   // {S|U}BFM Wd, Wn, #r, #s
   3970   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
   3971 
   3972   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
   3973   // %2 = shl i16 %1, 4
   3974   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
   3975   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
   3976   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
   3977   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
   3978 
   3979   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
   3980   // %2 = shl i16 %1, 8
   3981   // Wd<32+7-24,32-24> = Wn<7:0>
   3982   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
   3983   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
   3984   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
   3985 
   3986   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
   3987   // %2 = shl i16 %1, 12
   3988   // Wd<32+3-20,32-20> = Wn<3:0>
   3989   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
   3990   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
   3991   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
   3992 
   3993   unsigned ImmR = RegSize - Shift;
   3994   // Limit the width to the length of the source type.
   3995   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
   3996   static const unsigned OpcTable[2][2] = {
   3997     {AArch64::SBFMWri, AArch64::SBFMXri},
   3998     {AArch64::UBFMWri, AArch64::UBFMXri}
   3999   };
   4000   unsigned Opc = OpcTable[IsZExt][Is64Bit];
   4001   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
   4002     unsigned TmpReg = MRI.createVirtualRegister(RC);
   4003     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   4004             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
   4005         .addImm(0)
   4006         .addReg(Op0, getKillRegState(Op0IsKill))
   4007         .addImm(AArch64::sub_32);
   4008     Op0 = TmpReg;
   4009     Op0IsKill = true;
   4010   }
   4011   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
   4012 }
   4013 
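        /// Emit a variable logical shift right (LSRV). For i8/i16 both operands are
        /// masked first and the result is truncated afterwards.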
   4014 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
   4015                                      unsigned Op1Reg, bool Op1IsKill) {
   4016   unsigned Opc = 0;
   4017   bool NeedTrunc = false;
   4018   uint64_t Mask = 0;
   4019   switch (RetVT.SimpleTy) {
   4020   default: return 0;
   4021   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
   4022   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
   4023   case MVT::i32: Opc = AArch64::LSRVWr; break;
   4024   case MVT::i64: Opc = AArch64::LSRVXr; break;
   4025   }
   4026 
   4027   const TargetRegisterClass *RC =
   4028       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   4029   if (NeedTrunc) {
   4030     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
   4031     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
   4032     Op0IsKill = Op1IsKill = true;
   4033   }
   4034   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
   4035                                        Op1IsKill);
   4036   if (NeedTrunc)
   4037     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
   4038   return ResultReg;
   4039 }
   4040 
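        /// Emit an immediate logical shift right as a bitfield move. A zero-extended
        /// value shifted by at least its source width folds to the constant zero; a
        /// sign-extended source is extended explicitly first.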
   4041 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
   4042                                      bool Op0IsKill, uint64_t Shift,
   4043                                      bool IsZExt) {
   4044   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
   4045          "Unexpected source/return type pair.");
   4046   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
   4047           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
   4048          "Unexpected source value type.");
   4049   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
   4050           RetVT == MVT::i64) && "Unexpected return value type.");
   4051 
   4052   bool Is64Bit = (RetVT == MVT::i64);
   4053   unsigned RegSize = Is64Bit ? 64 : 32;
   4054   unsigned DstBits = RetVT.getSizeInBits();
   4055   unsigned SrcBits = SrcVT.getSizeInBits();
   4056   const TargetRegisterClass *RC =
   4057       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   4058 
   4059   // Just emit a copy for "zero" shifts.
   4060   if (Shift == 0) {
   4061     if (RetVT == SrcVT) {
   4062       unsigned ResultReg = createResultReg(RC);
   4063       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   4064               TII.get(TargetOpcode::COPY), ResultReg)
   4065           .addReg(Op0, getKillRegState(Op0IsKill));
   4066       return ResultReg;
   4067     } else
   4068       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
   4069   }
   4070 
   4071   // Don't deal with undefined shifts.
   4072   if (Shift >= DstBits)
   4073     return 0;
   4074 
   4075   // For immediate shifts we can fold the zero-/sign-extension into the shift.
   4076   // {S|U}BFM Wd, Wn, #r, #s
   4077   // Wd<s-r:0> = Wn<s:r> when r <= s
   4078 
   4079   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
   4080   // %2 = lshr i16 %1, 4
   4081   // Wd<7-4:0> = Wn<7:4>
   4082   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
   4083   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
   4084   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
   4085 
   4086   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
   4087   // %2 = lshr i16 %1, 8
   4088   // Wd<7-7:0> = Wn<7:7>
   4089   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
   4090   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
   4091   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
   4092 
   4093   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
   4094   // %2 = lshr i16 %1, 12
   4095   // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
   4096   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
   4097   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
   4098   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
   4099 
   4100   if (Shift >= SrcBits && IsZExt)
   4101     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
   4102 
   4103   // It is not possible to fold a sign-extend into the LShr instruction, so emit
   4104   // the sign-extend explicitly and treat the result as zero-extended.
   4105   if (!IsZExt) {
   4106     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
   4107     if (!Op0)
   4108       return 0;
   4109     Op0IsKill = true;
   4110     SrcVT = RetVT;
   4111     SrcBits = SrcVT.getSizeInBits();
   4112     IsZExt = true;
   4113   }
   4114 
   4115   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
   4116   unsigned ImmS = SrcBits - 1;
   4117   static const unsigned OpcTable[2][2] = {
   4118     {AArch64::SBFMWri, AArch64::SBFMXri},
   4119     {AArch64::UBFMWri, AArch64::UBFMXri}
   4120   };
   4121   unsigned Opc = OpcTable[IsZExt][Is64Bit];
   4122   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
   4123     unsigned TmpReg = MRI.createVirtualRegister(RC);
   4124     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   4125             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
   4126         .addImm(0)
   4127         .addReg(Op0, getKillRegState(Op0IsKill))
   4128         .addImm(AArch64::sub_32);
   4129     Op0 = TmpReg;
   4130     Op0IsKill = true;
   4131   }
   4132   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
   4133 }
   4134 
   4135 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
   4136                                      unsigned Op1Reg, bool Op1IsKill) {
   4137   unsigned Opc = 0;
   4138   bool NeedTrunc = false;
   4139   uint64_t Mask = 0;
   4140   switch (RetVT.SimpleTy) {
   4141   default: return 0;
   4142   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
   4143   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
   4144   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
   4145   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
   4146   }
   4147 
   4148   const TargetRegisterClass *RC =
   4149       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
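            // As in emitLSR_rr, but the value operand is sign-extended rather than
            // masked so the arithmetic shift sees the correct sign bits; the shift
            // amount and the result are still masked to the value width.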
   4150   if (NeedTrunc) {
   4151     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
   4152     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
   4153     Op0IsKill = Op1IsKill = true;
   4154   }
   4155   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
   4156                                        Op1IsKill);
   4157   if (NeedTrunc)
   4158     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
   4159   return ResultReg;
   4160 }
   4161 
   4162 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
   4163                                      bool Op0IsKill, uint64_t Shift,
   4164                                      bool IsZExt) {
   4165   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
   4166          "Unexpected source/return type pair.");
   4167   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
   4168           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
   4169          "Unexpected source value type.");
   4170   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
   4171           RetVT == MVT::i64) && "Unexpected return value type.");
   4172 
   4173   bool Is64Bit = (RetVT == MVT::i64);
   4174   unsigned RegSize = Is64Bit ? 64 : 32;
   4175   unsigned DstBits = RetVT.getSizeInBits();
   4176   unsigned SrcBits = SrcVT.getSizeInBits();
   4177   const TargetRegisterClass *RC =
   4178       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   4179 
   4180   // Just emit a copy for "zero" shifts.
   4181   if (Shift == 0) {
   4182     if (RetVT == SrcVT) {
   4183       unsigned ResultReg = createResultReg(RC);
   4184       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   4185               TII.get(TargetOpcode::COPY), ResultReg)
   4186       .addReg(Op0, getKillRegState(Op0IsKill));
   4187       return ResultReg;
   4188     } else
   4189       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
   4190   }
   4191 
   4192   // Don't deal with undefined shifts.
   4193   if (Shift >= DstBits)
   4194     return 0;
   4195 
   4196   // For immediate shifts we can fold the zero-/sign-extension into the shift.
   4197   // {S|U}BFM Wd, Wn, #r, #s
   4198   // Wd<s-r:0> = Wn<s:r> when r <= s
   4199 
   4200   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
   4201   // %2 = ashr i16 %1, 4
   4202   // Wd<7-4:0> = Wn<7:4>
   4203   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
   4204   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
   4205   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
   4206 
   4207   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
   4208   // %2 = ashr i16 %1, 8
   4209   // Wd<7-7:0> = Wn<7:7>
   4210   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
   4211   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
   4212   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
   4213 
   4214   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
   4215   // %2 = ashr i16 %1, 12
   4216   // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
   4217   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
   4218   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
   4219   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
   4220 
   4221   if (Shift >= SrcBits && IsZExt)
   4222     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
   4223 
   4224   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
   4225   unsigned ImmS = SrcBits - 1;
   4226   static const unsigned OpcTable[2][2] = {
   4227     {AArch64::SBFMWri, AArch64::SBFMXri},
   4228     {AArch64::UBFMWri, AArch64::UBFMXri}
   4229   };
   4230   unsigned Opc = OpcTable[IsZExt][Is64Bit];
   4231   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
   4232     unsigned TmpReg = MRI.createVirtualRegister(RC);
   4233     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   4234             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
   4235         .addImm(0)
   4236         .addReg(Op0, getKillRegState(Op0IsKill))
   4237         .addImm(AArch64::sub_32);
   4238     Op0 = TmpReg;
   4239     Op0IsKill = true;
   4240   }
   4241   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
   4242 }
   4243 
   4244 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
   4245                                      bool IsZExt) {
   4246   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
   4247 
   4248   // FastISel does not have plumbing to deal with extensions where the SrcVT or
   4249   // DestVT are odd things, so test to make sure that they are both types we can
   4250   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
   4251   // bail out to SelectionDAG.
   4252   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
   4253        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
   4254       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
   4255        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
   4256     return 0;
   4257 
   4258   unsigned Opc;
   4259   unsigned Imm = 0;
   4260 
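            // The extension is emitted as a bitfield move, {U|S}BFM Rd, Rn, #0, #(w-1),
            // which the assembler prints via aliases such as uxtb/uxth/sxtb/sxth/sxtw.
            // Imm below holds that width-1 value for the source type.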
   4261   switch (SrcVT.SimpleTy) {
   4262   default:
   4263     return 0;
   4264   case MVT::i1:
   4265     return emiti1Ext(SrcReg, DestVT, IsZExt);
   4266   case MVT::i8:
   4267     if (DestVT == MVT::i64)
   4268       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
   4269     else
   4270       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
   4271     Imm = 7;
   4272     break;
   4273   case MVT::i16:
   4274     if (DestVT == MVT::i64)
   4275       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
   4276     else
   4277       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
   4278     Imm = 15;
   4279     break;
   4280   case MVT::i32:
   4281     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
   4282     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
   4283     Imm = 31;
   4284     break;
   4285   }
   4286 
   4287   // Handle i8 and i16 as i32.
   4288   if (DestVT == MVT::i8 || DestVT == MVT::i16)
   4289     DestVT = MVT::i32;
   4290   else if (DestVT == MVT::i64) {
   4291     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
   4292     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   4293             TII.get(AArch64::SUBREG_TO_REG), Src64)
   4294         .addImm(0)
   4295         .addReg(SrcReg)
   4296         .addImm(AArch64::sub_32);
   4297     SrcReg = Src64;
   4298   }
   4299 
   4300   const TargetRegisterClass *RC =
   4301       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   4302   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
   4303 }
   4304 
   4305 static bool isZExtLoad(const MachineInstr *LI) {
   4306   switch (LI->getOpcode()) {
   4307   default:
   4308     return false;
   4309   case AArch64::LDURBBi:
   4310   case AArch64::LDURHHi:
   4311   case AArch64::LDURWi:
   4312   case AArch64::LDRBBui:
   4313   case AArch64::LDRHHui:
   4314   case AArch64::LDRWui:
   4315   case AArch64::LDRBBroX:
   4316   case AArch64::LDRHHroX:
   4317   case AArch64::LDRWroX:
   4318   case AArch64::LDRBBroW:
   4319   case AArch64::LDRHHroW:
   4320   case AArch64::LDRWroW:
   4321     return true;
   4322   }
   4323 }
   4324 
   4325 static bool isSExtLoad(const MachineInstr *LI) {
   4326   switch (LI->getOpcode()) {
   4327   default:
   4328     return false;
   4329   case AArch64::LDURSBWi:
   4330   case AArch64::LDURSHWi:
   4331   case AArch64::LDURSBXi:
   4332   case AArch64::LDURSHXi:
   4333   case AArch64::LDURSWi:
   4334   case AArch64::LDRSBWui:
   4335   case AArch64::LDRSHWui:
   4336   case AArch64::LDRSBXui:
   4337   case AArch64::LDRSHXui:
   4338   case AArch64::LDRSWui:
   4339   case AArch64::LDRSBWroX:
   4340   case AArch64::LDRSHWroX:
   4341   case AArch64::LDRSBXroX:
   4342   case AArch64::LDRSHXroX:
   4343   case AArch64::LDRSWroX:
   4344   case AArch64::LDRSBWroW:
   4345   case AArch64::LDRSHWroW:
   4346   case AArch64::LDRSBXroW:
   4347   case AArch64::LDRSHXroW:
   4348   case AArch64::LDRSWroW:
   4349     return true;
   4350   }
   4351 }
   4352 
   4353 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
   4354                                          MVT SrcVT) {
   4355   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
   4356   if (!LI || !LI->hasOneUse())
   4357     return false;
   4358 
   4359   // Check if the load instruction has already been selected.
   4360   unsigned Reg = lookUpRegForValue(LI);
   4361   if (!Reg)
   4362     return false;
   4363 
   4364   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
   4365   if (!MI)
   4366     return false;
   4367 
   4368   // Check if the correct load instruction has been emitted - SelectionDAG might
   4369   // have emitted a zero-extending load, but we need a sign-extending load.
   4370   bool IsZExt = isa<ZExtInst>(I);
   4371   const auto *LoadMI = MI;
   4372   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
   4373       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
   4374     unsigned LoadReg = MI->getOperand(1).getReg();
   4375     LoadMI = MRI.getUniqueVRegDef(LoadReg);
   4376     assert(LoadMI && "Expected valid instruction");
   4377   }
   4378   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
   4379     return false;
   4380 
   4381   // Nothing to be done.
   4382   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
   4383     updateValueMap(I, Reg);
   4384     return true;
   4385   }
   4386 
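            // For a zero-extend to i64 the 32-bit load already cleared the upper bits,
            // so wrapping the loaded value in a SUBREG_TO_REG is sufficient. For a
            // sign-extend the sign-extending load already produced the full 64-bit
            // value, so the sub_32 COPY emitted for the i32 result can simply be erased.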
   4387   if (IsZExt) {
   4388     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
   4389     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   4390             TII.get(AArch64::SUBREG_TO_REG), Reg64)
   4391         .addImm(0)
   4392         .addReg(Reg, getKillRegState(true))
   4393         .addImm(AArch64::sub_32);
   4394     Reg = Reg64;
   4395   } else {
   4396     assert((MI->getOpcode() == TargetOpcode::COPY &&
   4397             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
   4398            "Expected copy instruction");
   4399     Reg = MI->getOperand(1).getReg();
   4400     MI->eraseFromParent();
   4401   }
   4402   updateValueMap(I, Reg);
   4403   return true;
   4404 }
   4405 
   4406 bool AArch64FastISel::selectIntExt(const Instruction *I) {
   4407   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
   4408          "Unexpected integer extend instruction.");
   4409   MVT RetVT;
   4410   MVT SrcVT;
   4411   if (!isTypeSupported(I->getType(), RetVT))
   4412     return false;
   4413 
   4414   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
   4415     return false;
   4416 
   4417   // Try to optimize already sign-/zero-extended values from load instructions.
   4418   if (optimizeIntExtLoad(I, RetVT, SrcVT))
   4419     return true;
   4420 
   4421   unsigned SrcReg = getRegForValue(I->getOperand(0));
   4422   if (!SrcReg)
   4423     return false;
   4424   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
   4425 
   4426   // Try to optimize already sign-/zero-extended values from function arguments.
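            // If the zeroext/signext ABI attribute guarantees the caller already
            // extended the argument, the extend is a no-op; only a SUBREG_TO_REG is
            // needed when the result is consumed as an i64.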
   4427   bool IsZExt = isa<ZExtInst>(I);
   4428   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
   4429     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
   4430       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
   4431         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
   4432         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   4433                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
   4434             .addImm(0)
   4435             .addReg(SrcReg, getKillRegState(SrcIsKill))
   4436             .addImm(AArch64::sub_32);
   4437         SrcReg = ResultReg;
   4438       }
   4439       // Conservatively clear all kill flags from all uses, because we are
   4440       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
   4441       // level. The result of the instruction at IR level might have been
   4442       // trivially dead, which is no longer true.
   4443       unsigned UseReg = lookUpRegForValue(I);
   4444       if (UseReg)
   4445         MRI.clearKillFlags(UseReg);
   4446 
   4447       updateValueMap(I, SrcReg);
   4448       return true;
   4449     }
   4450   }
   4451 
   4452   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
   4453   if (!ResultReg)
   4454     return false;
   4455 
   4456   updateValueMap(I, ResultReg);
   4457   return true;
   4458 }
   4459 
   4460 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
   4461   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
   4462   if (!DestEVT.isSimple())
   4463     return false;
   4464 
   4465   MVT DestVT = DestEVT.getSimpleVT();
   4466   if (DestVT != MVT::i64 && DestVT != MVT::i32)
   4467     return false;
   4468 
   4469   unsigned DivOpc;
   4470   bool Is64bit = (DestVT == MVT::i64);
   4471   switch (ISDOpcode) {
   4472   default:
   4473     return false;
   4474   case ISD::SREM:
   4475     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
   4476     break;
   4477   case ISD::UREM:
   4478     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
   4479     break;
   4480   }
   4481   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
   4482   unsigned Src0Reg = getRegForValue(I->getOperand(0));
   4483   if (!Src0Reg)
   4484     return false;
   4485   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
   4486 
   4487   unsigned Src1Reg = getRegForValue(I->getOperand(1));
   4488   if (!Src1Reg)
   4489     return false;
   4490   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
   4491 
   4492   const TargetRegisterClass *RC =
   4493       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   4494   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
   4495                                      Src1Reg, /*IsKill=*/false);
   4496   assert(QuotReg && "Unexpected DIV instruction emission failure.");
   4497   // The remainder is computed as numerator - (quotient * denominator) using the
   4498   // MSUB instruction.
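            // For example (illustrative register choices), "srem i32 %a, %b" becomes:
            //   sdiv w8, w0, w1
            //   msub w0, w8, w1, w0   // w0 = w0 - (w8 * w1)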
   4499   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
   4500                                         Src1Reg, Src1IsKill, Src0Reg,
   4501                                         Src0IsKill);
   4502   updateValueMap(I, ResultReg);
   4503   return true;
   4504 }
   4505 
   4506 bool AArch64FastISel::selectMul(const Instruction *I) {
   4507   MVT VT;
   4508   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
   4509     return false;
   4510 
   4511   if (VT.isVector())
   4512     return selectBinaryOp(I, ISD::MUL);
   4513 
   4514   const Value *Src0 = I->getOperand(0);
   4515   const Value *Src1 = I->getOperand(1);
   4516   if (const auto *C = dyn_cast<ConstantInt>(Src0))
   4517     if (C->getValue().isPowerOf2())
   4518       std::swap(Src0, Src1);
   4519 
   4520   // Try to simplify to a shift instruction.
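            // For example, "mul i32 %a, 8" becomes "lsl w0, w0, #3" (illustrative
            // registers) via emitLSL_ri, which can also fold a preceding non-free
            // zext/sext of the operand into the bitfield move implementing the shift.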
   4521   if (const auto *C = dyn_cast<ConstantInt>(Src1))
   4522     if (C->getValue().isPowerOf2()) {
   4523       uint64_t ShiftVal = C->getValue().logBase2();
   4524       MVT SrcVT = VT;
   4525       bool IsZExt = true;
   4526       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
   4527         if (!isIntExtFree(ZExt)) {
   4528           MVT VT;
   4529           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
   4530             SrcVT = VT;
   4531             IsZExt = true;
   4532             Src0 = ZExt->getOperand(0);
   4533           }
   4534         }
   4535       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
   4536         if (!isIntExtFree(SExt)) {
   4537           MVT VT;
   4538           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
   4539             SrcVT = VT;
   4540             IsZExt = false;
   4541             Src0 = SExt->getOperand(0);
   4542           }
   4543         }
   4544       }
   4545 
   4546       unsigned Src0Reg = getRegForValue(Src0);
   4547       if (!Src0Reg)
   4548         return false;
   4549       bool Src0IsKill = hasTrivialKill(Src0);
   4550 
   4551       unsigned ResultReg =
   4552           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
   4553 
   4554       if (ResultReg) {
   4555         updateValueMap(I, ResultReg);
   4556         return true;
   4557       }
   4558     }
   4559 
   4560   unsigned Src0Reg = getRegForValue(I->getOperand(0));
   4561   if (!Src0Reg)
   4562     return false;
   4563   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
   4564 
   4565   unsigned Src1Reg = getRegForValue(I->getOperand(1));
   4566   if (!Src1Reg)
   4567     return false;
   4568   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
   4569 
   4570   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
   4571 
   4572   if (!ResultReg)
   4573     return false;
   4574 
   4575   updateValueMap(I, ResultReg);
   4576   return true;
   4577 }
   4578 
   4579 bool AArch64FastISel::selectShift(const Instruction *I) {
   4580   MVT RetVT;
   4581   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
   4582     return false;
   4583 
   4584   if (RetVT.isVector())
   4585     return selectOperator(I, I->getOpcode());
   4586 
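            // For a constant shift amount, try to fold a preceding non-free zext/sext
            // of the shifted value into the bitfield-move form of the shift; for
            // example, "shl i32 (zext i8 %x to i32), 4" can be selected as a single
            // UBFM (ubfiz).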
   4587   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
   4588     unsigned ResultReg = 0;
   4589     uint64_t ShiftVal = C->getZExtValue();
   4590     MVT SrcVT = RetVT;
   4591     bool IsZExt = I->getOpcode() != Instruction::AShr;
   4592     const Value *Op0 = I->getOperand(0);
   4593     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
   4594       if (!isIntExtFree(ZExt)) {
   4595         MVT TmpVT;
   4596         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
   4597           SrcVT = TmpVT;
   4598           IsZExt = true;
   4599           Op0 = ZExt->getOperand(0);
   4600         }
   4601       }
   4602     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
   4603       if (!isIntExtFree(SExt)) {
   4604         MVT TmpVT;
   4605         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
   4606           SrcVT = TmpVT;
   4607           IsZExt = false;
   4608           Op0 = SExt->getOperand(0);
   4609         }
   4610       }
   4611     }
   4612 
   4613     unsigned Op0Reg = getRegForValue(Op0);
   4614     if (!Op0Reg)
   4615       return false;
   4616     bool Op0IsKill = hasTrivialKill(Op0);
   4617 
   4618     switch (I->getOpcode()) {
   4619     default: llvm_unreachable("Unexpected instruction.");
   4620     case Instruction::Shl:
   4621       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
   4622       break;
   4623     case Instruction::AShr:
   4624       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
   4625       break;
   4626     case Instruction::LShr:
   4627       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
   4628       break;
   4629     }
   4630     if (!ResultReg)
   4631       return false;
   4632 
   4633     updateValueMap(I, ResultReg);
   4634     return true;
   4635   }
   4636 
   4637   unsigned Op0Reg = getRegForValue(I->getOperand(0));
   4638   if (!Op0Reg)
   4639     return false;
   4640   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
   4641 
   4642   unsigned Op1Reg = getRegForValue(I->getOperand(1));
   4643   if (!Op1Reg)
   4644     return false;
   4645   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
   4646 
   4647   unsigned ResultReg = 0;
   4648   switch (I->getOpcode()) {
   4649   default: llvm_unreachable("Unexpected instruction.");
   4650   case Instruction::Shl:
   4651     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
   4652     break;
   4653   case Instruction::AShr:
   4654     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
   4655     break;
   4656   case Instruction::LShr:
   4657     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
   4658     break;
   4659   }
   4660 
   4661   if (!ResultReg)
   4662     return false;
   4663 
   4664   updateValueMap(I, ResultReg);
   4665   return true;
   4666 }
   4667 
   4668 bool AArch64FastISel::selectBitCast(const Instruction *I) {
   4669   MVT RetVT, SrcVT;
   4670 
   4671   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
   4672     return false;
   4673   if (!isTypeLegal(I->getType(), RetVT))
   4674     return false;
   4675 
   4676   unsigned Opc;
   4677   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
   4678     Opc = AArch64::FMOVWSr;
   4679   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
   4680     Opc = AArch64::FMOVXDr;
   4681   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
   4682     Opc = AArch64::FMOVSWr;
   4683   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
   4684     Opc = AArch64::FMOVDXr;
   4685   else
   4686     return false;
   4687 
   4688   const TargetRegisterClass *RC = nullptr;
   4689   switch (RetVT.SimpleTy) {
   4690   default: llvm_unreachable("Unexpected value type.");
   4691   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
   4692   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
   4693   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
   4694   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
   4695   }
   4696   unsigned Op0Reg = getRegForValue(I->getOperand(0));
   4697   if (!Op0Reg)
   4698     return false;
   4699   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
   4700   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
   4701 
   4702   if (!ResultReg)
   4703     return false;
   4704 
   4705   updateValueMap(I, ResultReg);
   4706   return true;
   4707 }
   4708 
   4709 bool AArch64FastISel::selectFRem(const Instruction *I) {
   4710   MVT RetVT;
   4711   if (!isTypeLegal(I->getType(), RetVT))
   4712     return false;
   4713 
   4714   RTLIB::Libcall LC;
   4715   switch (RetVT.SimpleTy) {
   4716   default:
   4717     return false;
   4718   case MVT::f32:
   4719     LC = RTLIB::REM_F32;
   4720     break;
   4721   case MVT::f64:
   4722     LC = RTLIB::REM_F64;
   4723     break;
   4724   }
   4725 
   4726   ArgListTy Args;
   4727   Args.reserve(I->getNumOperands());
   4728 
   4729   // Populate the argument list.
   4730   for (auto &Arg : I->operands()) {
   4731     ArgListEntry Entry;
   4732     Entry.Val = Arg;
   4733     Entry.Ty = Arg->getType();
   4734     Args.push_back(Entry);
   4735   }
   4736 
   4737   CallLoweringInfo CLI;
   4738   MCContext &Ctx = MF->getContext();
   4739   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
   4740                 TLI.getLibcallName(LC), std::move(Args));
   4741   if (!lowerCallTo(CLI))
   4742     return false;
   4743   updateValueMap(I, CLI.ResultReg);
   4744   return true;
   4745 }
   4746 
   4747 bool AArch64FastISel::selectSDiv(const Instruction *I) {
   4748   MVT VT;
   4749   if (!isTypeLegal(I->getType(), VT))
   4750     return false;
   4751 
   4752   if (!isa<ConstantInt>(I->getOperand(1)))
   4753     return selectBinaryOp(I, ISD::SDIV);
   4754 
   4755   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
   4756   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
   4757       !(C.isPowerOf2() || (-C).isPowerOf2()))
   4758     return selectBinaryOp(I, ISD::SDIV);
   4759 
   4760   unsigned Lg2 = C.countTrailingZeros();
   4761   unsigned Src0Reg = getRegForValue(I->getOperand(0));
   4762   if (!Src0Reg)
   4763     return false;
   4764   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
   4765 
   4766   if (cast<BinaryOperator>(I)->isExact()) {
   4767     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
   4768     if (!ResultReg)
   4769       return false;
   4770     updateValueMap(I, ResultReg);
   4771     return true;
   4772   }
   4773 
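            // Otherwise select between Src0 and Src0 + (Pow2 - 1) based on the sign of
            // Src0, then shift right arithmetically. Roughly, for "sdiv i32 %a, 8"
            // (illustrative registers):
            //   add  w8, w0, #7
            //   cmp  w0, #0
            //   csel w8, w8, w0, lt
            //   asr  w0, w8, #3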
   4774   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
   4775   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
   4776   if (!AddReg)
   4777     return false;
   4778 
   4779   // (Src0 < 0) ? Pow2 - 1 : 0;
   4780   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
   4781     return false;
   4782 
   4783   unsigned SelectOpc;
   4784   const TargetRegisterClass *RC;
   4785   if (VT == MVT::i64) {
   4786     SelectOpc = AArch64::CSELXr;
   4787     RC = &AArch64::GPR64RegClass;
   4788   } else {
   4789     SelectOpc = AArch64::CSELWr;
   4790     RC = &AArch64::GPR32RegClass;
   4791   }
   4792   unsigned SelectReg =
   4793       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
   4794                        Src0IsKill, AArch64CC::LT);
   4795   if (!SelectReg)
   4796     return false;
   4797 
   4798   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
   4799   // negate the result.
   4800   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
   4801   unsigned ResultReg;
   4802   if (C.isNegative())
   4803     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
   4804                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
   4805   else
   4806     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
   4807 
   4808   if (!ResultReg)
   4809     return false;
   4810 
   4811   updateValueMap(I, ResultReg);
   4812   return true;
   4813 }
   4814 
   4815 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
   4816 /// have to duplicate it for AArch64, because otherwise we would fail during the
   4817 /// sign-extend emission.
   4818 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
   4819   unsigned IdxN = getRegForValue(Idx);
   4820   if (IdxN == 0)
   4821     // Unhandled operand. Halt "fast" selection and bail.
   4822     return std::pair<unsigned, bool>(0, false);
   4823 
   4824   bool IdxNIsKill = hasTrivialKill(Idx);
   4825 
   4826   // If the index is smaller or larger than intptr_t, truncate or extend it.
   4827   MVT PtrVT = TLI.getPointerTy(DL);
   4828   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
   4829   if (IdxVT.bitsLT(PtrVT)) {
   4830     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
   4831     IdxNIsKill = true;
   4832   } else if (IdxVT.bitsGT(PtrVT))
   4833     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
   4834   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
   4835 }
   4836 
   4837 /// This is mostly a copy of the existing FastISel GEP code, but we have to
   4838 /// duplicate it for AArch64, because otherwise we would bail out even for
   4839 /// simple cases. This is because the standard fastEmit functions don't cover
   4840 /// MUL at all and ADD is lowered very inefficiently.
   4841 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
   4842   unsigned N = getRegForValue(I->getOperand(0));
   4843   if (!N)
   4844     return false;
   4845   bool NIsKill = hasTrivialKill(I->getOperand(0));
   4846 
   4847   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
   4848   // into a single N = N + TotalOffset.
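            // For example, a GEP whose indices are all constants, such as
            // "getelementptr %struct.S, %struct.S* %p, i64 0, i32 2", folds entirely
            // into TotalOffs and is emitted as a single add of the accumulated offset.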
   4849   uint64_t TotalOffs = 0;
   4850   MVT VT = TLI.getPointerTy(DL);
   4851   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
   4852        GTI != E; ++GTI) {
   4853     const Value *Idx = GTI.getOperand();
   4854     if (auto *StTy = dyn_cast<StructType>(*GTI)) {
   4855       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
   4856       // N = N + Offset
   4857       if (Field)
   4858         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
   4859     } else {
   4860       Type *Ty = GTI.getIndexedType();
   4861 
   4862       // If this is a constant subscript, handle it quickly.
   4863       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
   4864         if (CI->isZero())
   4865           continue;
   4866         // N = N + Offset
   4867         TotalOffs +=
   4868             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
   4869         continue;
   4870       }
   4871       if (TotalOffs) {
   4872         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
   4873         if (!N)
   4874           return false;
   4875         NIsKill = true;
   4876         TotalOffs = 0;
   4877       }
   4878 
   4879       // N = N + Idx * ElementSize;
   4880       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
   4881       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
   4882       unsigned IdxN = Pair.first;
   4883       bool IdxNIsKill = Pair.second;
   4884       if (!IdxN)
   4885         return false;
   4886 
   4887       if (ElementSize != 1) {
   4888         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
   4889         if (!C)
   4890           return false;
   4891         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
   4892         if (!IdxN)
   4893           return false;
   4894         IdxNIsKill = true;
   4895       }
   4896       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
   4897       if (!N)
   4898         return false;
   4899     }
   4900   }
   4901   if (TotalOffs) {
   4902     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
   4903     if (!N)
   4904       return false;
   4905   }
   4906   updateValueMap(I, N);
   4907   return true;
   4908 }
   4909 
   4910 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
   4911   switch (I->getOpcode()) {
   4912   default:
   4913     break;
   4914   case Instruction::Add:
   4915   case Instruction::Sub:
   4916     return selectAddSub(I);
   4917   case Instruction::Mul:
   4918     return selectMul(I);
   4919   case Instruction::SDiv:
   4920     return selectSDiv(I);
   4921   case Instruction::SRem:
   4922     if (!selectBinaryOp(I, ISD::SREM))
   4923       return selectRem(I, ISD::SREM);
   4924     return true;
   4925   case Instruction::URem:
   4926     if (!selectBinaryOp(I, ISD::UREM))
   4927       return selectRem(I, ISD::UREM);
   4928     return true;
   4929   case Instruction::Shl:
   4930   case Instruction::LShr:
   4931   case Instruction::AShr:
   4932     return selectShift(I);
   4933   case Instruction::And:
   4934   case Instruction::Or:
   4935   case Instruction::Xor:
   4936     return selectLogicalOp(I);
   4937   case Instruction::Br:
   4938     return selectBranch(I);
   4939   case Instruction::IndirectBr:
   4940     return selectIndirectBr(I);
   4941   case Instruction::BitCast:
   4942     if (!FastISel::selectBitCast(I))
   4943       return selectBitCast(I);
   4944     return true;
   4945   case Instruction::FPToSI:
   4946     if (!selectCast(I, ISD::FP_TO_SINT))
   4947       return selectFPToInt(I, /*Signed=*/true);
   4948     return true;
   4949   case Instruction::FPToUI:
   4950     return selectFPToInt(I, /*Signed=*/false);
   4951   case Instruction::ZExt:
   4952   case Instruction::SExt:
   4953     return selectIntExt(I);
   4954   case Instruction::Trunc:
   4955     if (!selectCast(I, ISD::TRUNCATE))
   4956       return selectTrunc(I);
   4957     return true;
   4958   case Instruction::FPExt:
   4959     return selectFPExt(I);
   4960   case Instruction::FPTrunc:
   4961     return selectFPTrunc(I);
   4962   case Instruction::SIToFP:
   4963     if (!selectCast(I, ISD::SINT_TO_FP))
   4964       return selectIntToFP(I, /*Signed=*/true);
   4965     return true;
   4966   case Instruction::UIToFP:
   4967     return selectIntToFP(I, /*Signed=*/false);
   4968   case Instruction::Load:
   4969     return selectLoad(I);
   4970   case Instruction::Store:
   4971     return selectStore(I);
   4972   case Instruction::FCmp:
   4973   case Instruction::ICmp:
   4974     return selectCmp(I);
   4975   case Instruction::Select:
   4976     return selectSelect(I);
   4977   case Instruction::Ret:
   4978     return selectRet(I);
   4979   case Instruction::FRem:
   4980     return selectFRem(I);
   4981   case Instruction::GetElementPtr:
   4982     return selectGetElementPtr(I);
   4983   }
   4984 
   4985   // Fall back to target-independent instruction selection.
   4986   return selectOperator(I, I->getOpcode());
   4987   // Silence warnings.
   4988   (void)&CC_AArch64_DarwinPCS_VarArg;
   4989 }
   4990 
   4991 namespace llvm {
   4992 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
   4993                                         const TargetLibraryInfo *LibInfo) {
   4994   return new AArch64FastISel(FuncInfo, LibInfo);
   4995 }
   4996 }
   4997