//===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the AArch64-specific support for the FastISel class. Some
     11 // of the target-specific code is generated by tablegen in the file
     12 // AArch64GenFastISel.inc, which is #included here.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "AArch64.h"
     17 #include "AArch64TargetMachine.h"
     18 #include "AArch64Subtarget.h"
     19 #include "MCTargetDesc/AArch64AddressingModes.h"
     20 #include "llvm/CodeGen/CallingConvLower.h"
     21 #include "llvm/CodeGen/FastISel.h"
     22 #include "llvm/CodeGen/FunctionLoweringInfo.h"
     23 #include "llvm/CodeGen/MachineConstantPool.h"
     24 #include "llvm/CodeGen/MachineFrameInfo.h"
     25 #include "llvm/CodeGen/MachineInstrBuilder.h"
     26 #include "llvm/CodeGen/MachineRegisterInfo.h"
     27 #include "llvm/IR/CallingConv.h"
     28 #include "llvm/IR/DataLayout.h"
     29 #include "llvm/IR/DerivedTypes.h"
     30 #include "llvm/IR/Function.h"
     31 #include "llvm/IR/GetElementPtrTypeIterator.h"
     32 #include "llvm/IR/GlobalAlias.h"
     33 #include "llvm/IR/GlobalVariable.h"
     34 #include "llvm/IR/Instructions.h"
     35 #include "llvm/IR/IntrinsicInst.h"
     36 #include "llvm/IR/Operator.h"
     37 #include "llvm/Support/CommandLine.h"
     38 using namespace llvm;
     39 
     40 namespace {
     41 
     42 class AArch64FastISel : public FastISel {
     43 
     44   class Address {
     45   public:
     46     typedef enum {
     47       RegBase,
     48       FrameIndexBase
     49     } BaseKind;
     50 
     51   private:
     52     BaseKind Kind;
     53     union {
     54       unsigned Reg;
     55       int FI;
     56     } Base;
     57     int64_t Offset;
     58 
     59   public:
     60     Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; }
     61     void setKind(BaseKind K) { Kind = K; }
     62     BaseKind getKind() const { return Kind; }
     63     bool isRegBase() const { return Kind == RegBase; }
     64     bool isFIBase() const { return Kind == FrameIndexBase; }
     65     void setReg(unsigned Reg) {
     66       assert(isRegBase() && "Invalid base register access!");
     67       Base.Reg = Reg;
     68     }
     69     unsigned getReg() const {
     70       assert(isRegBase() && "Invalid base register access!");
     71       return Base.Reg;
     72     }
     73     void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
     75       Base.FI = FI;
     76     }
     77     unsigned getFI() const {
     78       assert(isFIBase() && "Invalid base frame index access!");
     79       return Base.FI;
     80     }
     81     void setOffset(int64_t O) { Offset = O; }
     82     int64_t getOffset() { return Offset; }
     83 
     84     bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
     85   };
     86 
     87   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
     88   /// make the right decision when generating code for different targets.
     89   const AArch64Subtarget *Subtarget;
     90   LLVMContext *Context;
     91 
     92 private:
     93   // Selection routines.
     94   bool SelectLoad(const Instruction *I);
     95   bool SelectStore(const Instruction *I);
     96   bool SelectBranch(const Instruction *I);
     97   bool SelectIndirectBr(const Instruction *I);
     98   bool SelectCmp(const Instruction *I);
     99   bool SelectSelect(const Instruction *I);
    100   bool SelectFPExt(const Instruction *I);
    101   bool SelectFPTrunc(const Instruction *I);
    102   bool SelectFPToInt(const Instruction *I, bool Signed);
    103   bool SelectIntToFP(const Instruction *I, bool Signed);
    104   bool SelectRem(const Instruction *I, unsigned ISDOpcode);
    105   bool SelectCall(const Instruction *I, const char *IntrMemName);
    106   bool SelectIntrinsicCall(const IntrinsicInst &I);
    107   bool SelectRet(const Instruction *I);
    108   bool SelectTrunc(const Instruction *I);
    109   bool SelectIntExt(const Instruction *I);
    110   bool SelectMul(const Instruction *I);
    111 
    112   // Utility helper routines.
    113   bool isTypeLegal(Type *Ty, MVT &VT);
    114   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
    115   bool ComputeAddress(const Value *Obj, Address &Addr);
    116   bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
    117                        bool UseUnscaled);
    118   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
    119                             unsigned Flags, bool UseUnscaled);
    120   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
    121   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
    122                           unsigned Alignment);
    123   // Emit functions.
    124   bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
    125   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
    126                 bool UseUnscaled = false);
    127   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
    128                  bool UseUnscaled = false);
    129   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
    130   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
    131 
    132   unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
    133   unsigned AArch64MaterializeGV(const GlobalValue *GV);
    134 
    135   // Call handling routines.
    136 private:
    137   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
    138   bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
    139                        SmallVectorImpl<unsigned> &ArgRegs,
    140                        SmallVectorImpl<MVT> &ArgVTs,
    141                        SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
    142                        SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
    143                        unsigned &NumBytes);
    144   bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
    145                   const Instruction *I, CallingConv::ID CC, unsigned &NumBytes);
    146 
    147 public:
    148   // Backend specific FastISel code.
    149   unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
    150   unsigned TargetMaterializeConstant(const Constant *C) override;
    151 
    152   explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
    153                          const TargetLibraryInfo *libInfo)
    154       : FastISel(funcInfo, libInfo) {
    155     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
    156     Context = &funcInfo.Fn->getContext();
    157   }
    158 
    159   bool TargetSelectInstruction(const Instruction *I) override;
    160 
    161 #include "AArch64GenFastISel.inc"
    162 };
    163 
    164 } // end anonymous namespace
    165 
    166 #include "AArch64GenCallingConv.inc"
    167 
    168 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
    169   if (CC == CallingConv::WebKit_JS)
    170     return CC_AArch64_WebKit_JS;
    171   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
    172 }
    173 
    174 unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
    175   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
    176          "Alloca should always return a pointer.");
    177 
    178   // Don't handle dynamic allocas.
    179   if (!FuncInfo.StaticAllocaMap.count(AI))
    180     return 0;
    181 
    182   DenseMap<const AllocaInst *, int>::iterator SI =
    183       FuncInfo.StaticAllocaMap.find(AI);
    184 
    185   if (SI != FuncInfo.StaticAllocaMap.end()) {
    186     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
    187     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
    188             ResultReg)
    189         .addFrameIndex(SI->second)
    190         .addImm(0)
    191         .addImm(0);
    192     return ResultReg;
    193   }
    194 
    195   return 0;
    196 }
    197 
    198 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
    199   if (VT != MVT::f32 && VT != MVT::f64)
    200     return 0;
    201 
    202   const APFloat Val = CFP->getValueAPF();
    203   bool is64bit = (VT == MVT::f64);
    204 
  // Check whether we can use an FMOV instruction to materialize the constant;
  // otherwise, we have to materialize it via the constant pool.
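  // (For example, 1.0 or 0.5 fits the 8-bit FMOV immediate encoding, whereas a
  // value such as 0.1 does not and is loaded from the constant pool below.)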
    207   if (TLI.isFPImmLegal(Val, VT)) {
    208     int Imm;
    209     unsigned Opc;
    210     if (is64bit) {
    211       Imm = AArch64_AM::getFP64Imm(Val);
    212       Opc = AArch64::FMOVDi;
    213     } else {
    214       Imm = AArch64_AM::getFP32Imm(Val);
    215       Opc = AArch64::FMOVSi;
    216     }
    217     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    218     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    219         .addImm(Imm);
    220     return ResultReg;
    221   }
    222 
    223   // Materialize via constant pool.  MachineConstantPool wants an explicit
    224   // alignment.
    225   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
    226   if (Align == 0)
    227     Align = DL.getTypeAllocSize(CFP->getType());
    228 
    229   unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
    230   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
    231   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
    232           ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
    233 
    234   unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
    235   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    236   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    237       .addReg(ADRPReg)
    238       .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    239   return ResultReg;
    240 }
    241 
    242 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
    243   // We can't handle thread-local variables quickly yet.
    244   if (GV->isThreadLocal())
    245     return 0;
    246 
    247   // MachO still uses GOT for large code-model accesses, but ELF requires
    248   // movz/movk sequences, which FastISel doesn't handle yet.
    249   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
    250     return 0;
    251 
    252   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
    253 
    254   EVT DestEVT = TLI.getValueType(GV->getType(), true);
    255   if (!DestEVT.isSimple())
    256     return 0;
    257 
    258   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
    259   unsigned ResultReg;
    260 
    261   if (OpFlags & AArch64II::MO_GOT) {
    262     // ADRP + LDRX
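    // Illustrative sequence (ELF assembly spelling):
    //   adrp x8, :got:var ; ldr x8, [x8, :got_lo12:var]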
    263     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
    264             ADRPReg)
    265         .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
    266 
    267     ResultReg = createResultReg(&AArch64::GPR64RegClass);
    268     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
    269             ResultReg)
    270         .addReg(ADRPReg)
    271         .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
    272                           AArch64II::MO_NC);
    273   } else {
    274     // ADRP + ADDX
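    // Illustrative sequence (ELF assembly spelling):
    //   adrp x8, var ; add x8, x8, :lo12:var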
    275     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
    276             ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
    277 
    278     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    279     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
    280             ResultReg)
    281         .addReg(ADRPReg)
    282         .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
    283         .addImm(0);
    284   }
    285   return ResultReg;
    286 }
    287 
    288 unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
    289   EVT CEVT = TLI.getValueType(C->getType(), true);
    290 
    291   // Only handle simple types.
    292   if (!CEVT.isSimple())
    293     return 0;
    294   MVT VT = CEVT.getSimpleVT();
    295 
    296   // FIXME: Handle ConstantInt.
    297   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    298     return AArch64MaterializeFP(CFP, VT);
    299   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    300     return AArch64MaterializeGV(GV);
    301 
    302   return 0;
    303 }
    304 
    305 // Computes the address to get to an object.
    306 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
    307   const User *U = nullptr;
    308   unsigned Opcode = Instruction::UserOp1;
    309   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block; otherwise, it may not have a virtual register assigned.
    312     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
    313         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
    314       Opcode = I->getOpcode();
    315       U = I;
    316     }
    317   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    318     Opcode = C->getOpcode();
    319     U = C;
    320   }
    321 
    322   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    323     if (Ty->getAddressSpace() > 255)
    324       // Fast instruction selection doesn't support the special
    325       // address spaces.
    326       return false;
    327 
    328   switch (Opcode) {
    329   default:
    330     break;
    331   case Instruction::BitCast: {
    332     // Look through bitcasts.
    333     return ComputeAddress(U->getOperand(0), Addr);
    334   }
    335   case Instruction::IntToPtr: {
    336     // Look past no-op inttoptrs.
    337     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
    338       return ComputeAddress(U->getOperand(0), Addr);
    339     break;
    340   }
    341   case Instruction::PtrToInt: {
    342     // Look past no-op ptrtoints.
    343     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
    344       return ComputeAddress(U->getOperand(0), Addr);
    345     break;
    346   }
    347   case Instruction::GetElementPtr: {
    348     Address SavedAddr = Addr;
    349     uint64_t TmpOffset = Addr.getOffset();
    350 
    351     // Iterate through the GEP folding the constants into offsets where
    352     // we can.
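    // For example, a constant index of 3 into an array of i32 contributes
    // 3 * 4 = 12 bytes to TmpOffset, while a struct field index contributes
    // that field's StructLayout offset.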
    353     gep_type_iterator GTI = gep_type_begin(U);
    354     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
    355          ++i, ++GTI) {
    356       const Value *Op = *i;
    357       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
    358         const StructLayout *SL = DL.getStructLayout(STy);
    359         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
    360         TmpOffset += SL->getElementOffset(Idx);
    361       } else {
    362         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
    363         for (;;) {
    364           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
    365             // Constant-offset addressing.
    366             TmpOffset += CI->getSExtValue() * S;
    367             break;
    368           }
    369           if (canFoldAddIntoGEP(U, Op)) {
    370             // A compatible add with a constant operand. Fold the constant.
    371             ConstantInt *CI =
    372                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
    373             TmpOffset += CI->getSExtValue() * S;
    374             // Iterate on the other operand.
    375             Op = cast<AddOperator>(Op)->getOperand(0);
    376             continue;
    377           }
    378           // Unsupported
    379           goto unsupported_gep;
    380         }
    381       }
    382     }
    383 
    384     // Try to grab the base operand now.
    385     Addr.setOffset(TmpOffset);
    386     if (ComputeAddress(U->getOperand(0), Addr))
    387       return true;
    388 
    389     // We failed, restore everything and try the other options.
    390     Addr = SavedAddr;
    391 
    392   unsupported_gep:
    393     break;
    394   }
    395   case Instruction::Alloca: {
    396     const AllocaInst *AI = cast<AllocaInst>(Obj);
    397     DenseMap<const AllocaInst *, int>::iterator SI =
    398         FuncInfo.StaticAllocaMap.find(AI);
    399     if (SI != FuncInfo.StaticAllocaMap.end()) {
    400       Addr.setKind(Address::FrameIndexBase);
    401       Addr.setFI(SI->second);
    402       return true;
    403     }
    404     break;
    405   }
    406   }
    407 
    408   // Try to get this in a register if nothing else has worked.
    409   if (!Addr.isValid())
    410     Addr.setReg(getRegForValue(Obj));
    411   return Addr.isValid();
    412 }
    413 
    414 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
    415   EVT evt = TLI.getValueType(Ty, true);
    416 
    417   // Only handle simple types.
    418   if (evt == MVT::Other || !evt.isSimple())
    419     return false;
    420   VT = evt.getSimpleVT();
    421 
    422   // This is a legal type, but it's not something we handle in fast-isel.
    423   if (VT == MVT::f128)
    424     return false;
    425 
    426   // Handle all other legal types, i.e. a register that will directly hold this
    427   // value.
    428   return TLI.isTypeLegal(VT);
    429 }
    430 
    431 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
    432   if (isTypeLegal(Ty, VT))
    433     return true;
    434 
  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now. For stores, this reflects truncation.
    437   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    438     return true;
    439 
    440   return false;
    441 }
    442 
    443 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
    444                                       int64_t ScaleFactor, bool UseUnscaled) {
    445   bool needsLowering = false;
    446   int64_t Offset = Addr.getOffset();
    447   switch (VT.SimpleTy) {
    448   default:
    449     return false;
    450   case MVT::i1:
    451   case MVT::i8:
    452   case MVT::i16:
    453   case MVT::i32:
    454   case MVT::i64:
    455   case MVT::f32:
    456   case MVT::f64:
    457     if (!UseUnscaled)
    458       // Using scaled, 12-bit, unsigned immediate offsets.
    459       needsLowering = ((Offset & 0xfff) != Offset);
    460     else
    461       // Using unscaled, 9-bit, signed immediate offsets.
      needsLowering = (Offset > 255 || Offset < -256);
    463     break;
    464   }
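  // For example, an i32 load at byte offset 8 fits the scaled form
  // (ldr w0, [x1, #8], with the immediate encoded as 8 / 4), whereas byte
  // offset -4 must use the unscaled form (ldur w0, [x1, #-4]).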
    465 
  // If this is a frame index and the offset needs lowering, put the alloca
  // address into a register, set the base kind back to register, and continue.
  // This should almost never happen.
    469   if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
    470     unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
    471     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
    472             ResultReg)
    473         .addFrameIndex(Addr.getFI())
    474         .addImm(0)
    475         .addImm(0);
    476     Addr.setKind(Address::RegBase);
    477     Addr.setReg(ResultReg);
    478   }
    479 
  // Since the offset is too large for the load/store instruction, materialize
  // the reg + offset into a register.
    482   if (needsLowering) {
    483     uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
    484     unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
    485                                       UnscaledOffset, MVT::i64);
    486     if (ResultReg == 0)
    487       return false;
    488     Addr.setReg(ResultReg);
    489     Addr.setOffset(0);
    490   }
    491   return true;
    492 }
    493 
    494 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
    495                                            const MachineInstrBuilder &MIB,
    496                                            unsigned Flags, bool UseUnscaled) {
    497   int64_t Offset = Addr.getOffset();
    498   // Frame base works a bit differently. Handle it separately.
    499   if (Addr.getKind() == Address::FrameIndexBase) {
    500     int FI = Addr.getFI();
    501     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
    502     // and alignment should be based on the VT.
    503     MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
    504         MachinePointerInfo::getFixedStack(FI, Offset), Flags,
    505         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    506     // Now add the rest of the operands.
    507     MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
    508   } else {
    509     // Now add the rest of the operands.
    510     MIB.addReg(Addr.getReg());
    511     MIB.addImm(Offset);
    512   }
    513 }
    514 
    515 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
    516                                bool UseUnscaled) {
    517   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
    518   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
    519   if (!UseUnscaled && Addr.getOffset() < 0)
    520     UseUnscaled = true;
    521 
    522   unsigned Opc;
    523   const TargetRegisterClass *RC;
    524   bool VTIsi1 = false;
    525   int64_t ScaleFactor = 0;
    526   switch (VT.SimpleTy) {
    527   default:
    528     return false;
    529   case MVT::i1:
    530     VTIsi1 = true;
    531   // Intentional fall-through.
    532   case MVT::i8:
    533     Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
    534     RC = &AArch64::GPR32RegClass;
    535     ScaleFactor = 1;
    536     break;
    537   case MVT::i16:
    538     Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
    539     RC = &AArch64::GPR32RegClass;
    540     ScaleFactor = 2;
    541     break;
    542   case MVT::i32:
    543     Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
    544     RC = &AArch64::GPR32RegClass;
    545     ScaleFactor = 4;
    546     break;
    547   case MVT::i64:
    548     Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
    549     RC = &AArch64::GPR64RegClass;
    550     ScaleFactor = 8;
    551     break;
    552   case MVT::f32:
    553     Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
    554     RC = TLI.getRegClassFor(VT);
    555     ScaleFactor = 4;
    556     break;
    557   case MVT::f64:
    558     Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
    559     RC = TLI.getRegClassFor(VT);
    560     ScaleFactor = 8;
    561     break;
    562   }
    563   // Scale the offset.
    564   if (!UseUnscaled) {
    565     int64_t Offset = Addr.getOffset();
    566     if (Offset & (ScaleFactor - 1))
    567       // Retry using an unscaled, 9-bit, signed immediate offset.
    568       return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
    569 
    570     Addr.setOffset(Offset / ScaleFactor);
    571   }
    572 
    573   // Simplify this down to something we can handle.
    574   if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
    575     return false;
    576 
    577   // Create the base instruction, then add the operands.
    578   ResultReg = createResultReg(RC);
    579   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    580                                     TII.get(Opc), ResultReg);
    581   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
    582 
    583   // Loading an i1 requires special handling.
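  // encodeLogicalImmediate(1, 32) is the encoding of the mask 0x1, so the
  // instruction below is effectively 'and wN, wN, #0x1'.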
    584   if (VTIsi1) {
    585     MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
    586     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
    587     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
    588             ANDReg)
    589         .addReg(ResultReg)
    590         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    591     ResultReg = ANDReg;
    592   }
    593   return true;
    594 }
    595 
    596 bool AArch64FastISel::SelectLoad(const Instruction *I) {
    597   MVT VT;
    598   // Verify we have a legal type before going any further.  Currently, we handle
    599   // simple types that will directly fit in a register (i32/f32/i64/f64) or
    600   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
    601   if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
    602     return false;
    603 
    604   // See if we can handle this address.
    605   Address Addr;
    606   if (!ComputeAddress(I->getOperand(0), Addr))
    607     return false;
    608 
    609   unsigned ResultReg;
    610   if (!EmitLoad(VT, ResultReg, Addr))
    611     return false;
    612 
    613   UpdateValueMap(I, ResultReg);
    614   return true;
    615 }
    616 
    617 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
    618                                 bool UseUnscaled) {
    619   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
    620   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
    621   if (!UseUnscaled && Addr.getOffset() < 0)
    622     UseUnscaled = true;
    623 
    624   unsigned StrOpc;
    625   bool VTIsi1 = false;
    626   int64_t ScaleFactor = 0;
    627   // Using scaled, 12-bit, unsigned immediate offsets.
    628   switch (VT.SimpleTy) {
    629   default:
    630     return false;
    631   case MVT::i1:
    632     VTIsi1 = true;
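  // Intentional fall-through.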
    633   case MVT::i8:
    634     StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
    635     ScaleFactor = 1;
    636     break;
    637   case MVT::i16:
    638     StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
    639     ScaleFactor = 2;
    640     break;
    641   case MVT::i32:
    642     StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
    643     ScaleFactor = 4;
    644     break;
    645   case MVT::i64:
    646     StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
    647     ScaleFactor = 8;
    648     break;
    649   case MVT::f32:
    650     StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
    651     ScaleFactor = 4;
    652     break;
    653   case MVT::f64:
    654     StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
    655     ScaleFactor = 8;
    656     break;
    657   }
    658   // Scale the offset.
    659   if (!UseUnscaled) {
    660     int64_t Offset = Addr.getOffset();
    661     if (Offset & (ScaleFactor - 1))
    662       // Retry using an unscaled, 9-bit, signed immediate offset.
    663       return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
    664 
    665     Addr.setOffset(Offset / ScaleFactor);
    666   }
    667 
    668   // Simplify this down to something we can handle.
    669   if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
    670     return false;
    671 
    672   // Storing an i1 requires special handling.
    673   if (VTIsi1) {
    674     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
    675     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
    676     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
    677             ANDReg)
    678         .addReg(SrcReg)
    679         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    680     SrcReg = ANDReg;
    681   }
    682   // Create the base instruction, then add the operands.
    683   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    684                                     TII.get(StrOpc)).addReg(SrcReg);
    685   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
    686   return true;
    687 }
    688 
    689 bool AArch64FastISel::SelectStore(const Instruction *I) {
    690   MVT VT;
    691   Value *Op0 = I->getOperand(0);
    692   // Verify we have a legal type before going any further.  Currently, we handle
    693   // simple types that will directly fit in a register (i32/f32/i64/f64) or
    694   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
    695   if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
    696       cast<StoreInst>(I)->isAtomic())
    697     return false;
    698 
    699   // Get the value to be stored into a register.
    700   unsigned SrcReg = getRegForValue(Op0);
    701   if (SrcReg == 0)
    702     return false;
    703 
    704   // See if we can handle this address.
    705   Address Addr;
    706   if (!ComputeAddress(I->getOperand(1), Addr))
    707     return false;
    708 
    709   if (!EmitStore(VT, SrcReg, Addr))
    710     return false;
    711   return true;
    712 }
    713 
    714 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
    715   switch (Pred) {
    716   case CmpInst::FCMP_ONE:
    717   case CmpInst::FCMP_UEQ:
    718   default:
    719     // AL is our "false" for now. The other two need more compares.
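    // (FCMP_ONE would need MI || GT and FCMP_UEQ would need EQ || VS, i.e. two
    // condition checks each.)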
    720     return AArch64CC::AL;
    721   case CmpInst::ICMP_EQ:
    722   case CmpInst::FCMP_OEQ:
    723     return AArch64CC::EQ;
    724   case CmpInst::ICMP_SGT:
    725   case CmpInst::FCMP_OGT:
    726     return AArch64CC::GT;
    727   case CmpInst::ICMP_SGE:
    728   case CmpInst::FCMP_OGE:
    729     return AArch64CC::GE;
    730   case CmpInst::ICMP_UGT:
    731   case CmpInst::FCMP_UGT:
    732     return AArch64CC::HI;
    733   case CmpInst::FCMP_OLT:
    734     return AArch64CC::MI;
    735   case CmpInst::ICMP_ULE:
    736   case CmpInst::FCMP_OLE:
    737     return AArch64CC::LS;
    738   case CmpInst::FCMP_ORD:
    739     return AArch64CC::VC;
    740   case CmpInst::FCMP_UNO:
    741     return AArch64CC::VS;
    742   case CmpInst::FCMP_UGE:
    743     return AArch64CC::PL;
    744   case CmpInst::ICMP_SLT:
    745   case CmpInst::FCMP_ULT:
    746     return AArch64CC::LT;
    747   case CmpInst::ICMP_SLE:
    748   case CmpInst::FCMP_ULE:
    749     return AArch64CC::LE;
    750   case CmpInst::FCMP_UNE:
    751   case CmpInst::ICMP_NE:
    752     return AArch64CC::NE;
    753   case CmpInst::ICMP_UGE:
    754     return AArch64CC::HS;
    755   case CmpInst::ICMP_ULT:
    756     return AArch64CC::LO;
    757   }
    758 }
    759 
    760 bool AArch64FastISel::SelectBranch(const Instruction *I) {
    761   const BranchInst *BI = cast<BranchInst>(I);
    762   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
    763   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
    764 
    765   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    766     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
    767       // We may not handle every CC for now.
    768       AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
    769       if (CC == AArch64CC::AL)
    770         return false;
    771 
    772       // Emit the cmp.
    773       if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    774         return false;
    775 
    776       // Emit the branch.
    777       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    778           .addImm(CC)
    779           .addMBB(TBB);
    780       FuncInfo.MBB->addSuccessor(TBB);
    781 
    782       FastEmitBranch(FBB, DbgLoc);
    783       return true;
    784     }
    785   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    786     MVT SrcVT;
    787     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
    788         (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
    789       unsigned CondReg = getRegForValue(TI->getOperand(0));
    790       if (CondReg == 0)
    791         return false;
    792 
    793       // Issue an extract_subreg to get the lower 32-bits.
    794       if (SrcVT == MVT::i64)
    795         CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
    796                                              AArch64::sub_32);
    797 
    798       MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
    799       unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
    800       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    801               TII.get(AArch64::ANDWri), ANDReg)
    802           .addReg(CondReg)
    803           .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    804       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    805               TII.get(AArch64::SUBSWri))
    806           .addReg(ANDReg)
    807           .addReg(ANDReg)
    808           .addImm(0)
    809           .addImm(0);
    810 
    811       unsigned CC = AArch64CC::NE;
    812       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    813         std::swap(TBB, FBB);
    814         CC = AArch64CC::EQ;
    815       }
    816       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    817           .addImm(CC)
    818           .addMBB(TBB);
    819       FuncInfo.MBB->addSuccessor(TBB);
    820       FastEmitBranch(FBB, DbgLoc);
    821       return true;
    822     }
    823   } else if (const ConstantInt *CI =
    824                  dyn_cast<ConstantInt>(BI->getCondition())) {
    825     uint64_t Imm = CI->getZExtValue();
    826     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    827     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
    828         .addMBB(Target);
    829     FuncInfo.MBB->addSuccessor(Target);
    830     return true;
    831   }
    832 
    833   unsigned CondReg = getRegForValue(BI->getCondition());
    834   if (CondReg == 0)
    835     return false;
    836 
    837   // We've been divorced from our compare!  Our block was split, and
  // now our compare lives in a predecessor block.  We mustn't
    839   // re-compare here, as the children of the compare aren't guaranteed
    840   // live across the block boundary (we *could* check for this).
    841   // Regardless, the compare has been done in the predecessor block,
    842   // and it left a value for us in a virtual register.  Ergo, we test
    843   // the one-bit value left in the virtual register.
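  // Concretely, this emits 'cmp wN, #0' (a SUBS writing WZR) followed by a
  // conditional branch.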
    844   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
    845           AArch64::WZR)
    846       .addReg(CondReg)
    847       .addImm(0)
    848       .addImm(0);
    849 
    850   unsigned CC = AArch64CC::NE;
    851   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    852     std::swap(TBB, FBB);
    853     CC = AArch64CC::EQ;
    854   }
    855 
    856   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
    857       .addImm(CC)
    858       .addMBB(TBB);
    859   FuncInfo.MBB->addSuccessor(TBB);
    860   FastEmitBranch(FBB, DbgLoc);
    861   return true;
    862 }
    863 
    864 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
    865   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
    866   unsigned AddrReg = getRegForValue(BI->getOperand(0));
    867   if (AddrReg == 0)
    868     return false;
    869 
    870   // Emit the indirect branch.
    871   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
    872       .addReg(AddrReg);
    873 
    874   // Make sure the CFG is up-to-date.
    875   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
    876     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
    877 
    878   return true;
    879 }
    880 
    881 bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
    882   Type *Ty = Src1Value->getType();
    883   EVT SrcEVT = TLI.getValueType(Ty, true);
    884   if (!SrcEVT.isSimple())
    885     return false;
    886   MVT SrcVT = SrcEVT.getSimpleVT();
    887 
    888   // Check to see if the 2nd operand is a constant that we can encode directly
    889   // in the compare.
    890   uint64_t Imm;
    891   bool UseImm = false;
    892   bool isNegativeImm = false;
    893   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    894     if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
    895         SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    896       const APInt &CIVal = ConstInt->getValue();
    897 
    898       Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
    899       if (CIVal.isNegative()) {
    900         isNegativeImm = true;
    901         Imm = -Imm;
    902       }
    903       // FIXME: We can handle more immediates using shifts.
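      // Only plain 12-bit unsigned immediates (0..4095) are handled here; e.g.
      // 'cmp w0, #4095' works, but 4096 would need the shifted (LSL #12) form.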
    904       UseImm = ((Imm & 0xfff) == Imm);
    905     }
    906   } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    907     if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
    908       if (ConstFP->isZero() && !ConstFP->isNegative())
    909         UseImm = true;
    910   }
    911 
    912   unsigned ZReg;
    913   unsigned CmpOpc;
    914   bool isICmp = true;
    915   bool needsExt = false;
    916   switch (SrcVT.SimpleTy) {
    917   default:
    918     return false;
    919   case MVT::i1:
    920   case MVT::i8:
    921   case MVT::i16:
    922     needsExt = true;
    923   // Intentional fall-through.
    924   case MVT::i32:
    925     ZReg = AArch64::WZR;
    926     if (UseImm)
    927       CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
    928     else
    929       CmpOpc = AArch64::SUBSWrr;
    930     break;
    931   case MVT::i64:
    932     ZReg = AArch64::XZR;
    933     if (UseImm)
    934       CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
    935     else
    936       CmpOpc = AArch64::SUBSXrr;
    937     break;
    938   case MVT::f32:
    939     isICmp = false;
    940     CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
    941     break;
    942   case MVT::f64:
    943     isICmp = false;
    944     CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
    945     break;
    946   }
    947 
    948   unsigned SrcReg1 = getRegForValue(Src1Value);
    949   if (SrcReg1 == 0)
    950     return false;
    951 
    952   unsigned SrcReg2;
    953   if (!UseImm) {
    954     SrcReg2 = getRegForValue(Src2Value);
    955     if (SrcReg2 == 0)
    956       return false;
    957   }
    958 
  // The operands are i1, i8, or i16; zero- or sign-extend them to i32.
    960   if (needsExt) {
    961     SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
    962     if (SrcReg1 == 0)
    963       return false;
    964     if (!UseImm) {
    965       SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
    966       if (SrcReg2 == 0)
    967         return false;
    968     }
    969   }
    970 
    971   if (isICmp) {
    972     if (UseImm)
    973       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
    974           .addReg(ZReg)
    975           .addReg(SrcReg1)
    976           .addImm(Imm)
    977           .addImm(0);
    978     else
    979       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
    980           .addReg(ZReg)
    981           .addReg(SrcReg1)
    982           .addReg(SrcReg2);
    983   } else {
    984     if (UseImm)
    985       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
    986           .addReg(SrcReg1);
    987     else
    988       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
    989           .addReg(SrcReg1)
    990           .addReg(SrcReg2);
    991   }
    992   return true;
    993 }
    994 
    995 bool AArch64FastISel::SelectCmp(const Instruction *I) {
    996   const CmpInst *CI = cast<CmpInst>(I);
    997 
    998   // We may not handle every CC for now.
    999   AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
   1000   if (CC == AArch64CC::AL)
   1001     return false;
   1002 
   1003   // Emit the cmp.
   1004   if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
   1005     return false;
   1006 
   1007   // Now set a register based on the comparison.
   1008   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
   1009   unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
   1010   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
   1011           ResultReg)
   1012       .addReg(AArch64::WZR)
   1013       .addReg(AArch64::WZR)
   1014       .addImm(invertedCC);
   1015 
   1016   UpdateValueMap(I, ResultReg);
   1017   return true;
   1018 }
   1019 
   1020 bool AArch64FastISel::SelectSelect(const Instruction *I) {
   1021   const SelectInst *SI = cast<SelectInst>(I);
   1022 
   1023   EVT DestEVT = TLI.getValueType(SI->getType(), true);
   1024   if (!DestEVT.isSimple())
   1025     return false;
   1026 
   1027   MVT DestVT = DestEVT.getSimpleVT();
   1028   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
   1029       DestVT != MVT::f64)
   1030     return false;
   1031 
   1032   unsigned CondReg = getRegForValue(SI->getCondition());
   1033   if (CondReg == 0)
   1034     return false;
   1035   unsigned TrueReg = getRegForValue(SI->getTrueValue());
   1036   if (TrueReg == 0)
   1037     return false;
   1038   unsigned FalseReg = getRegForValue(SI->getFalseValue());
   1039   if (FalseReg == 0)
   1040     return false;
   1041 
   1042 
   1043   MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
   1044   unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
   1045   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
   1046           ANDReg)
   1047       .addReg(CondReg)
   1048       .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
   1049 
   1050   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
   1051       .addReg(ANDReg)
   1052       .addReg(ANDReg)
   1053       .addImm(0)
   1054       .addImm(0);
   1055 
   1056   unsigned SelectOpc;
   1057   switch (DestVT.SimpleTy) {
   1058   default:
   1059     return false;
   1060   case MVT::i32:
   1061     SelectOpc = AArch64::CSELWr;
   1062     break;
   1063   case MVT::i64:
   1064     SelectOpc = AArch64::CSELXr;
   1065     break;
   1066   case MVT::f32:
   1067     SelectOpc = AArch64::FCSELSrrr;
   1068     break;
   1069   case MVT::f64:
   1070     SelectOpc = AArch64::FCSELDrrr;
   1071     break;
   1072   }
   1073 
   1074   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
   1075   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
   1076           ResultReg)
   1077       .addReg(TrueReg)
   1078       .addReg(FalseReg)
   1079       .addImm(AArch64CC::NE);
   1080 
   1081   UpdateValueMap(I, ResultReg);
   1082   return true;
   1083 }
   1084 
   1085 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
   1086   Value *V = I->getOperand(0);
   1087   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
   1088     return false;
   1089 
   1090   unsigned Op = getRegForValue(V);
   1091   if (Op == 0)
   1092     return false;
   1093 
   1094   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
   1095   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
   1096           ResultReg).addReg(Op);
   1097   UpdateValueMap(I, ResultReg);
   1098   return true;
   1099 }
   1100 
   1101 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
   1102   Value *V = I->getOperand(0);
   1103   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
   1104     return false;
   1105 
   1106   unsigned Op = getRegForValue(V);
   1107   if (Op == 0)
   1108     return false;
   1109 
   1110   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
   1111   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
   1112           ResultReg).addReg(Op);
   1113   UpdateValueMap(I, ResultReg);
   1114   return true;
   1115 }
   1116 
   1117 // FPToUI and FPToSI
   1118 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
   1119   MVT DestVT;
   1120   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
   1121     return false;
   1122 
   1123   unsigned SrcReg = getRegForValue(I->getOperand(0));
   1124   if (SrcReg == 0)
   1125     return false;
   1126 
   1127   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
   1128   if (SrcVT == MVT::f128)
   1129     return false;
   1130 
   1131   unsigned Opc;
   1132   if (SrcVT == MVT::f64) {
   1133     if (Signed)
   1134       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
   1135     else
   1136       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
   1137   } else {
   1138     if (Signed)
   1139       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
   1140     else
   1141       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
   1142   }
   1143   unsigned ResultReg = createResultReg(
   1144       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
   1145   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
   1146       .addReg(SrcReg);
   1147   UpdateValueMap(I, ResultReg);
   1148   return true;
   1149 }
   1150 
   1151 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
   1152   MVT DestVT;
   1153   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
   1154     return false;
   1155   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
   1156           "Unexpected value type.");
   1157 
   1158   unsigned SrcReg = getRegForValue(I->getOperand(0));
   1159   if (SrcReg == 0)
   1160     return false;
   1161 
   1162   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
   1163 
   1164   // Handle sign-extension.
   1165   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
   1166     SrcReg =
   1167         EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
   1168     if (SrcReg == 0)
   1169       return false;
   1170   }
   1171 
   1172   MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
   1173                                                   : &AArch64::GPR32RegClass);
   1174 
   1175   unsigned Opc;
   1176   if (SrcVT == MVT::i64) {
   1177     if (Signed)
   1178       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
   1179     else
   1180       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
   1181   } else {
   1182     if (Signed)
   1183       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
   1184     else
   1185       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
   1186   }
   1187 
   1188   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
   1189   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
   1190       .addReg(SrcReg);
   1191   UpdateValueMap(I, ResultReg);
   1192   return true;
   1193 }
   1194 
   1195 bool AArch64FastISel::ProcessCallArgs(
   1196     SmallVectorImpl<Value *> &Args, SmallVectorImpl<unsigned> &ArgRegs,
   1197     SmallVectorImpl<MVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
   1198     SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
   1199     unsigned &NumBytes) {
   1200   SmallVector<CCValAssign, 16> ArgLocs;
   1201   CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
   1202   CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
   1203 
   1204   // Get a count of how many bytes are to be pushed on the stack.
   1205   NumBytes = CCInfo.getNextStackOffset();
   1206 
   1207   // Issue CALLSEQ_START
   1208   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
   1209   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
   1210       .addImm(NumBytes);
   1211 
   1212   // Process the args.
   1213   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   1214     CCValAssign &VA = ArgLocs[i];
   1215     unsigned Arg = ArgRegs[VA.getValNo()];
   1216     MVT ArgVT = ArgVTs[VA.getValNo()];
   1217 
   1218     // Handle arg promotion: SExt, ZExt, AExt.
   1219     switch (VA.getLocInfo()) {
   1220     case CCValAssign::Full:
   1221       break;
   1222     case CCValAssign::SExt: {
   1223       MVT DestVT = VA.getLocVT();
   1224       MVT SrcVT = ArgVT;
   1225       Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
   1226       if (Arg == 0)
   1227         return false;
   1228       break;
   1229     }
   1230     case CCValAssign::AExt:
   1231     // Intentional fall-through.
   1232     case CCValAssign::ZExt: {
   1233       MVT DestVT = VA.getLocVT();
   1234       MVT SrcVT = ArgVT;
   1235       Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
   1236       if (Arg == 0)
   1237         return false;
   1238       break;
   1239     }
   1240     default:
   1241       llvm_unreachable("Unknown arg promotion!");
   1242     }
   1243 
   1244     // Now copy/store arg to correct locations.
   1245     if (VA.isRegLoc() && !VA.needsCustom()) {
   1246       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1247               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
   1248       RegArgs.push_back(VA.getLocReg());
   1249     } else if (VA.needsCustom()) {
   1250       // FIXME: Handle custom args.
   1251       return false;
   1252     } else {
   1253       assert(VA.isMemLoc() && "Assuming store on stack.");
   1254 
   1255       // Need to store on the stack.
   1256       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
   1257 
   1258       unsigned BEAlign = 0;
   1259       if (ArgSize < 8 && !Subtarget->isLittleEndian())
   1260         BEAlign = 8 - ArgSize;
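      // For example, a 4-byte argument on a big-endian target is stored 4 bytes
      // into its 8-byte stack slot.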
   1261 
   1262       Address Addr;
   1263       Addr.setKind(Address::RegBase);
   1264       Addr.setReg(AArch64::SP);
   1265       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
   1266 
   1267       if (!EmitStore(ArgVT, Arg, Addr))
   1268         return false;
   1269     }
   1270   }
   1271   return true;
   1272 }
   1273 
   1274 bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
   1275                                  const Instruction *I, CallingConv::ID CC,
   1276                                  unsigned &NumBytes) {
   1277   // Issue CALLSEQ_END
   1278   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   1279   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
   1280       .addImm(NumBytes)
   1281       .addImm(0);
   1282 
   1283   // Now the return value.
   1284   if (RetVT != MVT::isVoid) {
   1285     SmallVector<CCValAssign, 16> RVLocs;
   1286     CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
   1287     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
   1288 
   1289     // Only handle a single return value.
   1290     if (RVLocs.size() != 1)
   1291       return false;
   1292 
   1293     // Copy all of the result registers out of their specified physreg.
   1294     MVT CopyVT = RVLocs[0].getValVT();
   1295     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
   1296     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1297             TII.get(TargetOpcode::COPY),
   1298             ResultReg).addReg(RVLocs[0].getLocReg());
   1299     UsedRegs.push_back(RVLocs[0].getLocReg());
   1300 
   1301     // Finally update the result.
   1302     UpdateValueMap(I, ResultReg);
   1303   }
   1304 
   1305   return true;
   1306 }
   1307 
   1308 bool AArch64FastISel::SelectCall(const Instruction *I,
   1309                                  const char *IntrMemName = nullptr) {
   1310   const CallInst *CI = cast<CallInst>(I);
   1311   const Value *Callee = CI->getCalledValue();
   1312 
   1313   // Don't handle inline asm or intrinsics.
   1314   if (isa<InlineAsm>(Callee))
   1315     return false;
   1316 
   1317   // Only handle global variable Callees.
   1318   const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
   1319   if (!GV)
   1320     return false;
   1321 
   1322   // Check the calling convention.
   1323   ImmutableCallSite CS(CI);
   1324   CallingConv::ID CC = CS.getCallingConv();
   1325 
   1326   // Let SDISel handle vararg functions.
   1327   PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
   1328   FunctionType *FTy = cast<FunctionType>(PT->getElementType());
   1329   if (FTy->isVarArg())
   1330     return false;
   1331 
   1332   // Handle *simple* calls for now.
   1333   MVT RetVT;
   1334   Type *RetTy = I->getType();
   1335   if (RetTy->isVoidTy())
   1336     RetVT = MVT::isVoid;
   1337   else if (!isTypeLegal(RetTy, RetVT))
   1338     return false;
   1339 
   1340   // Set up the argument vectors.
   1341   SmallVector<Value *, 8> Args;
   1342   SmallVector<unsigned, 8> ArgRegs;
   1343   SmallVector<MVT, 8> ArgVTs;
   1344   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
   1345   Args.reserve(CS.arg_size());
   1346   ArgRegs.reserve(CS.arg_size());
   1347   ArgVTs.reserve(CS.arg_size());
   1348   ArgFlags.reserve(CS.arg_size());
   1349 
   1350   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
   1351        i != e; ++i) {
   1352     // If we're lowering a memory intrinsic instead of a regular call, skip the
   1353     // last two arguments, which shouldn't be passed to the underlying function.
   1354     if (IntrMemName && e - i <= 2)
   1355       break;
   1356 
   1357     unsigned Arg = getRegForValue(*i);
   1358     if (Arg == 0)
   1359       return false;
   1360 
   1361     ISD::ArgFlagsTy Flags;
   1362     unsigned AttrInd = i - CS.arg_begin() + 1;
   1363     if (CS.paramHasAttr(AttrInd, Attribute::SExt))
   1364       Flags.setSExt();
   1365     if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
   1366       Flags.setZExt();
   1367 
   1368     // FIXME: Only handle *easy* calls for now.
   1369     if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
   1370         CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
   1371         CS.paramHasAttr(AttrInd, Attribute::Nest) ||
   1372         CS.paramHasAttr(AttrInd, Attribute::ByVal))
   1373       return false;
   1374 
   1375     MVT ArgVT;
   1376     Type *ArgTy = (*i)->getType();
   1377     if (!isTypeLegal(ArgTy, ArgVT) &&
   1378         !(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16))
   1379       return false;
   1380 
   1381     // We don't handle vector parameters yet.
   1382     if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
   1383       return false;
   1384 
   1385     unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
   1386     Flags.setOrigAlign(OriginalAlignment);
   1387 
   1388     Args.push_back(*i);
   1389     ArgRegs.push_back(Arg);
   1390     ArgVTs.push_back(ArgVT);
   1391     ArgFlags.push_back(Flags);
   1392   }
   1393 
   1394   // Handle the arguments now that we've gotten them.
   1395   SmallVector<unsigned, 4> RegArgs;
   1396   unsigned NumBytes;
   1397   if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
   1398     return false;
   1399 
   1400   // Issue the call.
   1401   MachineInstrBuilder MIB;
   1402   MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL));
   1403   if (!IntrMemName)
   1404     MIB.addGlobalAddress(GV, 0, 0);
   1405   else
   1406     MIB.addExternalSymbol(IntrMemName, 0);
   1407 
   1408   // Add implicit physical register uses to the call.
   1409   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
   1410     MIB.addReg(RegArgs[i], RegState::Implicit);
   1411 
   1412   // Add a register mask with the call-preserved registers.
   1413   // Proper defs for return values will be added by setPhysRegsDeadExcept().
   1414   MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
   1415 
   1416   // Finish off the call including any return values.
   1417   SmallVector<unsigned, 4> UsedRegs;
   1418   if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes))
   1419     return false;
   1420 
   1421   // Set all unused physreg defs as dead.
   1422   static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
   1423 
   1424   return true;
   1425 }
   1426 
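        // Heuristic used to decide whether a memcpy is worth inlining: with a
        // known alignment, the length may be at most about four alignment-sized
        // chunks; with an unknown alignment, anything under 32 bytes qualifies.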
   1427 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
   1428   if (Alignment)
   1429     return Len / Alignment <= 4;
   1430   else
   1431     return Len < 32;
   1432 }
   1433 
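        // Expand a small memcpy inline as a sequence of load/store pairs, using
        // the widest chunk the remaining length and alignment allow (i64, i32,
        // i16, then i8).  For example, a 13-byte copy with 8-byte alignment
        // becomes an i64, an i32, and an i8 load/store pair.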
   1434 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
   1435                                          uint64_t Len, unsigned Alignment) {
   1436   // Make sure we don't bloat code by inlining very large memcpys.
   1437   if (!IsMemCpySmall(Len, Alignment))
   1438     return false;
   1439 
   1440   int64_t UnscaledOffset = 0;
   1441   Address OrigDest = Dest;
   1442   Address OrigSrc = Src;
   1443 
   1444   while (Len) {
   1445     MVT VT;
   1446     if (!Alignment || Alignment >= 8) {
   1447       if (Len >= 8)
   1448         VT = MVT::i64;
   1449       else if (Len >= 4)
   1450         VT = MVT::i32;
   1451       else if (Len >= 2)
   1452         VT = MVT::i16;
   1453       else {
   1454         VT = MVT::i8;
   1455       }
   1456     } else {
   1457       // Bound based on alignment.
   1458       if (Len >= 4 && Alignment == 4)
   1459         VT = MVT::i32;
   1460       else if (Len >= 2 && Alignment == 2)
   1461         VT = MVT::i16;
   1462       else {
   1463         VT = MVT::i8;
   1464       }
   1465     }
   1466 
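            // Copy one chunk: load it from Src and store it to Dest at the
            // current unscaled offset.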
   1467     bool RV;
   1468     unsigned ResultReg;
   1469     RV = EmitLoad(VT, ResultReg, Src);
   1470     if (!RV)
   1471       return false;
   1472 
   1473     RV = EmitStore(VT, ResultReg, Dest);
   1474     if (!RV)
   1475       return false;
   1476 
   1477     int64_t Size = VT.getSizeInBits() / 8;
   1478     Len -= Size;
   1479     UnscaledOffset += Size;
   1480 
   1481     // We need to recompute the unscaled offset for each iteration.
   1482     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
   1483     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
   1484   }
   1485 
   1486   return true;
   1487 }
   1488 
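        // Lower the handful of intrinsics we support directly (memcpy, memmove,
        // memset and trap); anything else is left to SelectionDAG.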
   1489 bool AArch64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
   1490   // FIXME: Handle more intrinsics.
   1491   switch (I.getIntrinsicID()) {
   1492   default:
   1493     return false;
   1494   case Intrinsic::memcpy:
   1495   case Intrinsic::memmove: {
   1496     const MemTransferInst &MTI = cast<MemTransferInst>(I);
   1497     // Don't handle volatile.
   1498     if (MTI.isVolatile())
   1499       return false;
   1500 
   1501     // We only inline memcpy, not memmove; check this before calling
   1502     // ComputeAddress so we don't emit dead code computing memmove addresses.
   1503     bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
   1504     if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
   1505       // Small memcpys are common enough that we want to do them without a
   1506       // call if possible.
   1507       uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
   1508       unsigned Alignment = MTI.getAlignment();
   1509       if (IsMemCpySmall(Len, Alignment)) {
   1510         Address Dest, Src;
   1511         if (!ComputeAddress(MTI.getRawDest(), Dest) ||
   1512             !ComputeAddress(MTI.getRawSource(), Src))
   1513           return false;
   1514         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
   1515           return true;
   1516       }
   1517     }
   1518 
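            // We couldn't (or chose not to) inline it: fall back to a call to
            // the library routine, provided the length is already an i64 and
            // neither pointer is in an address space above 255.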
   1519     if (!MTI.getLength()->getType()->isIntegerTy(64))
   1520       return false;
   1521 
   1522     if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
   1523       // Fast instruction selection doesn't support the special
   1524       // address spaces.
   1525       return false;
   1526 
   1527     const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
   1528     return SelectCall(&I, IntrMemName);
   1529   }
   1530   case Intrinsic::memset: {
   1531     const MemSetInst &MSI = cast<MemSetInst>(I);
   1532     // Don't handle volatile.
   1533     if (MSI.isVolatile())
   1534       return false;
   1535 
   1536     if (!MSI.getLength()->getType()->isIntegerTy(64))
   1537       return false;
   1538 
   1539     if (MSI.getDestAddressSpace() > 255)
   1540       // Fast instruction selection doesn't support the special
   1541       // address spaces.
   1542       return false;
   1543 
   1544     return SelectCall(&I, "memset");
   1545   }
   1546   case Intrinsic::trap: {
   1547     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
   1548         .addImm(1);
   1549     return true;
   1550   }
   1551   }
   1552   return false;
   1553 }
   1554 
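        // Lower a return.  We only handle the easy cases: a lowerable,
        // non-vararg function returning either nothing or a single value that
        // the calling convention assigns to a register, with at most a simple
        // integer extension needed; everything else goes back to SelectionDAG.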
   1555 bool AArch64FastISel::SelectRet(const Instruction *I) {
   1556   const ReturnInst *Ret = cast<ReturnInst>(I);
   1557   const Function &F = *I->getParent()->getParent();
   1558 
   1559   if (!FuncInfo.CanLowerReturn)
   1560     return false;
   1561 
   1562   if (F.isVarArg())
   1563     return false;
   1564 
   1565   // Build a list of return value registers.
   1566   SmallVector<unsigned, 4> RetRegs;
   1567 
   1568   if (Ret->getNumOperands() > 0) {
   1569     CallingConv::ID CC = F.getCallingConv();
   1570     SmallVector<ISD::OutputArg, 4> Outs;
   1571     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
   1572 
   1573     // Analyze operands of the call, assigning locations to each operand.
   1574     SmallVector<CCValAssign, 16> ValLocs;
   1575     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
   1576                    I->getContext());
   1577     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
   1578                                                      : RetCC_AArch64_AAPCS;
   1579     CCInfo.AnalyzeReturn(Outs, RetCC);
   1580 
   1581     // Only handle a single return value for now.
   1582     if (ValLocs.size() != 1)
   1583       return false;
   1584 
   1585     CCValAssign &VA = ValLocs[0];
   1586     const Value *RV = Ret->getOperand(0);
   1587 
   1588     // Don't bother handling odd stuff for now.
   1589     if (VA.getLocInfo() != CCValAssign::Full)
   1590       return false;
   1591     // Only handle register returns for now.
   1592     if (!VA.isRegLoc())
   1593       return false;
   1594     unsigned Reg = getRegForValue(RV);
   1595     if (Reg == 0)
   1596       return false;
   1597 
   1598     unsigned SrcReg = Reg + VA.getValNo();
   1599     unsigned DestReg = VA.getLocReg();
   1600     // Avoid a cross-class copy. This is very unlikely.
   1601     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
   1602       return false;
   1603 
   1604     EVT RVEVT = TLI.getValueType(RV->getType());
   1605     if (!RVEVT.isSimple())
   1606       return false;
   1607 
   1608     // Vectors (of > 1 lane) in big endian need tricky handling.
   1609     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
   1610       return false;
   1611 
   1612     MVT RVVT = RVEVT.getSimpleVT();
   1613     if (RVVT == MVT::f128)
   1614       return false;
   1615     MVT DestVT = VA.getValVT();
   1616     // Special handling for extended integers.
   1617     if (RVVT != DestVT) {
   1618       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
   1619         return false;
   1620 
   1621       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
   1622         return false;
   1623 
   1624       bool isZExt = Outs[0].Flags.isZExt();
   1625       SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
   1626       if (SrcReg == 0)
   1627         return false;
   1628     }
   1629 
   1630     // Make the copy.
   1631     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1632             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
   1633 
   1634     // Add register to return instruction.
   1635     RetRegs.push_back(VA.getLocReg());
   1636   }
   1637 
   1638   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1639                                     TII.get(AArch64::RET_ReallyLR));
   1640   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
   1641     MIB.addReg(RetRegs[i], RegState::Implicit);
   1642   return true;
   1643 }
   1644 
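        // Lower a truncate.  A truncate from an i32 (or smaller) source needs
        // no code at all, since the high bits are simply left undefined; a
        // truncate from i64 to i8, for example, becomes an EXTRACT_SUBREG of
        // the low 32 bits followed by an ANDWri with the mask 0xff.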
   1645 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
   1646   Type *DestTy = I->getType();
   1647   Value *Op = I->getOperand(0);
   1648   Type *SrcTy = Op->getType();
   1649 
   1650   EVT SrcEVT = TLI.getValueType(SrcTy, true);
   1651   EVT DestEVT = TLI.getValueType(DestTy, true);
   1652   if (!SrcEVT.isSimple())
   1653     return false;
   1654   if (!DestEVT.isSimple())
   1655     return false;
   1656 
   1657   MVT SrcVT = SrcEVT.getSimpleVT();
   1658   MVT DestVT = DestEVT.getSimpleVT();
   1659 
   1660   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
   1661       SrcVT != MVT::i8)
   1662     return false;
   1663   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
   1664       DestVT != MVT::i1)
   1665     return false;
   1666 
   1667   unsigned SrcReg = getRegForValue(Op);
   1668   if (!SrcReg)
   1669     return false;
   1670 
   1671   // If we're truncating from i64 to a smaller non-legal type then generate an
   1672   // AND.  Otherwise, we know the high bits are undefined and a truncate doesn't
   1673   // generate any code.
   1674   if (SrcVT == MVT::i64) {
   1675     uint64_t Mask = 0;
   1676     switch (DestVT.SimpleTy) {
   1677     default:
   1678       // Trunc i64 to i32 is handled by the target-independent fast-isel.
   1679       return false;
   1680     case MVT::i1:
   1681       Mask = 0x1;
   1682       break;
   1683     case MVT::i8:
   1684       Mask = 0xff;
   1685       break;
   1686     case MVT::i16:
   1687       Mask = 0xffff;
   1688       break;
   1689     }
   1690     // Issue an extract_subreg to get the lower 32-bits.
   1691     unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
   1692                                                 AArch64::sub_32);
   1693     MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
   1694     // Create the AND instruction which performs the actual truncation.
   1695     unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
   1696     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
   1697             ANDReg)
   1698         .addReg(Reg32)
   1699         .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
   1700     SrcReg = ANDReg;
   1701   }
   1702 
   1703   UpdateValueMap(I, SrcReg);
   1704   return true;
   1705 }
   1706 
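        // Extend an i1.  Zero-extension is an ANDWri with immediate 1 (plus a
        // SUBREG_TO_REG when the destination is i64); sign-extension uses
        // "SBFMWri Wd, Wn, #0, #0" to replicate bit 0, and the i1 -> i64
        // sign-extension case isn't handled yet.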
   1707 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
   1708   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
   1709           DestVT == MVT::i64) &&
   1710          "Unexpected value type.");
   1711   // Handle i8 and i16 as i32.
   1712   if (DestVT == MVT::i8 || DestVT == MVT::i16)
   1713     DestVT = MVT::i32;
   1714 
   1715   if (isZExt) {
   1716     MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
   1717     unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
   1718     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
   1719             ResultReg)
   1720         .addReg(SrcReg)
   1721         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
   1722 
   1723     if (DestVT == MVT::i64) {
   1724       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
   1725       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
   1726       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
   1727       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1728               TII.get(AArch64::SUBREG_TO_REG), Reg64)
   1729           .addImm(0)
   1730           .addReg(ResultReg)
   1731           .addImm(AArch64::sub_32);
   1732       ResultReg = Reg64;
   1733     }
   1734     return ResultReg;
   1735   } else {
   1736     if (DestVT == MVT::i64) {
   1737       // FIXME: We're SExt i1 to i64.
   1738       return 0;
   1739     }
   1740     unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
   1741     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
   1742             ResultReg)
   1743         .addReg(SrcReg)
   1744         .addImm(0)
   1745         .addImm(0);
   1746     return ResultReg;
   1747   }
   1748 }
   1749 
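        // General integer extension.  Apart from the i1 case above, the work is
        // a single UBFM (zext) or SBFM (sext) selecting bits 0..7, 0..15 or
        // 0..31 of the source; e.g. a zext from i8 to i32 is emitted as
        // "UBFMWri Wd, Wn, #0, #7", i.e. UXTB.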
   1750 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
   1751                                      bool isZExt) {
   1752   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
   1753 
   1754   // FastISel does not have plumbing to deal with extensions where the SrcVT or
   1755   // DestVT are odd things, so test to make sure that they are both types we can
   1756   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
   1757   // bail out to SelectionDAG.
   1758   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
   1759        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
   1760       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
   1761        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
   1762     return 0;
   1763 
   1764   unsigned Opc;
   1765   unsigned Imm = 0;
   1766 
   1767   switch (SrcVT.SimpleTy) {
   1768   default:
   1769     return 0;
   1770   case MVT::i1:
   1771     return Emiti1Ext(SrcReg, DestVT, isZExt);
   1772   case MVT::i8:
   1773     if (DestVT == MVT::i64)
   1774       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
   1775     else
   1776       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
   1777     Imm = 7;
   1778     break;
   1779   case MVT::i16:
   1780     if (DestVT == MVT::i64)
   1781       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
   1782     else
   1783       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
   1784     Imm = 15;
   1785     break;
   1786   case MVT::i32:
   1787     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
   1788     Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
   1789     Imm = 31;
   1790     break;
   1791   }
   1792 
   1793   // Handle i8 and i16 as i32.
   1794   if (DestVT == MVT::i8 || DestVT == MVT::i16)
   1795     DestVT = MVT::i32;
   1796   else if (DestVT == MVT::i64) {
   1797     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
   1798     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1799             TII.get(AArch64::SUBREG_TO_REG), Src64)
   1800         .addImm(0)
   1801         .addReg(SrcReg)
   1802         .addImm(AArch64::sub_32);
   1803     SrcReg = Src64;
   1804   }
   1805 
   1806   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
   1807   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
   1808       .addReg(SrcReg)
   1809       .addImm(0)
   1810       .addImm(Imm);
   1811 
   1812   return ResultReg;
   1813 }
   1814 
   1815 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
   1816   // On AArch64, in general, integer casts don't involve legal types; this code
   1817   // handles promotable integers.  The high bits for a type smaller than
   1818   // the register size are assumed to be undefined.
   1819   Type *DestTy = I->getType();
   1820   Value *Src = I->getOperand(0);
   1821   Type *SrcTy = Src->getType();
   1822 
   1823   bool isZExt = isa<ZExtInst>(I);
   1824   unsigned SrcReg = getRegForValue(Src);
   1825   if (!SrcReg)
   1826     return false;
   1827 
   1828   EVT SrcEVT = TLI.getValueType(SrcTy, true);
   1829   EVT DestEVT = TLI.getValueType(DestTy, true);
   1830   if (!SrcEVT.isSimple())
   1831     return false;
   1832   if (!DestEVT.isSimple())
   1833     return false;
   1834 
   1835   MVT SrcVT = SrcEVT.getSimpleVT();
   1836   MVT DestVT = DestEVT.getSimpleVT();
   1837   unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
   1838   if (ResultReg == 0)
   1839     return false;
   1840   UpdateValueMap(I, ResultReg);
   1841   return true;
   1842 }
   1843 
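        // Lower srem/urem.  AArch64 has no integer remainder instruction, so we
        // emit a divide followed by a multiply-subtract: MSUB computes
        // numerator - quotient * denominator, which is the remainder.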
   1844 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
   1845   EVT DestEVT = TLI.getValueType(I->getType(), true);
   1846   if (!DestEVT.isSimple())
   1847     return false;
   1848 
   1849   MVT DestVT = DestEVT.getSimpleVT();
   1850   if (DestVT != MVT::i64 && DestVT != MVT::i32)
   1851     return false;
   1852 
   1853   unsigned DivOpc;
   1854   bool is64bit = (DestVT == MVT::i64);
   1855   switch (ISDOpcode) {
   1856   default:
   1857     return false;
   1858   case ISD::SREM:
   1859     DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
   1860     break;
   1861   case ISD::UREM:
   1862     DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
   1863     break;
   1864   }
   1865   unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
   1866   unsigned Src0Reg = getRegForValue(I->getOperand(0));
   1867   if (!Src0Reg)
   1868     return false;
   1869 
   1870   unsigned Src1Reg = getRegForValue(I->getOperand(1));
   1871   if (!Src1Reg)
   1872     return false;
   1873 
   1874   unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
   1875   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
   1876       .addReg(Src0Reg)
   1877       .addReg(Src1Reg);
   1878   // The remainder is computed as numerator - (quotient * denominator) using the
   1879   // MSUB instruction.
   1880   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
   1881   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
   1882       .addReg(QuotReg)
   1883       .addReg(Src1Reg)
   1884       .addReg(Src0Reg);
   1885   UpdateValueMap(I, ResultReg);
   1886   return true;
   1887 }
   1888 
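        // Lower a scalar integer multiply as MADD with the zero register as the
        // addend (MADD Rd, Rn, Rm, ZR computes Rn * Rm).  i8 and i16 operands
        // are simply treated as i32, since the extra high bits don't affect the
        // truncated product.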
   1889 bool AArch64FastISel::SelectMul(const Instruction *I) {
   1890   EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
   1891   if (!SrcEVT.isSimple())
   1892     return false;
   1893   MVT SrcVT = SrcEVT.getSimpleVT();
   1894 
   1895   // Must be simple value type.  Don't handle vectors.
   1896   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
   1897       SrcVT != MVT::i8)
   1898     return false;
   1899 
   1900   unsigned Opc;
   1901   unsigned ZReg;
   1902   switch (SrcVT.SimpleTy) {
   1903   default:
   1904     return false;
   1905   case MVT::i8:
   1906   case MVT::i16:
   1907   case MVT::i32:
   1908     ZReg = AArch64::WZR;
   1909     Opc = AArch64::MADDWrrr;
   1910     SrcVT = MVT::i32;
   1911     break;
   1912   case MVT::i64:
   1913     ZReg = AArch64::XZR;
   1914     Opc = AArch64::MADDXrrr;
   1915     break;
   1916   }
   1917 
   1918   unsigned Src0Reg = getRegForValue(I->getOperand(0));
   1919   if (!Src0Reg)
   1920     return false;
   1921 
   1922   unsigned Src1Reg = getRegForValue(I->getOperand(1));
   1923   if (!Src1Reg)
   1924     return false;
   1925 
   1926   // Create the base instruction, then add the operands.
   1927   unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT));
   1928   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
   1929       .addReg(Src0Reg)
   1930       .addReg(Src1Reg)
   1931       .addReg(ZReg);
   1932   UpdateValueMap(I, ResultReg);
   1933   return true;
   1934 }
   1935 
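        // FastISel entry point: dispatch on the IR opcode and return false for
        // anything we don't handle, so the generic FastISel code (and, failing
        // that, SelectionDAG) can take over.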
   1936 bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
   1937   switch (I->getOpcode()) {
   1938   default:
   1939     break;
   1940   case Instruction::Load:
   1941     return SelectLoad(I);
   1942   case Instruction::Store:
   1943     return SelectStore(I);
   1944   case Instruction::Br:
   1945     return SelectBranch(I);
   1946   case Instruction::IndirectBr:
   1947     return SelectIndirectBr(I);
   1948   case Instruction::FCmp:
   1949   case Instruction::ICmp:
   1950     return SelectCmp(I);
   1951   case Instruction::Select:
   1952     return SelectSelect(I);
   1953   case Instruction::FPExt:
   1954     return SelectFPExt(I);
   1955   case Instruction::FPTrunc:
   1956     return SelectFPTrunc(I);
   1957   case Instruction::FPToSI:
   1958     return SelectFPToInt(I, /*Signed=*/true);
   1959   case Instruction::FPToUI:
   1960     return SelectFPToInt(I, /*Signed=*/false);
   1961   case Instruction::SIToFP:
   1962     return SelectIntToFP(I, /*Signed=*/true);
   1963   case Instruction::UIToFP:
   1964     return SelectIntToFP(I, /*Signed=*/false);
   1965   case Instruction::SRem:
   1966     return SelectRem(I, ISD::SREM);
   1967   case Instruction::URem:
   1968     return SelectRem(I, ISD::UREM);
   1969   case Instruction::Call:
   1970     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
   1971       return SelectIntrinsicCall(*II);
   1972     return SelectCall(I);
   1973   case Instruction::Ret:
   1974     return SelectRet(I);
   1975   case Instruction::Trunc:
   1976     return SelectTrunc(I);
   1977   case Instruction::ZExt:
   1978   case Instruction::SExt:
   1979     return SelectIntExt(I);
   1980   case Instruction::Mul:
   1981     // FIXME: This really should be handled by the target-independent selector.
   1982     return SelectMul(I);
   1983   }
   1984   return false;
   1985   // Silence warnings.
   1986   (void)&CC_AArch64_DarwinPCS_VarArg;
   1987 }
   1988 
   1989 namespace llvm {
   1990 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
   1991                                         const TargetLibraryInfo *libInfo) {
   1992   return new AArch64FastISel(funcInfo, libInfo);
   1993 }
   1994 }
   1995