//===-- AArch64FastISel.cpp - AArch64 FastISel implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    typedef enum {
      RegBase,
      FrameIndexBase
    } BaseKind;

  private:
    BaseKind Kind;
    AArch64_AM::ShiftExtendType ExtType;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg;
    unsigned Shift;
    int64_t Offset;
    const GlobalValue *GV;

  public:
    Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
      OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }
    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }
    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }
    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }
    unsigned getOffsetReg() const {
      return OffsetReg;
    }
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }
    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            unsigned Flags, unsigned ScaleFactor,
                            MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         bool SetFlags = false, bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
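  // Note: unless documented otherwise, the unsigned emit* helpers below return
  // the virtual register that holds the result, or 0 when the operation could
  // not be emitted; the bool variants simply report success or failure.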
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                      unsigned Op1, bool Op1IsKill);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget =
        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

#include "AArch64GenCallingConv.inc"

/// \brief Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}

/// \brief Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return 0; // invalid
  case MVT::i1: // fall-through
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32: // fall-through
  case MVT::f32:
    return 4;
  case MVT::i64: // fall-through
  case MVT::f64:
    return 8;
  }
}

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  if (CC == CallingConv::GHC)
    return CC_AArch64_GHC;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}

unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm =
        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
    assert((Imm != -1) && "Cannot encode floating-point constant.");
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the MachO large code model materialize the FP constant in code.
  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    unsigned TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0)
    Align = DL.getTypeAllocSize(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}

unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
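  // For reference, the small code-model sequences emitted below look roughly
  // like:
  //   adrp x0, var                     ; page of the symbol
  //   add  x0, x0, :lo12:var           ; ADRP + ADDX
  // or, for GOT-based accesses:
  //   adrp x0, :got:var
  //   ldr  x0, [x0, :got_lo12:var]     ; ADRP + LDRX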
  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
    return 0;

  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                                 AArch64II::MO_NC);
  } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
    // We can't handle addresses loaded from a constant pool quickly yet.
    return 0;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
        .addImm(0);
  }
  return ResultReg;
}

unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}

unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
}

/// \brief Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}

// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast: {
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);
  }
  case Instruction::IntToPtr: {
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::PtrToInt: {
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;
  }
  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    gep_type_iterator GTI = gep_type_begin(U);
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
         ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
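    // For example, an address expressed as "add i64 %base, 16" is handled by
    // folding the constant 16 into Addr's immediate offset and then recursing
    // on %base.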
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            unsigned Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            bool RegIsKill = hasTrivialKill(LHS);
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                             AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
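    // After the swap the address operand is always LHS and the power-of-2
    // constant is always RHS, so the shift amount below can be derived from
    // RHS unconditionally.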
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        unsigned Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        bool RegIsKill = hasTrivialKill(LHS);
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
                                         AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
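    // For example, "%idx64 = sext i32 %idx to i64" used as the register offset
    // of a load or store can be folded into the SXTW extended-register
    // addressing mode instead of emitting a separate extend instruction.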
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    unsigned Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  if (Addr.isRegBase() && !Addr.getReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    unsigned Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// \brief Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
    return true;

  return false;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW )
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift(),
                               /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               /*Op0IsKill=*/false, Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           unsigned Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
        constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
        constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}

unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr )
          std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
                                SetFlags, WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
                                WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                               RHSIsKill, ExtendType, C->getZExtValue(),
                               SetFlags, WantResult);
        }
    unsigned RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    bool RHSIsKill = hasTrivialKill(RHS);
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                         ExtendType, 0, SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
                                WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
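  // For example, "add %a, (shl %b, 3)" can be selected as a single
  // "ADD Xd, Xa, Xb, lsl #3" via the shifted-register form handled below.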
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          unsigned RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
                                    RHSIsKill, ShiftType, ShiftVal, SetFlags,
                                    WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}

unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr, AArch64::SUBXrr },
      { AArch64::ADDWrr, AArch64::ADDXrr } },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, uint64_t Imm,
                                        bool SetFlags, bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri, AArch64::SUBXri },
      { AArch64::ADDWri, AArch64::ADDXri } },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs, AArch64::SUBXrs },
      { AArch64::ADDWrs, AArch64::ADDXrs } },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        bool LHSIsKill, unsigned RHSReg,
                                        bool RHSIsKill,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  if (ShiftImm >= 4)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx, AArch64::SUBXrx },
      { AArch64::ADDWrx, AArch64::ADDXrx } },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill))
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}

bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(DL, Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt) != 0;
}

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                                  uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
}

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;
  bool LHSIsKill = hasTrivialKill(LHS);

  if (UseImm) {
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(LHSReg, getKillRegState(LHSIsKill));
    return true;
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;
  bool RHSIsKill = hasTrivialKill(RHS);

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return true;
}

unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// \brief This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
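/// For example, an immediate like 0x123456 cannot be encoded by ADD/SUB (which
/// only accept a 12-bit immediate, optionally shifted left by 12), so it is
/// first materialized into a register and then added with the register form.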
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
                                      int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
  return ResultReg;
}

unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
                       RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
                       WantResult);
}

unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;
  bool LHSIsKill = hasTrivialKill(LHS);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      unsigned RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      bool RHSIsKill = hasTrivialKill(MulLHS);
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                   RHSIsKill, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        unsigned RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
                                     RHSIsKill, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;
  bool RHSIsKill = hasTrivialKill(RHS);

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           uint64_t Imm) {
  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
         "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  unsigned ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
                                           uint64_t ShiftImm) {
  assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
         "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
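  // A shift amount of RetVT.getSizeInBits() or more would be undefined in the
  // IR and is not a valid shifted-register operand, so give up here and let
  // the caller fall back to the plain register-register form.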
1671 if (ShiftImm >= RetVT.getSizeInBits()) 1672 return 0; 1673 1674 const TargetRegisterClass *RC; 1675 unsigned Opc; 1676 switch (RetVT.SimpleTy) { 1677 default: 1678 return 0; 1679 case MVT::i1: 1680 case MVT::i8: 1681 case MVT::i16: 1682 case MVT::i32: 1683 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1684 RC = &AArch64::GPR32RegClass; 1685 break; 1686 case MVT::i64: 1687 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1688 RC = &AArch64::GPR64RegClass; 1689 break; 1690 } 1691 unsigned ResultReg = 1692 fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1693 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1694 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1695 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1696 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1697 } 1698 return ResultReg; 1699 } 1700 1701 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 1702 uint64_t Imm) { 1703 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); 1704 } 1705 1706 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1707 bool WantZExt, MachineMemOperand *MMO) { 1708 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1709 return 0; 1710 1711 // Simplify this down to something we can handle. 1712 if (!simplifyAddress(Addr, VT)) 1713 return 0; 1714 1715 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1716 if (!ScaleFactor) 1717 llvm_unreachable("Unexpected value type."); 1718 1719 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1720 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1721 bool UseScaled = true; 1722 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1723 UseScaled = false; 1724 ScaleFactor = 1; 1725 } 1726 1727 static const unsigned GPOpcTable[2][8][4] = { 1728 // Sign-extend. 1729 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1730 AArch64::LDURXi }, 1731 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1732 AArch64::LDURXi }, 1733 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1734 AArch64::LDRXui }, 1735 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1736 AArch64::LDRXui }, 1737 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1738 AArch64::LDRXroX }, 1739 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1740 AArch64::LDRXroX }, 1741 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1742 AArch64::LDRXroW }, 1743 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1744 AArch64::LDRXroW } 1745 }, 1746 // Zero-extend. 
1747 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1748 AArch64::LDURXi }, 1749 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1750 AArch64::LDURXi }, 1751 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1752 AArch64::LDRXui }, 1753 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1754 AArch64::LDRXui }, 1755 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1756 AArch64::LDRXroX }, 1757 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1758 AArch64::LDRXroX }, 1759 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1760 AArch64::LDRXroW }, 1761 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1762 AArch64::LDRXroW } 1763 } 1764 }; 1765 1766 static const unsigned FPOpcTable[4][2] = { 1767 { AArch64::LDURSi, AArch64::LDURDi }, 1768 { AArch64::LDRSui, AArch64::LDRDui }, 1769 { AArch64::LDRSroX, AArch64::LDRDroX }, 1770 { AArch64::LDRSroW, AArch64::LDRDroW } 1771 }; 1772 1773 unsigned Opc; 1774 const TargetRegisterClass *RC; 1775 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1776 Addr.getOffsetReg(); 1777 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1778 if (Addr.getExtendType() == AArch64_AM::UXTW || 1779 Addr.getExtendType() == AArch64_AM::SXTW) 1780 Idx++; 1781 1782 bool IsRet64Bit = RetVT == MVT::i64; 1783 switch (VT.SimpleTy) { 1784 default: 1785 llvm_unreachable("Unexpected value type."); 1786 case MVT::i1: // Intentional fall-through. 1787 case MVT::i8: 1788 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1789 RC = (IsRet64Bit && !WantZExt) ? 1790 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1791 break; 1792 case MVT::i16: 1793 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1794 RC = (IsRet64Bit && !WantZExt) ? 1795 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1796 break; 1797 case MVT::i32: 1798 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1799 RC = (IsRet64Bit && !WantZExt) ? 1800 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1801 break; 1802 case MVT::i64: 1803 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1804 RC = &AArch64::GPR64RegClass; 1805 break; 1806 case MVT::f32: 1807 Opc = FPOpcTable[Idx][0]; 1808 RC = &AArch64::FPR32RegClass; 1809 break; 1810 case MVT::f64: 1811 Opc = FPOpcTable[Idx][1]; 1812 RC = &AArch64::FPR64RegClass; 1813 break; 1814 } 1815 1816 // Create the base instruction, then add the operands. 1817 unsigned ResultReg = createResultReg(RC); 1818 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1819 TII.get(Opc), ResultReg); 1820 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1821 1822 // Loading an i1 requires special handling. 1823 if (VT == MVT::i1) { 1824 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); 1825 assert(ANDReg && "Unexpected AND instruction emission failure."); 1826 ResultReg = ANDReg; 1827 } 1828 1829 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1830 // the 32bit reg to a 64bit reg. 
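// SUBREG_TO_REG with a zero immediate records that the upper 32 bits are already
// zero, without emitting any actual instruction.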
1831 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1832 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); 1833 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1834 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1835 .addImm(0) 1836 .addReg(ResultReg, getKillRegState(true)) 1837 .addImm(AArch64::sub_32); 1838 ResultReg = Reg64; 1839 } 1840 return ResultReg; 1841 } 1842 1843 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1844 MVT VT; 1845 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1846 return false; 1847 1848 if (VT.isVector()) 1849 return selectOperator(I, I->getOpcode()); 1850 1851 unsigned ResultReg; 1852 switch (I->getOpcode()) { 1853 default: 1854 llvm_unreachable("Unexpected instruction."); 1855 case Instruction::Add: 1856 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1857 break; 1858 case Instruction::Sub: 1859 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1860 break; 1861 } 1862 if (!ResultReg) 1863 return false; 1864 1865 updateValueMap(I, ResultReg); 1866 return true; 1867 } 1868 1869 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1870 MVT VT; 1871 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1872 return false; 1873 1874 if (VT.isVector()) 1875 return selectOperator(I, I->getOpcode()); 1876 1877 unsigned ResultReg; 1878 switch (I->getOpcode()) { 1879 default: 1880 llvm_unreachable("Unexpected instruction."); 1881 case Instruction::And: 1882 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1883 break; 1884 case Instruction::Or: 1885 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1886 break; 1887 case Instruction::Xor: 1888 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1889 break; 1890 } 1891 if (!ResultReg) 1892 return false; 1893 1894 updateValueMap(I, ResultReg); 1895 return true; 1896 } 1897 1898 bool AArch64FastISel::selectLoad(const Instruction *I) { 1899 MVT VT; 1900 // Verify we have a legal type before going any further. Currently, we handle 1901 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1902 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1903 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1904 cast<LoadInst>(I)->isAtomic()) 1905 return false; 1906 1907 // See if we can handle this address. 1908 Address Addr; 1909 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1910 return false; 1911 1912 // Fold the following sign-/zero-extend into the load instruction. 1913 bool WantZExt = true; 1914 MVT RetVT = VT; 1915 const Value *IntExtVal = nullptr; 1916 if (I->hasOneUse()) { 1917 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1918 if (isTypeSupported(ZE->getType(), RetVT)) 1919 IntExtVal = ZE; 1920 else 1921 RetVT = VT; 1922 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1923 if (isTypeSupported(SE->getType(), RetVT)) 1924 IntExtVal = SE; 1925 else 1926 RetVT = VT; 1927 WantZExt = false; 1928 } 1929 } 1930 1931 unsigned ResultReg = 1932 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 1933 if (!ResultReg) 1934 return false; 1935 1936 // There are a few different cases we have to handle, because the load or the 1937 // sign-/zero-extend might not be selected by FastISel if we fall-back to 1938 // SelectionDAG. There is also an ordering issue when both instructions are in 1939 // different basic blocks. 1940 // 1.) 
The load instruction is selected by FastISel, but the integer extend 1941 // not. This usually happens when the integer extend is in a different 1942 // basic block and SelectionDAG took over for that basic block. 1943 // 2.) The load instruction is selected before the integer extend. This only 1944 // happens when the integer extend is in a different basic block. 1945 // 3.) The load instruction is selected by SelectionDAG and the integer extend 1946 // by FastISel. This happens if there are instructions between the load 1947 // and the integer extend that couldn't be selected by FastISel. 1948 if (IntExtVal) { 1949 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 1950 // could select it. Emit a copy to subreg if necessary. FastISel will remove 1951 // it when it selects the integer extend. 1952 unsigned Reg = lookUpRegForValue(IntExtVal); 1953 auto *MI = MRI.getUniqueVRegDef(Reg); 1954 if (!MI) { 1955 if (RetVT == MVT::i64 && VT <= MVT::i32) { 1956 if (WantZExt) { 1957 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 1958 std::prev(FuncInfo.InsertPt)->eraseFromParent(); 1959 ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg(); 1960 } else 1961 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 1962 /*IsKill=*/true, 1963 AArch64::sub_32); 1964 } 1965 updateValueMap(I, ResultReg); 1966 return true; 1967 } 1968 1969 // The integer extend has already been emitted - delete all the instructions 1970 // that have been emitted by the integer extend lowering code and use the 1971 // result from the load instruction directly. 1972 while (MI) { 1973 Reg = 0; 1974 for (auto &Opnd : MI->uses()) { 1975 if (Opnd.isReg()) { 1976 Reg = Opnd.getReg(); 1977 break; 1978 } 1979 } 1980 MI->eraseFromParent(); 1981 MI = nullptr; 1982 if (Reg) 1983 MI = MRI.getUniqueVRegDef(Reg); 1984 } 1985 updateValueMap(IntExtVal, ResultReg); 1986 return true; 1987 } 1988 1989 updateValueMap(I, ResultReg); 1990 return true; 1991 } 1992 1993 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 1994 MachineMemOperand *MMO) { 1995 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1996 return false; 1997 1998 // Simplify this down to something we can handle. 1999 if (!simplifyAddress(Addr, VT)) 2000 return false; 2001 2002 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2003 if (!ScaleFactor) 2004 llvm_unreachable("Unexpected value type."); 2005 2006 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2007 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2008 bool UseScaled = true; 2009 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2010 UseScaled = false; 2011 ScaleFactor = 1; 2012 } 2013 2014 static const unsigned OpcTable[4][6] = { 2015 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2016 AArch64::STURSi, AArch64::STURDi }, 2017 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2018 AArch64::STRSui, AArch64::STRDui }, 2019 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2020 AArch64::STRSroX, AArch64::STRDroX }, 2021 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2022 AArch64::STRSroW, AArch64::STRDroW } 2023 }; 2024 2025 unsigned Opc; 2026 bool VTIsi1 = false; 2027 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2028 Addr.getOffsetReg(); 2029 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 
1 : 0; 2030 if (Addr.getExtendType() == AArch64_AM::UXTW || 2031 Addr.getExtendType() == AArch64_AM::SXTW) 2032 Idx++; 2033 2034 switch (VT.SimpleTy) { 2035 default: llvm_unreachable("Unexpected value type."); 2036 case MVT::i1: VTIsi1 = true; 2037 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2038 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2039 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2040 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2041 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2042 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2043 } 2044 2045 // Storing an i1 requires special handling. 2046 if (VTIsi1 && SrcReg != AArch64::WZR) { 2047 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 2048 assert(ANDReg && "Unexpected AND instruction emission failure."); 2049 SrcReg = ANDReg; 2050 } 2051 // Create the base instruction, then add the operands. 2052 const MCInstrDesc &II = TII.get(Opc); 2053 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2054 MachineInstrBuilder MIB = 2055 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); 2056 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2057 2058 return true; 2059 } 2060 2061 bool AArch64FastISel::selectStore(const Instruction *I) { 2062 MVT VT; 2063 const Value *Op0 = I->getOperand(0); 2064 // Verify we have a legal type before going any further. Currently, we handle 2065 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2066 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2067 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) || 2068 cast<StoreInst>(I)->isAtomic()) 2069 return false; 2070 2071 // Get the value to be stored into a register. Use the zero register directly 2072 // when possible to avoid an unnecessary copy and a wasted register. 2073 unsigned SrcReg = 0; 2074 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2075 if (CI->isZero()) 2076 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2077 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2078 if (CF->isZero() && !CF->isNegative()) { 2079 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2080 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2081 } 2082 } 2083 2084 if (!SrcReg) 2085 SrcReg = getRegForValue(Op0); 2086 2087 if (!SrcReg) 2088 return false; 2089 2090 // See if we can handle this address. 2091 Address Addr; 2092 if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType())) 2093 return false; 2094 2095 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2096 return false; 2097 return true; 2098 } 2099 2100 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2101 switch (Pred) { 2102 case CmpInst::FCMP_ONE: 2103 case CmpInst::FCMP_UEQ: 2104 default: 2105 // AL is our "false" for now. The other two need more compares. 
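// (FCMP_ONE needs GT or MI and FCMP_UEQ needs EQ or VS; the callers that support
// them emit the extra branch or CSINC themselves.)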
2106 return AArch64CC::AL; 2107 case CmpInst::ICMP_EQ: 2108 case CmpInst::FCMP_OEQ: 2109 return AArch64CC::EQ; 2110 case CmpInst::ICMP_SGT: 2111 case CmpInst::FCMP_OGT: 2112 return AArch64CC::GT; 2113 case CmpInst::ICMP_SGE: 2114 case CmpInst::FCMP_OGE: 2115 return AArch64CC::GE; 2116 case CmpInst::ICMP_UGT: 2117 case CmpInst::FCMP_UGT: 2118 return AArch64CC::HI; 2119 case CmpInst::FCMP_OLT: 2120 return AArch64CC::MI; 2121 case CmpInst::ICMP_ULE: 2122 case CmpInst::FCMP_OLE: 2123 return AArch64CC::LS; 2124 case CmpInst::FCMP_ORD: 2125 return AArch64CC::VC; 2126 case CmpInst::FCMP_UNO: 2127 return AArch64CC::VS; 2128 case CmpInst::FCMP_UGE: 2129 return AArch64CC::PL; 2130 case CmpInst::ICMP_SLT: 2131 case CmpInst::FCMP_ULT: 2132 return AArch64CC::LT; 2133 case CmpInst::ICMP_SLE: 2134 case CmpInst::FCMP_ULE: 2135 return AArch64CC::LE; 2136 case CmpInst::FCMP_UNE: 2137 case CmpInst::ICMP_NE: 2138 return AArch64CC::NE; 2139 case CmpInst::ICMP_UGE: 2140 return AArch64CC::HS; 2141 case CmpInst::ICMP_ULT: 2142 return AArch64CC::LO; 2143 } 2144 } 2145 2146 /// \brief Try to emit a combined compare-and-branch instruction. 2147 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2148 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2149 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2150 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2151 2152 const Value *LHS = CI->getOperand(0); 2153 const Value *RHS = CI->getOperand(1); 2154 2155 MVT VT; 2156 if (!isTypeSupported(LHS->getType(), VT)) 2157 return false; 2158 2159 unsigned BW = VT.getSizeInBits(); 2160 if (BW > 64) 2161 return false; 2162 2163 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2164 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2165 2166 // Try to take advantage of fallthrough opportunities. 
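// If the true block is the layout successor, branch to the false block on the
// inverted condition so the common path can simply fall through.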
2167 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2168 std::swap(TBB, FBB); 2169 Predicate = CmpInst::getInversePredicate(Predicate); 2170 } 2171 2172 int TestBit = -1; 2173 bool IsCmpNE; 2174 switch (Predicate) { 2175 default: 2176 return false; 2177 case CmpInst::ICMP_EQ: 2178 case CmpInst::ICMP_NE: 2179 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2180 std::swap(LHS, RHS); 2181 2182 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2183 return false; 2184 2185 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2186 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2187 const Value *AndLHS = AI->getOperand(0); 2188 const Value *AndRHS = AI->getOperand(1); 2189 2190 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2191 if (C->getValue().isPowerOf2()) 2192 std::swap(AndLHS, AndRHS); 2193 2194 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2195 if (C->getValue().isPowerOf2()) { 2196 TestBit = C->getValue().logBase2(); 2197 LHS = AndLHS; 2198 } 2199 } 2200 2201 if (VT == MVT::i1) 2202 TestBit = 0; 2203 2204 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2205 break; 2206 case CmpInst::ICMP_SLT: 2207 case CmpInst::ICMP_SGE: 2208 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2209 return false; 2210 2211 TestBit = BW - 1; 2212 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2213 break; 2214 case CmpInst::ICMP_SGT: 2215 case CmpInst::ICMP_SLE: 2216 if (!isa<ConstantInt>(RHS)) 2217 return false; 2218 2219 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2220 return false; 2221 2222 TestBit = BW - 1; 2223 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2224 break; 2225 } // end switch 2226 2227 static const unsigned OpcTable[2][2][2] = { 2228 { {AArch64::CBZW, AArch64::CBZX }, 2229 {AArch64::CBNZW, AArch64::CBNZX} }, 2230 { {AArch64::TBZW, AArch64::TBZX }, 2231 {AArch64::TBNZW, AArch64::TBNZX} } 2232 }; 2233 2234 bool IsBitTest = TestBit != -1; 2235 bool Is64Bit = BW == 64; 2236 if (TestBit < 32 && TestBit >= 0) 2237 Is64Bit = false; 2238 2239 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2240 const MCInstrDesc &II = TII.get(Opc); 2241 2242 unsigned SrcReg = getRegForValue(LHS); 2243 if (!SrcReg) 2244 return false; 2245 bool SrcIsKill = hasTrivialKill(LHS); 2246 2247 if (BW == 64 && !Is64Bit) 2248 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 2249 AArch64::sub_32); 2250 2251 if ((BW < 32) && !IsBitTest) 2252 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true); 2253 2254 // Emit the combined compare and branch instruction. 2255 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2256 MachineInstrBuilder MIB = 2257 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 2258 .addReg(SrcReg, getKillRegState(SrcIsKill)); 2259 if (IsBitTest) 2260 MIB.addImm(TestBit); 2261 MIB.addMBB(TBB); 2262 2263 finishCondBranch(BI->getParent(), TBB, FBB); 2264 return true; 2265 } 2266 2267 bool AArch64FastISel::selectBranch(const Instruction *I) { 2268 const BranchInst *BI = cast<BranchInst>(I); 2269 if (BI->isUnconditional()) { 2270 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2271 fastEmitBranch(MSucc, BI->getDebugLoc()); 2272 return true; 2273 } 2274 2275 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2276 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2277 2278 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2279 if (CI->hasOneUse() && isValueAvailable(CI)) { 2280 // Try to optimize or fold the cmp. 
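// Always-false and always-true compares fold the conditional branch into an
// unconditional branch to the corresponding successor.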
2281 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2282 switch (Predicate) { 2283 default: 2284 break; 2285 case CmpInst::FCMP_FALSE: 2286 fastEmitBranch(FBB, DbgLoc); 2287 return true; 2288 case CmpInst::FCMP_TRUE: 2289 fastEmitBranch(TBB, DbgLoc); 2290 return true; 2291 } 2292 2293 // Try to emit a combined compare-and-branch first. 2294 if (emitCompareAndBranch(BI)) 2295 return true; 2296 2297 // Try to take advantage of fallthrough opportunities. 2298 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2299 std::swap(TBB, FBB); 2300 Predicate = CmpInst::getInversePredicate(Predicate); 2301 } 2302 2303 // Emit the cmp. 2304 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2305 return false; 2306 2307 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2308 // instruction. 2309 AArch64CC::CondCode CC = getCompareCC(Predicate); 2310 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2311 switch (Predicate) { 2312 default: 2313 break; 2314 case CmpInst::FCMP_UEQ: 2315 ExtraCC = AArch64CC::EQ; 2316 CC = AArch64CC::VS; 2317 break; 2318 case CmpInst::FCMP_ONE: 2319 ExtraCC = AArch64CC::MI; 2320 CC = AArch64CC::GT; 2321 break; 2322 } 2323 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2324 2325 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2326 if (ExtraCC != AArch64CC::AL) { 2327 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2328 .addImm(ExtraCC) 2329 .addMBB(TBB); 2330 } 2331 2332 // Emit the branch. 2333 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2334 .addImm(CC) 2335 .addMBB(TBB); 2336 2337 finishCondBranch(BI->getParent(), TBB, FBB); 2338 return true; 2339 } 2340 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2341 uint64_t Imm = CI->getZExtValue(); 2342 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2343 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) 2344 .addMBB(Target); 2345 2346 // Obtain the branch probability and add the target to the successor list. 2347 if (FuncInfo.BPI) { 2348 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2349 BI->getParent(), Target->getBasicBlock()); 2350 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2351 } else 2352 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2353 return true; 2354 } else { 2355 AArch64CC::CondCode CC = AArch64CC::NE; 2356 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2357 // Fake request the condition, otherwise the intrinsic might be completely 2358 // optimized away. 2359 unsigned CondReg = getRegForValue(BI->getCondition()); 2360 if (!CondReg) 2361 return false; 2362 2363 // Emit the branch. 2364 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2365 .addImm(CC) 2366 .addMBB(TBB); 2367 2368 finishCondBranch(BI->getParent(), TBB, FBB); 2369 return true; 2370 } 2371 } 2372 2373 unsigned CondReg = getRegForValue(BI->getCondition()); 2374 if (CondReg == 0) 2375 return false; 2376 bool CondRegIsKill = hasTrivialKill(BI->getCondition()); 2377 2378 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
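// TBNZ branches when bit 0 is set; if the true block is the layout successor,
// swap the targets and use TBZ so the true path falls through.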
2379 unsigned Opcode = AArch64::TBNZW; 2380 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2381 std::swap(TBB, FBB); 2382 Opcode = AArch64::TBZW; 2383 } 2384 2385 const MCInstrDesc &II = TII.get(Opcode); 2386 unsigned ConstrainedCondReg 2387 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2388 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2389 .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) 2390 .addImm(0) 2391 .addMBB(TBB); 2392 2393 finishCondBranch(BI->getParent(), TBB, FBB); 2394 return true; 2395 } 2396 2397 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2398 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2399 unsigned AddrReg = getRegForValue(BI->getOperand(0)); 2400 if (AddrReg == 0) 2401 return false; 2402 2403 // Emit the indirect branch. 2404 const MCInstrDesc &II = TII.get(AArch64::BR); 2405 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2406 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); 2407 2408 // Make sure the CFG is up-to-date. 2409 for (auto *Succ : BI->successors()) 2410 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2411 2412 return true; 2413 } 2414 2415 bool AArch64FastISel::selectCmp(const Instruction *I) { 2416 const CmpInst *CI = cast<CmpInst>(I); 2417 2418 // Vectors of i1 are weird: bail out. 2419 if (CI->getType()->isVectorTy()) 2420 return false; 2421 2422 // Try to optimize or fold the cmp. 2423 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2424 unsigned ResultReg = 0; 2425 switch (Predicate) { 2426 default: 2427 break; 2428 case CmpInst::FCMP_FALSE: 2429 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2430 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2431 TII.get(TargetOpcode::COPY), ResultReg) 2432 .addReg(AArch64::WZR, getKillRegState(true)); 2433 break; 2434 case CmpInst::FCMP_TRUE: 2435 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2436 break; 2437 } 2438 2439 if (ResultReg) { 2440 updateValueMap(I, ResultReg); 2441 return true; 2442 } 2443 2444 // Emit the cmp. 2445 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2446 return false; 2447 2448 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2449 2450 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2451 // condition codes are inverted, because they are used by CSINC. 2452 static unsigned CondCodeTable[2][2] = { 2453 { AArch64CC::NE, AArch64CC::VC }, 2454 { AArch64CC::PL, AArch64CC::LE } 2455 }; 2456 unsigned *CondCodes = nullptr; 2457 switch (Predicate) { 2458 default: 2459 break; 2460 case CmpInst::FCMP_UEQ: 2461 CondCodes = &CondCodeTable[0][0]; 2462 break; 2463 case CmpInst::FCMP_ONE: 2464 CondCodes = &CondCodeTable[1][0]; 2465 break; 2466 } 2467 2468 if (CondCodes) { 2469 unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2470 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2471 TmpReg1) 2472 .addReg(AArch64::WZR, getKillRegState(true)) 2473 .addReg(AArch64::WZR, getKillRegState(true)) 2474 .addImm(CondCodes[0]); 2475 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2476 ResultReg) 2477 .addReg(TmpReg1, getKillRegState(true)) 2478 .addReg(AArch64::WZR, getKillRegState(true)) 2479 .addImm(CondCodes[1]); 2480 2481 updateValueMap(I, ResultReg); 2482 return true; 2483 } 2484 2485 // Now set a register based on the comparison. 
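// CSINC with both sources WZR and the inverted condition produces 1 when the
// original condition holds and 0 otherwise, which is exactly the i1 result we need.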
2486 AArch64CC::CondCode CC = getCompareCC(Predicate); 2487 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2488 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2489 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2490 ResultReg) 2491 .addReg(AArch64::WZR, getKillRegState(true)) 2492 .addReg(AArch64::WZR, getKillRegState(true)) 2493 .addImm(invertedCC); 2494 2495 updateValueMap(I, ResultReg); 2496 return true; 2497 } 2498 2499 /// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false' 2500 /// value. 2501 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2502 if (!SI->getType()->isIntegerTy(1)) 2503 return false; 2504 2505 const Value *Src1Val, *Src2Val; 2506 unsigned Opc = 0; 2507 bool NeedExtraOp = false; 2508 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2509 if (CI->isOne()) { 2510 Src1Val = SI->getCondition(); 2511 Src2Val = SI->getFalseValue(); 2512 Opc = AArch64::ORRWrr; 2513 } else { 2514 assert(CI->isZero()); 2515 Src1Val = SI->getFalseValue(); 2516 Src2Val = SI->getCondition(); 2517 Opc = AArch64::BICWrr; 2518 } 2519 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2520 if (CI->isOne()) { 2521 Src1Val = SI->getCondition(); 2522 Src2Val = SI->getTrueValue(); 2523 Opc = AArch64::ORRWrr; 2524 NeedExtraOp = true; 2525 } else { 2526 assert(CI->isZero()); 2527 Src1Val = SI->getCondition(); 2528 Src2Val = SI->getTrueValue(); 2529 Opc = AArch64::ANDWrr; 2530 } 2531 } 2532 2533 if (!Opc) 2534 return false; 2535 2536 unsigned Src1Reg = getRegForValue(Src1Val); 2537 if (!Src1Reg) 2538 return false; 2539 bool Src1IsKill = hasTrivialKill(Src1Val); 2540 2541 unsigned Src2Reg = getRegForValue(Src2Val); 2542 if (!Src2Reg) 2543 return false; 2544 bool Src2IsKill = hasTrivialKill(Src2Val); 2545 2546 if (NeedExtraOp) { 2547 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1); 2548 Src1IsKill = true; 2549 } 2550 unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2551 Src1IsKill, Src2Reg, Src2IsKill); 2552 updateValueMap(SI, ResultReg); 2553 return true; 2554 } 2555 2556 bool AArch64FastISel::selectSelect(const Instruction *I) { 2557 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2558 MVT VT; 2559 if (!isTypeSupported(I->getType(), VT)) 2560 return false; 2561 2562 unsigned Opc; 2563 const TargetRegisterClass *RC; 2564 switch (VT.SimpleTy) { 2565 default: 2566 return false; 2567 case MVT::i1: 2568 case MVT::i8: 2569 case MVT::i16: 2570 case MVT::i32: 2571 Opc = AArch64::CSELWr; 2572 RC = &AArch64::GPR32RegClass; 2573 break; 2574 case MVT::i64: 2575 Opc = AArch64::CSELXr; 2576 RC = &AArch64::GPR64RegClass; 2577 break; 2578 case MVT::f32: 2579 Opc = AArch64::FCSELSrrr; 2580 RC = &AArch64::FPR32RegClass; 2581 break; 2582 case MVT::f64: 2583 Opc = AArch64::FCSELDrrr; 2584 RC = &AArch64::FPR64RegClass; 2585 break; 2586 } 2587 2588 const SelectInst *SI = cast<SelectInst>(I); 2589 const Value *Cond = SI->getCondition(); 2590 AArch64CC::CondCode CC = AArch64CC::NE; 2591 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2592 2593 if (optimizeSelect(SI)) 2594 return true; 2595 2596 // Try to pickup the flags, so we don't have to emit another compare. 2597 if (foldXALUIntrinsic(CC, I, Cond)) { 2598 // Fake request the condition to force emission of the XALU intrinsic. 
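// Requesting a register for the condition keeps the overflow intrinsic alive so
// that the flags it sets are available to the select emitted below.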
2599 unsigned CondReg = getRegForValue(Cond); 2600 if (!CondReg) 2601 return false; 2602 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2603 isValueAvailable(Cond)) { 2604 const auto *Cmp = cast<CmpInst>(Cond); 2605 // Try to optimize or fold the cmp. 2606 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2607 const Value *FoldSelect = nullptr; 2608 switch (Predicate) { 2609 default: 2610 break; 2611 case CmpInst::FCMP_FALSE: 2612 FoldSelect = SI->getFalseValue(); 2613 break; 2614 case CmpInst::FCMP_TRUE: 2615 FoldSelect = SI->getTrueValue(); 2616 break; 2617 } 2618 2619 if (FoldSelect) { 2620 unsigned SrcReg = getRegForValue(FoldSelect); 2621 if (!SrcReg) 2622 return false; 2623 unsigned UseReg = lookUpRegForValue(SI); 2624 if (UseReg) 2625 MRI.clearKillFlags(UseReg); 2626 2627 updateValueMap(I, SrcReg); 2628 return true; 2629 } 2630 2631 // Emit the cmp. 2632 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2633 return false; 2634 2635 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2636 CC = getCompareCC(Predicate); 2637 switch (Predicate) { 2638 default: 2639 break; 2640 case CmpInst::FCMP_UEQ: 2641 ExtraCC = AArch64CC::EQ; 2642 CC = AArch64CC::VS; 2643 break; 2644 case CmpInst::FCMP_ONE: 2645 ExtraCC = AArch64CC::MI; 2646 CC = AArch64CC::GT; 2647 break; 2648 } 2649 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2650 } else { 2651 unsigned CondReg = getRegForValue(Cond); 2652 if (!CondReg) 2653 return false; 2654 bool CondIsKill = hasTrivialKill(Cond); 2655 2656 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2657 CondReg = constrainOperandRegClass(II, CondReg, 1); 2658 2659 // Emit a TST instruction (ANDS wzr, reg, #imm). 2660 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, 2661 AArch64::WZR) 2662 .addReg(CondReg, getKillRegState(CondIsKill)) 2663 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2664 } 2665 2666 unsigned Src1Reg = getRegForValue(SI->getTrueValue()); 2667 bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); 2668 2669 unsigned Src2Reg = getRegForValue(SI->getFalseValue()); 2670 bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); 2671 2672 if (!Src1Reg || !Src2Reg) 2673 return false; 2674 2675 if (ExtraCC != AArch64CC::AL) { 2676 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2677 Src2IsKill, ExtraCC); 2678 Src2IsKill = true; 2679 } 2680 unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2681 Src2IsKill, CC); 2682 updateValueMap(I, ResultReg); 2683 return true; 2684 } 2685 2686 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2687 Value *V = I->getOperand(0); 2688 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2689 return false; 2690 2691 unsigned Op = getRegForValue(V); 2692 if (Op == 0) 2693 return false; 2694 2695 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); 2696 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), 2697 ResultReg).addReg(Op); 2698 updateValueMap(I, ResultReg); 2699 return true; 2700 } 2701 2702 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2703 Value *V = I->getOperand(0); 2704 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2705 return false; 2706 2707 unsigned Op = getRegForValue(V); 2708 if (Op == 0) 2709 return false; 2710 2711 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); 2712 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), 2713 
ResultReg).addReg(Op); 2714 updateValueMap(I, ResultReg); 2715 return true; 2716 } 2717 2718 // FPToUI and FPToSI 2719 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2720 MVT DestVT; 2721 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2722 return false; 2723 2724 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2725 if (SrcReg == 0) 2726 return false; 2727 2728 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2729 if (SrcVT == MVT::f128) 2730 return false; 2731 2732 unsigned Opc; 2733 if (SrcVT == MVT::f64) { 2734 if (Signed) 2735 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2736 else 2737 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2738 } else { 2739 if (Signed) 2740 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2741 else 2742 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2743 } 2744 unsigned ResultReg = createResultReg( 2745 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2746 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 2747 .addReg(SrcReg); 2748 updateValueMap(I, ResultReg); 2749 return true; 2750 } 2751 2752 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2753 MVT DestVT; 2754 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2755 return false; 2756 assert ((DestVT == MVT::f32 || DestVT == MVT::f64) && 2757 "Unexpected value type."); 2758 2759 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2760 if (!SrcReg) 2761 return false; 2762 bool SrcIsKill = hasTrivialKill(I->getOperand(0)); 2763 2764 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2765 2766 // Handle sign-extension. 2767 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2768 SrcReg = 2769 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2770 if (!SrcReg) 2771 return false; 2772 SrcIsKill = true; 2773 } 2774 2775 unsigned Opc; 2776 if (SrcVT == MVT::i64) { 2777 if (Signed) 2778 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2779 else 2780 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2781 } else { 2782 if (Signed) 2783 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2784 else 2785 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2786 } 2787 2788 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg, 2789 SrcIsKill); 2790 updateValueMap(I, ResultReg); 2791 return true; 2792 } 2793 2794 bool AArch64FastISel::fastLowerArguments() { 2795 if (!FuncInfo.CanLowerReturn) 2796 return false; 2797 2798 const Function *F = FuncInfo.Fn; 2799 if (F->isVarArg()) 2800 return false; 2801 2802 CallingConv::ID CC = F->getCallingConv(); 2803 if (CC != CallingConv::C) 2804 return false; 2805 2806 // Only handle simple cases of up to 8 GPR and FPR each. 2807 unsigned GPRCnt = 0; 2808 unsigned FPRCnt = 0; 2809 unsigned Idx = 0; 2810 for (auto const &Arg : F->args()) { 2811 // The first argument is at index 1. 
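// Attribute index 0 describes the return value, so parameter attributes are
// queried starting at index 1.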
2812 ++Idx; 2813 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || 2814 F->getAttributes().hasAttribute(Idx, Attribute::InReg) || 2815 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || 2816 F->getAttributes().hasAttribute(Idx, Attribute::Nest)) 2817 return false; 2818 2819 Type *ArgTy = Arg.getType(); 2820 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2821 return false; 2822 2823 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2824 if (!ArgVT.isSimple()) 2825 return false; 2826 2827 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2828 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2829 return false; 2830 2831 if (VT.isVector() && 2832 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2833 return false; 2834 2835 if (VT >= MVT::i1 && VT <= MVT::i64) 2836 ++GPRCnt; 2837 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2838 VT.is128BitVector()) 2839 ++FPRCnt; 2840 else 2841 return false; 2842 2843 if (GPRCnt > 8 || FPRCnt > 8) 2844 return false; 2845 } 2846 2847 static const MCPhysReg Registers[6][8] = { 2848 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2849 AArch64::W5, AArch64::W6, AArch64::W7 }, 2850 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2851 AArch64::X5, AArch64::X6, AArch64::X7 }, 2852 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2853 AArch64::H5, AArch64::H6, AArch64::H7 }, 2854 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2855 AArch64::S5, AArch64::S6, AArch64::S7 }, 2856 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2857 AArch64::D5, AArch64::D6, AArch64::D7 }, 2858 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2859 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2860 }; 2861 2862 unsigned GPRIdx = 0; 2863 unsigned FPRIdx = 0; 2864 for (auto const &Arg : F->args()) { 2865 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2866 unsigned SrcReg; 2867 const TargetRegisterClass *RC; 2868 if (VT >= MVT::i1 && VT <= MVT::i32) { 2869 SrcReg = Registers[0][GPRIdx++]; 2870 RC = &AArch64::GPR32RegClass; 2871 VT = MVT::i32; 2872 } else if (VT == MVT::i64) { 2873 SrcReg = Registers[1][GPRIdx++]; 2874 RC = &AArch64::GPR64RegClass; 2875 } else if (VT == MVT::f16) { 2876 SrcReg = Registers[2][FPRIdx++]; 2877 RC = &AArch64::FPR16RegClass; 2878 } else if (VT == MVT::f32) { 2879 SrcReg = Registers[3][FPRIdx++]; 2880 RC = &AArch64::FPR32RegClass; 2881 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2882 SrcReg = Registers[4][FPRIdx++]; 2883 RC = &AArch64::FPR64RegClass; 2884 } else if (VT.is128BitVector()) { 2885 SrcReg = Registers[5][FPRIdx++]; 2886 RC = &AArch64::FPR128RegClass; 2887 } else 2888 llvm_unreachable("Unexpected value type."); 2889 2890 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 2891 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 2892 // Without this, EmitLiveInCopies may eliminate the livein if its only 2893 // use is a bitcast (which isn't turned into an instruction). 
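// Map the IR argument to this extra copy rather than to the live-in vreg itself.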
2894 unsigned ResultReg = createResultReg(RC); 2895 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2896 TII.get(TargetOpcode::COPY), ResultReg) 2897 .addReg(DstReg, getKillRegState(true)); 2898 updateValueMap(&Arg, ResultReg); 2899 } 2900 return true; 2901 } 2902 2903 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 2904 SmallVectorImpl<MVT> &OutVTs, 2905 unsigned &NumBytes) { 2906 CallingConv::ID CC = CLI.CallConv; 2907 SmallVector<CCValAssign, 16> ArgLocs; 2908 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 2909 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 2910 2911 // Get a count of how many bytes are to be pushed on the stack. 2912 NumBytes = CCInfo.getNextStackOffset(); 2913 2914 // Issue CALLSEQ_START 2915 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 2916 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 2917 .addImm(NumBytes); 2918 2919 // Process the args. 2920 for (CCValAssign &VA : ArgLocs) { 2921 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 2922 MVT ArgVT = OutVTs[VA.getValNo()]; 2923 2924 unsigned ArgReg = getRegForValue(ArgVal); 2925 if (!ArgReg) 2926 return false; 2927 2928 // Handle arg promotion: SExt, ZExt, AExt. 2929 switch (VA.getLocInfo()) { 2930 case CCValAssign::Full: 2931 break; 2932 case CCValAssign::SExt: { 2933 MVT DestVT = VA.getLocVT(); 2934 MVT SrcVT = ArgVT; 2935 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 2936 if (!ArgReg) 2937 return false; 2938 break; 2939 } 2940 case CCValAssign::AExt: 2941 // Intentional fall-through. 2942 case CCValAssign::ZExt: { 2943 MVT DestVT = VA.getLocVT(); 2944 MVT SrcVT = ArgVT; 2945 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 2946 if (!ArgReg) 2947 return false; 2948 break; 2949 } 2950 default: 2951 llvm_unreachable("Unknown arg promotion!"); 2952 } 2953 2954 // Now copy/store arg to correct locations. 2955 if (VA.isRegLoc() && !VA.needsCustom()) { 2956 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2957 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 2958 CLI.OutRegs.push_back(VA.getLocReg()); 2959 } else if (VA.needsCustom()) { 2960 // FIXME: Handle custom args. 2961 return false; 2962 } else { 2963 assert(VA.isMemLoc() && "Assuming store on stack."); 2964 2965 // Don't emit stores for undef values. 2966 if (isa<UndefValue>(ArgVal)) 2967 continue; 2968 2969 // Need to store on the stack. 2970 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 2971 2972 unsigned BEAlign = 0; 2973 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 2974 BEAlign = 8 - ArgSize; 2975 2976 Address Addr; 2977 Addr.setKind(Address::RegBase); 2978 Addr.setReg(AArch64::SP); 2979 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 2980 2981 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); 2982 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 2983 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 2984 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 2985 2986 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 2987 return false; 2988 } 2989 } 2990 return true; 2991 } 2992 2993 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, 2994 unsigned NumBytes) { 2995 CallingConv::ID CC = CLI.CallConv; 2996 2997 // Issue CALLSEQ_END 2998 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 2999 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 3000 .addImm(NumBytes).addImm(0); 3001 3002 // Now the return value. 
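// Only a single return location is supported; copy the value out of its physical
// register into a fresh vreg and report it back through CLI.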
3003 if (RetVT != MVT::isVoid) { 3004 SmallVector<CCValAssign, 16> RVLocs; 3005 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3006 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); 3007 3008 // Only handle a single return value. 3009 if (RVLocs.size() != 1) 3010 return false; 3011 3012 // Copy all of the result registers out of their specified physreg. 3013 MVT CopyVT = RVLocs[0].getValVT(); 3014 3015 // TODO: Handle big-endian results 3016 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3017 return false; 3018 3019 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); 3020 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3021 TII.get(TargetOpcode::COPY), ResultReg) 3022 .addReg(RVLocs[0].getLocReg()); 3023 CLI.InRegs.push_back(RVLocs[0].getLocReg()); 3024 3025 CLI.ResultReg = ResultReg; 3026 CLI.NumResultRegs = 1; 3027 } 3028 3029 return true; 3030 } 3031 3032 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3033 CallingConv::ID CC = CLI.CallConv; 3034 bool IsTailCall = CLI.IsTailCall; 3035 bool IsVarArg = CLI.IsVarArg; 3036 const Value *Callee = CLI.Callee; 3037 MCSymbol *Symbol = CLI.Symbol; 3038 3039 if (!Callee && !Symbol) 3040 return false; 3041 3042 // Allow SelectionDAG isel to handle tail calls. 3043 if (IsTailCall) 3044 return false; 3045 3046 CodeModel::Model CM = TM.getCodeModel(); 3047 // Only support the small and large code model. 3048 if (CM != CodeModel::Small && CM != CodeModel::Large) 3049 return false; 3050 3051 // FIXME: Add large code model support for ELF. 3052 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3053 return false; 3054 3055 // Let SDISel handle vararg functions. 3056 if (IsVarArg) 3057 return false; 3058 3059 // FIXME: Only handle *simple* calls for now. 3060 MVT RetVT; 3061 if (CLI.RetTy->isVoidTy()) 3062 RetVT = MVT::isVoid; 3063 else if (!isTypeLegal(CLI.RetTy, RetVT)) 3064 return false; 3065 3066 for (auto Flag : CLI.OutFlags) 3067 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal()) 3068 return false; 3069 3070 // Set up the argument vectors. 3071 SmallVector<MVT, 16> OutVTs; 3072 OutVTs.reserve(CLI.OutVals.size()); 3073 3074 for (auto *Val : CLI.OutVals) { 3075 MVT VT; 3076 if (!isTypeLegal(Val->getType(), VT) && 3077 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3078 return false; 3079 3080 // We don't handle vector parameters yet. 3081 if (VT.isVector() || VT.getSizeInBits() > 64) 3082 return false; 3083 3084 OutVTs.push_back(VT); 3085 } 3086 3087 Address Addr; 3088 if (Callee && !computeCallAddress(Callee, Addr)) 3089 return false; 3090 3091 // Handle the arguments now that we've gotten them. 3092 unsigned NumBytes; 3093 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3094 return false; 3095 3096 // Issue the call. 3097 MachineInstrBuilder MIB; 3098 if (CM == CodeModel::Small) { 3099 const MCInstrDesc &II = TII.get(Addr.getReg() ? 
AArch64::BLR : AArch64::BL); 3100 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); 3101 if (Symbol) 3102 MIB.addSym(Symbol, 0); 3103 else if (Addr.getGlobalValue()) 3104 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3105 else if (Addr.getReg()) { 3106 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3107 MIB.addReg(Reg); 3108 } else 3109 return false; 3110 } else { 3111 unsigned CallReg = 0; 3112 if (Symbol) { 3113 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3114 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 3115 ADRPReg) 3116 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3117 3118 CallReg = createResultReg(&AArch64::GPR64RegClass); 3119 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3120 TII.get(AArch64::LDRXui), CallReg) 3121 .addReg(ADRPReg) 3122 .addSym(Symbol, 3123 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3124 } else if (Addr.getGlobalValue()) 3125 CallReg = materializeGV(Addr.getGlobalValue()); 3126 else if (Addr.getReg()) 3127 CallReg = Addr.getReg(); 3128 3129 if (!CallReg) 3130 return false; 3131 3132 const MCInstrDesc &II = TII.get(AArch64::BLR); 3133 CallReg = constrainOperandRegClass(II, CallReg, 0); 3134 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); 3135 } 3136 3137 // Add implicit physical register uses to the call. 3138 for (auto Reg : CLI.OutRegs) 3139 MIB.addReg(Reg, RegState::Implicit); 3140 3141 // Add a register mask with the call-preserved registers. 3142 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3143 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3144 3145 CLI.Call = MIB; 3146 3147 // Finish off the call including any return values. 3148 return finishCall(CLI, RetVT, NumBytes); 3149 } 3150 3151 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { 3152 if (Alignment) 3153 return Len / Alignment <= 4; 3154 else 3155 return Len < 32; 3156 } 3157 3158 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3159 uint64_t Len, unsigned Alignment) { 3160 // Make sure we don't bloat code by inlining very large memcpy's. 3161 if (!isMemCpySmall(Len, Alignment)) 3162 return false; 3163 3164 int64_t UnscaledOffset = 0; 3165 Address OrigDest = Dest; 3166 Address OrigSrc = Src; 3167 3168 while (Len) { 3169 MVT VT; 3170 if (!Alignment || Alignment >= 8) { 3171 if (Len >= 8) 3172 VT = MVT::i64; 3173 else if (Len >= 4) 3174 VT = MVT::i32; 3175 else if (Len >= 2) 3176 VT = MVT::i16; 3177 else { 3178 VT = MVT::i8; 3179 } 3180 } else { 3181 // Bound based on alignment. 3182 if (Len >= 4 && Alignment == 4) 3183 VT = MVT::i32; 3184 else if (Len >= 2 && Alignment == 2) 3185 VT = MVT::i16; 3186 else { 3187 VT = MVT::i8; 3188 } 3189 } 3190 3191 unsigned ResultReg = emitLoad(VT, VT, Src); 3192 if (!ResultReg) 3193 return false; 3194 3195 if (!emitStore(VT, ResultReg, Dest)) 3196 return false; 3197 3198 int64_t Size = VT.getSizeInBits() / 8; 3199 Len -= Size; 3200 UnscaledOffset += Size; 3201 3202 // We need to recompute the unscaled offset for each iteration. 3203 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3204 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3205 } 3206 3207 return true; 3208 } 3209 3210 /// \brief Check if it is possible to fold the condition from the XALU intrinsic 3211 /// into the user. The condition code will only be updated on success. 
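/// Folding requires the intrinsic and the user to be in the same basic block, with
/// nothing but extractvalues of the intrinsic between them.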
3212 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3213 const Instruction *I, 3214 const Value *Cond) { 3215 if (!isa<ExtractValueInst>(Cond)) 3216 return false; 3217 3218 const auto *EV = cast<ExtractValueInst>(Cond); 3219 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3220 return false; 3221 3222 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3223 MVT RetVT; 3224 const Function *Callee = II->getCalledFunction(); 3225 Type *RetTy = 3226 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3227 if (!isTypeLegal(RetTy, RetVT)) 3228 return false; 3229 3230 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3231 return false; 3232 3233 const Value *LHS = II->getArgOperand(0); 3234 const Value *RHS = II->getArgOperand(1); 3235 3236 // Canonicalize immediate to the RHS. 3237 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3238 isCommutativeIntrinsic(II)) 3239 std::swap(LHS, RHS); 3240 3241 // Simplify multiplies. 3242 Intrinsic::ID IID = II->getIntrinsicID(); 3243 switch (IID) { 3244 default: 3245 break; 3246 case Intrinsic::smul_with_overflow: 3247 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3248 if (C->getValue() == 2) 3249 IID = Intrinsic::sadd_with_overflow; 3250 break; 3251 case Intrinsic::umul_with_overflow: 3252 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3253 if (C->getValue() == 2) 3254 IID = Intrinsic::uadd_with_overflow; 3255 break; 3256 } 3257 3258 AArch64CC::CondCode TmpCC; 3259 switch (IID) { 3260 default: 3261 return false; 3262 case Intrinsic::sadd_with_overflow: 3263 case Intrinsic::ssub_with_overflow: 3264 TmpCC = AArch64CC::VS; 3265 break; 3266 case Intrinsic::uadd_with_overflow: 3267 TmpCC = AArch64CC::HS; 3268 break; 3269 case Intrinsic::usub_with_overflow: 3270 TmpCC = AArch64CC::LO; 3271 break; 3272 case Intrinsic::smul_with_overflow: 3273 case Intrinsic::umul_with_overflow: 3274 TmpCC = AArch64CC::NE; 3275 break; 3276 } 3277 3278 // Check if both instructions are in the same basic block. 3279 if (!isValueAvailable(II)) 3280 return false; 3281 3282 // Make sure nothing is in the way 3283 BasicBlock::const_iterator Start(I); 3284 BasicBlock::const_iterator End(II); 3285 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3286 // We only expect extractvalue instructions between the intrinsic and the 3287 // instruction to be selected. 3288 if (!isa<ExtractValueInst>(Itr)) 3289 return false; 3290 3291 // Check that the extractvalue operand comes from the intrinsic. 3292 const auto *EVI = cast<ExtractValueInst>(Itr); 3293 if (EVI->getAggregateOperand() != II) 3294 return false; 3295 } 3296 3297 CC = TmpCC; 3298 return true; 3299 } 3300 3301 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3302 // FIXME: Handle more intrinsics. 3303 switch (II->getIntrinsicID()) { 3304 default: return false; 3305 case Intrinsic::frameaddress: { 3306 MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); 3307 MFI->setFrameAddressIsTaken(true); 3308 3309 const AArch64RegisterInfo *RegInfo = 3310 static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo()); 3311 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3312 unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3313 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3314 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3315 // Recursively load frame address 3316 // ldr x0, [fp] 3317 // ldr x0, [x0] 3318 // ldr x0, [x0] 3319 // ... 
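// Each iteration loads the caller's saved frame pointer, walking one frame up the
// stack per requested depth level.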
3320 unsigned DestReg; 3321 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3322 while (Depth--) { 3323 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3324 SrcReg, /*IsKill=*/true, 0); 3325 assert(DestReg && "Unexpected LDR instruction emission failure."); 3326 SrcReg = DestReg; 3327 } 3328 3329 updateValueMap(II, SrcReg); 3330 return true; 3331 } 3332 case Intrinsic::memcpy: 3333 case Intrinsic::memmove: { 3334 const auto *MTI = cast<MemTransferInst>(II); 3335 // Don't handle volatile. 3336 if (MTI->isVolatile()) 3337 return false; 3338 3339 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3340 // we would emit dead code because we don't currently handle memmoves. 3341 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3342 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3343 // Small memcpy's are common enough that we want to do them without a call 3344 // if possible. 3345 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3346 unsigned Alignment = MTI->getAlignment(); 3347 if (isMemCpySmall(Len, Alignment)) { 3348 Address Dest, Src; 3349 if (!computeAddress(MTI->getRawDest(), Dest) || 3350 !computeAddress(MTI->getRawSource(), Src)) 3351 return false; 3352 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3353 return true; 3354 } 3355 } 3356 3357 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3358 return false; 3359 3360 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3361 // Fast instruction selection doesn't support the special 3362 // address spaces. 3363 return false; 3364 3365 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3366 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); 3367 } 3368 case Intrinsic::memset: { 3369 const MemSetInst *MSI = cast<MemSetInst>(II); 3370 // Don't handle volatile. 3371 if (MSI->isVolatile()) 3372 return false; 3373 3374 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3375 return false; 3376 3377 if (MSI->getDestAddressSpace() > 255) 3378 // Fast instruction selection doesn't support the special 3379 // address spaces. 3380 return false; 3381 3382 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); 3383 } 3384 case Intrinsic::sin: 3385 case Intrinsic::cos: 3386 case Intrinsic::pow: { 3387 MVT RetVT; 3388 if (!isTypeLegal(II->getType(), RetVT)) 3389 return false; 3390 3391 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3392 return false; 3393 3394 static const RTLIB::Libcall LibCallTable[3][2] = { 3395 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3396 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3397 { RTLIB::POW_F32, RTLIB::POW_F64 } 3398 }; 3399 RTLIB::Libcall LC; 3400 bool Is64Bit = RetVT == MVT::f64; 3401 switch (II->getIntrinsicID()) { 3402 default: 3403 llvm_unreachable("Unexpected intrinsic."); 3404 case Intrinsic::sin: 3405 LC = LibCallTable[0][Is64Bit]; 3406 break; 3407 case Intrinsic::cos: 3408 LC = LibCallTable[1][Is64Bit]; 3409 break; 3410 case Intrinsic::pow: 3411 LC = LibCallTable[2][Is64Bit]; 3412 break; 3413 } 3414 3415 ArgListTy Args; 3416 Args.reserve(II->getNumArgOperands()); 3417 3418 // Populate the argument list. 
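// Wrap each intrinsic operand in an ArgListEntry with its IR type; lowerCallTo
// then emits the sin/cos/pow libcall like any other call.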
3419 for (auto &Arg : II->arg_operands()) { 3420 ArgListEntry Entry; 3421 Entry.Val = Arg; 3422 Entry.Ty = Arg->getType(); 3423 Args.push_back(Entry); 3424 } 3425 3426 CallLoweringInfo CLI; 3427 MCContext &Ctx = MF->getContext(); 3428 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3429 TLI.getLibcallName(LC), std::move(Args)); 3430 if (!lowerCallTo(CLI)) 3431 return false; 3432 updateValueMap(II, CLI.ResultReg); 3433 return true; 3434 } 3435 case Intrinsic::fabs: { 3436 MVT VT; 3437 if (!isTypeLegal(II->getType(), VT)) 3438 return false; 3439 3440 unsigned Opc; 3441 switch (VT.SimpleTy) { 3442 default: 3443 return false; 3444 case MVT::f32: 3445 Opc = AArch64::FABSSr; 3446 break; 3447 case MVT::f64: 3448 Opc = AArch64::FABSDr; 3449 break; 3450 } 3451 unsigned SrcReg = getRegForValue(II->getOperand(0)); 3452 if (!SrcReg) 3453 return false; 3454 bool SrcRegIsKill = hasTrivialKill(II->getOperand(0)); 3455 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3456 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 3457 .addReg(SrcReg, getKillRegState(SrcRegIsKill)); 3458 updateValueMap(II, ResultReg); 3459 return true; 3460 } 3461 case Intrinsic::trap: { 3462 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3463 .addImm(1); 3464 return true; 3465 } 3466 case Intrinsic::sqrt: { 3467 Type *RetTy = II->getCalledFunction()->getReturnType(); 3468 3469 MVT VT; 3470 if (!isTypeLegal(RetTy, VT)) 3471 return false; 3472 3473 unsigned Op0Reg = getRegForValue(II->getOperand(0)); 3474 if (!Op0Reg) 3475 return false; 3476 bool Op0IsKill = hasTrivialKill(II->getOperand(0)); 3477 3478 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill); 3479 if (!ResultReg) 3480 return false; 3481 3482 updateValueMap(II, ResultReg); 3483 return true; 3484 } 3485 case Intrinsic::sadd_with_overflow: 3486 case Intrinsic::uadd_with_overflow: 3487 case Intrinsic::ssub_with_overflow: 3488 case Intrinsic::usub_with_overflow: 3489 case Intrinsic::smul_with_overflow: 3490 case Intrinsic::umul_with_overflow: { 3491 // This implements the basic lowering of the xalu with overflow intrinsics. 3492 const Function *Callee = II->getCalledFunction(); 3493 auto *Ty = cast<StructType>(Callee->getReturnType()); 3494 Type *RetTy = Ty->getTypeAtIndex(0U); 3495 3496 MVT VT; 3497 if (!isTypeLegal(RetTy, VT)) 3498 return false; 3499 3500 if (VT != MVT::i32 && VT != MVT::i64) 3501 return false; 3502 3503 const Value *LHS = II->getArgOperand(0); 3504 const Value *RHS = II->getArgOperand(1); 3505 // Canonicalize immediate to the RHS. 3506 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3507 isCommutativeIntrinsic(II)) 3508 std::swap(LHS, RHS); 3509 3510 // Simplify multiplies. 
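    // (A multiply by 2 is the same as adding the operand to itself, so the
    // *mul.with.overflow cases below are rewritten to the corresponding
    // *add.with.overflow with RHS = LHS.)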
3511 Intrinsic::ID IID = II->getIntrinsicID(); 3512 switch (IID) { 3513 default: 3514 break; 3515 case Intrinsic::smul_with_overflow: 3516 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3517 if (C->getValue() == 2) { 3518 IID = Intrinsic::sadd_with_overflow; 3519 RHS = LHS; 3520 } 3521 break; 3522 case Intrinsic::umul_with_overflow: 3523 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3524 if (C->getValue() == 2) { 3525 IID = Intrinsic::uadd_with_overflow; 3526 RHS = LHS; 3527 } 3528 break; 3529 } 3530 3531 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3532 AArch64CC::CondCode CC = AArch64CC::Invalid; 3533 switch (IID) { 3534 default: llvm_unreachable("Unexpected intrinsic!"); 3535 case Intrinsic::sadd_with_overflow: 3536 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3537 CC = AArch64CC::VS; 3538 break; 3539 case Intrinsic::uadd_with_overflow: 3540 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3541 CC = AArch64CC::HS; 3542 break; 3543 case Intrinsic::ssub_with_overflow: 3544 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3545 CC = AArch64CC::VS; 3546 break; 3547 case Intrinsic::usub_with_overflow: 3548 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3549 CC = AArch64CC::LO; 3550 break; 3551 case Intrinsic::smul_with_overflow: { 3552 CC = AArch64CC::NE; 3553 unsigned LHSReg = getRegForValue(LHS); 3554 if (!LHSReg) 3555 return false; 3556 bool LHSIsKill = hasTrivialKill(LHS); 3557 3558 unsigned RHSReg = getRegForValue(RHS); 3559 if (!RHSReg) 3560 return false; 3561 bool RHSIsKill = hasTrivialKill(RHS); 3562 3563 if (VT == MVT::i32) { 3564 MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3565 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, 3566 /*IsKill=*/false, 32); 3567 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3568 AArch64::sub_32); 3569 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, 3570 AArch64::sub_32); 3571 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3572 AArch64_AM::ASR, 31, /*WantResult=*/false); 3573 } else { 3574 assert(VT == MVT::i64 && "Unexpected value type."); 3575 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3576 // reused in the next instruction. 3577 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3578 /*IsKill=*/false); 3579 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, 3580 RHSReg, RHSIsKill); 3581 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3582 AArch64_AM::ASR, 63, /*WantResult=*/false); 3583 } 3584 break; 3585 } 3586 case Intrinsic::umul_with_overflow: { 3587 CC = AArch64CC::NE; 3588 unsigned LHSReg = getRegForValue(LHS); 3589 if (!LHSReg) 3590 return false; 3591 bool LHSIsKill = hasTrivialKill(LHS); 3592 3593 unsigned RHSReg = getRegForValue(RHS); 3594 if (!RHSReg) 3595 return false; 3596 bool RHSIsKill = hasTrivialKill(RHS); 3597 3598 if (VT == MVT::i32) { 3599 MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3600 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, 3601 /*IsKill=*/false, AArch64_AM::LSR, 32, 3602 /*WantResult=*/false); 3603 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3604 AArch64::sub_32); 3605 } else { 3606 assert(VT == MVT::i64 && "Unexpected value type."); 3607 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3608 // reused in the next instruction. 
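      // The unsigned overflow check here is "umulh(LHS, RHS) != 0": the
      // product overflows 64 bits exactly when any of the high 64 bits of the
      // full 128-bit product are set.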
3609 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3610 /*IsKill=*/false); 3611 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, 3612 RHSReg, RHSIsKill); 3613 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, 3614 /*IsKill=*/false, /*WantResult=*/false); 3615 } 3616 break; 3617 } 3618 } 3619 3620 if (MulReg) { 3621 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3622 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3623 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3624 } 3625 3626 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3627 AArch64::WZR, /*IsKill=*/true, AArch64::WZR, 3628 /*IsKill=*/true, getInvertedCondCode(CC)); 3629 (void)ResultReg2; 3630 assert((ResultReg1 + 1) == ResultReg2 && 3631 "Nonconsecutive result registers."); 3632 updateValueMap(II, ResultReg1, 2); 3633 return true; 3634 } 3635 } 3636 return false; 3637 } 3638 3639 bool AArch64FastISel::selectRet(const Instruction *I) { 3640 const ReturnInst *Ret = cast<ReturnInst>(I); 3641 const Function &F = *I->getParent()->getParent(); 3642 3643 if (!FuncInfo.CanLowerReturn) 3644 return false; 3645 3646 if (F.isVarArg()) 3647 return false; 3648 3649 if (TLI.supportSplitCSR(FuncInfo.MF)) 3650 return false; 3651 3652 // Build a list of return value registers. 3653 SmallVector<unsigned, 4> RetRegs; 3654 3655 if (Ret->getNumOperands() > 0) { 3656 CallingConv::ID CC = F.getCallingConv(); 3657 SmallVector<ISD::OutputArg, 4> Outs; 3658 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3659 3660 // Analyze operands of the call, assigning locations to each operand. 3661 SmallVector<CCValAssign, 16> ValLocs; 3662 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3663 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 3664 : RetCC_AArch64_AAPCS; 3665 CCInfo.AnalyzeReturn(Outs, RetCC); 3666 3667 // Only handle a single return value for now. 3668 if (ValLocs.size() != 1) 3669 return false; 3670 3671 CCValAssign &VA = ValLocs[0]; 3672 const Value *RV = Ret->getOperand(0); 3673 3674 // Don't bother handling odd stuff for now. 3675 if ((VA.getLocInfo() != CCValAssign::Full) && 3676 (VA.getLocInfo() != CCValAssign::BCvt)) 3677 return false; 3678 3679 // Only handle register returns for now. 3680 if (!VA.isRegLoc()) 3681 return false; 3682 3683 unsigned Reg = getRegForValue(RV); 3684 if (Reg == 0) 3685 return false; 3686 3687 unsigned SrcReg = Reg + VA.getValNo(); 3688 unsigned DestReg = VA.getLocReg(); 3689 // Avoid a cross-class copy. This is very unlikely. 3690 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3691 return false; 3692 3693 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3694 if (!RVEVT.isSimple()) 3695 return false; 3696 3697 // Vectors (of > 1 lane) in big endian need tricky handling. 3698 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 && 3699 !Subtarget->isLittleEndian()) 3700 return false; 3701 3702 MVT RVVT = RVEVT.getSimpleVT(); 3703 if (RVVT == MVT::f128) 3704 return false; 3705 3706 MVT DestVT = VA.getValVT(); 3707 // Special handling for extended integers. 3708 if (RVVT != DestVT) { 3709 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3710 return false; 3711 3712 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3713 return false; 3714 3715 bool IsZExt = Outs[0].Flags.isZExt(); 3716 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3717 if (SrcReg == 0) 3718 return false; 3719 } 3720 3721 // Make the copy. 
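    // (i.e. move the value into the physical register that the calling
    // convention assigns to the return value.)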
3722     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3723             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3724
3725     // Add register to return instruction.
3726     RetRegs.push_back(VA.getLocReg());
3727   }
3728
3729   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3730                                     TII.get(AArch64::RET_ReallyLR));
3731   for (unsigned RetReg : RetRegs)
3732     MIB.addReg(RetReg, RegState::Implicit);
3733   return true;
3734 }
3735
3736 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3737   Type *DestTy = I->getType();
3738   Value *Op = I->getOperand(0);
3739   Type *SrcTy = Op->getType();
3740
3741   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3742   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3743   if (!SrcEVT.isSimple())
3744     return false;
3745   if (!DestEVT.isSimple())
3746     return false;
3747
3748   MVT SrcVT = SrcEVT.getSimpleVT();
3749   MVT DestVT = DestEVT.getSimpleVT();
3750
3751   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3752       SrcVT != MVT::i8)
3753     return false;
3754   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3755       DestVT != MVT::i1)
3756     return false;
3757
3758   unsigned SrcReg = getRegForValue(Op);
3759   if (!SrcReg)
3760     return false;
3761   bool SrcIsKill = hasTrivialKill(Op);
3762
3763   // If we're truncating from i64 to a smaller non-legal type then generate an
3764   // AND. Otherwise, we know the high bits are undefined and a truncate only
3765   // generates a COPY. We cannot also mark the source register as the result
3766   // register, because this can incorrectly transfer the kill flag onto the
3767   // source register.
3768   unsigned ResultReg;
3769   if (SrcVT == MVT::i64) {
3770     uint64_t Mask = 0;
3771     switch (DestVT.SimpleTy) {
3772     default:
3773       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3774       return false;
3775     case MVT::i1:
3776       Mask = 0x1;
3777       break;
3778     case MVT::i8:
3779       Mask = 0xff;
3780       break;
3781     case MVT::i16:
3782       Mask = 0xffff;
3783       break;
3784     }
3785     // Issue an extract_subreg to get the lower 32 bits.
3786     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3787                                                 AArch64::sub_32);
3788     // Create the AND instruction which performs the actual truncation.
3789     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3790     assert(ResultReg && "Unexpected AND instruction emission failure.");
3791   } else {
3792     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3793     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3794             TII.get(TargetOpcode::COPY), ResultReg)
3795         .addReg(SrcReg, getKillRegState(SrcIsKill));
3796   }
3797
3798   updateValueMap(I, ResultReg);
3799   return true;
3800 }
3801
3802 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3803   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3804           DestVT == MVT::i64) &&
3805          "Unexpected value type.");
3806   // Handle i8 and i16 as i32.
3807   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3808     DestVT = MVT::i32;
3809
3810   if (IsZExt) {
3811     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3812     assert(ResultReg && "Unexpected AND instruction emission failure.");
3813     if (DestVT == MVT::i64) {
3814       // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
3815       // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
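      // (SUBREG_TO_REG is a pseudo instruction: rather than emitting a real
      // zero-extending AArch64 instruction, it records that the upper 32 bits
      // of the X register are already known to be zero.)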
3816       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3817       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3818               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3819           .addImm(0)
3820           .addReg(ResultReg)
3821           .addImm(AArch64::sub_32);
3822       ResultReg = Reg64;
3823     }
3824     return ResultReg;
3825   } else {
3826     if (DestVT == MVT::i64) {
3827       // FIXME: We're SExt i1 to i64.
3828       return 0;
3829     }
3830     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3831                             /*TODO:IsKill=*/false, 0, 0);
3832   }
3833 }
3834
3835 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3836                                      unsigned Op1, bool Op1IsKill) {
3837   unsigned Opc, ZReg;
3838   switch (RetVT.SimpleTy) {
3839   default: return 0;
3840   case MVT::i8:
3841   case MVT::i16:
3842   case MVT::i32:
3843     RetVT = MVT::i32;
3844     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3845   case MVT::i64:
3846     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3847   }
3848
3849   const TargetRegisterClass *RC =
3850       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3851   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3852                           ZReg, /*IsKill=*/true);
3853 }
3854
3855 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3856                                        unsigned Op1, bool Op1IsKill) {
3857   if (RetVT != MVT::i64)
3858     return 0;
3859
3860   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3861                           Op0, Op0IsKill, Op1, Op1IsKill,
3862                           AArch64::XZR, /*IsKill=*/true);
3863 }
3864
3865 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3866                                        unsigned Op1, bool Op1IsKill) {
3867   if (RetVT != MVT::i64)
3868     return 0;
3869
3870   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3871                           Op0, Op0IsKill, Op1, Op1IsKill,
3872                           AArch64::XZR, /*IsKill=*/true);
3873 }
3874
3875 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3876                                      unsigned Op1Reg, bool Op1IsKill) {
3877   unsigned Opc = 0;
3878   bool NeedTrunc = false;
3879   uint64_t Mask = 0;
3880   switch (RetVT.SimpleTy) {
3881   default: return 0;
3882   case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
3883   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3884   case MVT::i32: Opc = AArch64::LSLVWr; break;
3885   case MVT::i64: Opc = AArch64::LSLVXr; break;
3886   }
3887
3888   const TargetRegisterClass *RC =
3889       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3890   if (NeedTrunc) {
3891     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3892     Op1IsKill = true;
3893   }
3894   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3895                                        Op1IsKill);
3896   if (NeedTrunc)
3897     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3898   return ResultReg;
3899 }
3900
3901 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3902                                      bool Op0IsKill, uint64_t Shift,
3903                                      bool IsZExt) {
3904   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3905          "Unexpected source/return type pair.");
3906   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
3907           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
3908          "Unexpected source value type.");
3909   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3910           RetVT == MVT::i64) && "Unexpected return value type.");
3911
3912   bool Is64Bit = (RetVT == MVT::i64);
3913   unsigned RegSize = Is64Bit ?
64 : 32; 3914 unsigned DstBits = RetVT.getSizeInBits(); 3915 unsigned SrcBits = SrcVT.getSizeInBits(); 3916 const TargetRegisterClass *RC = 3917 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3918 3919 // Just emit a copy for "zero" shifts. 3920 if (Shift == 0) { 3921 if (RetVT == SrcVT) { 3922 unsigned ResultReg = createResultReg(RC); 3923 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3924 TII.get(TargetOpcode::COPY), ResultReg) 3925 .addReg(Op0, getKillRegState(Op0IsKill)); 3926 return ResultReg; 3927 } else 3928 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 3929 } 3930 3931 // Don't deal with undefined shifts. 3932 if (Shift >= DstBits) 3933 return 0; 3934 3935 // For immediate shifts we can fold the zero-/sign-extension into the shift. 3936 // {S|U}BFM Wd, Wn, #r, #s 3937 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 3938 3939 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 3940 // %2 = shl i16 %1, 4 3941 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 3942 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 3943 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 3944 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 3945 3946 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 3947 // %2 = shl i16 %1, 8 3948 // Wd<32+7-24,32-24> = Wn<7:0> 3949 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 3950 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 3951 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 3952 3953 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 3954 // %2 = shl i16 %1, 12 3955 // Wd<32+3-20,32-20> = Wn<3:0> 3956 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 3957 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 3958 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 3959 3960 unsigned ImmR = RegSize - Shift; 3961 // Limit the width to the length of the source type. 3962 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 3963 static const unsigned OpcTable[2][2] = { 3964 {AArch64::SBFMWri, AArch64::SBFMXri}, 3965 {AArch64::UBFMWri, AArch64::UBFMXri} 3966 }; 3967 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 3968 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 3969 unsigned TmpReg = MRI.createVirtualRegister(RC); 3970 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3971 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 3972 .addImm(0) 3973 .addReg(Op0, getKillRegState(Op0IsKill)) 3974 .addImm(AArch64::sub_32); 3975 Op0 = TmpReg; 3976 Op0IsKill = true; 3977 } 3978 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 3979 } 3980 3981 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 3982 unsigned Op1Reg, bool Op1IsKill) { 3983 unsigned Opc = 0; 3984 bool NeedTrunc = false; 3985 uint64_t Mask = 0; 3986 switch (RetVT.SimpleTy) { 3987 default: return 0; 3988 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 3989 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 3990 case MVT::i32: Opc = AArch64::LSRVWr; break; 3991 case MVT::i64: Opc = AArch64::LSRVXr; break; 3992 } 3993 3994 const TargetRegisterClass *RC = 3995 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3996 if (NeedTrunc) { 3997 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); 3998 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 3999 Op0IsKill = Op1IsKill = true; 4000 } 4001 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4002 Op1IsKill); 4003 if (NeedTrunc) 4004 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4005 return ResultReg; 4006 } 4007 4008 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4009 bool Op0IsKill, uint64_t Shift, 4010 bool IsZExt) { 4011 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4012 "Unexpected source/return type pair."); 4013 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4014 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4015 "Unexpected source value type."); 4016 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4017 RetVT == MVT::i64) && "Unexpected return value type."); 4018 4019 bool Is64Bit = (RetVT == MVT::i64); 4020 unsigned RegSize = Is64Bit ? 64 : 32; 4021 unsigned DstBits = RetVT.getSizeInBits(); 4022 unsigned SrcBits = SrcVT.getSizeInBits(); 4023 const TargetRegisterClass *RC = 4024 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4025 4026 // Just emit a copy for "zero" shifts. 4027 if (Shift == 0) { 4028 if (RetVT == SrcVT) { 4029 unsigned ResultReg = createResultReg(RC); 4030 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4031 TII.get(TargetOpcode::COPY), ResultReg) 4032 .addReg(Op0, getKillRegState(Op0IsKill)); 4033 return ResultReg; 4034 } else 4035 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4036 } 4037 4038 // Don't deal with undefined shifts. 4039 if (Shift >= DstBits) 4040 return 0; 4041 4042 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4043 // {S|U}BFM Wd, Wn, #r, #s 4044 // Wd<s-r:0> = Wn<s:r> when r <= s 4045 4046 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4047 // %2 = lshr i16 %1, 4 4048 // Wd<7-4:0> = Wn<7:4> 4049 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4050 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4051 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4052 4053 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4054 // %2 = lshr i16 %1, 8 4055 // Wd<7-7,0> = Wn<7:7> 4056 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4057 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4058 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4059 4060 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4061 // %2 = lshr i16 %1, 12 4062 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4063 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4064 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4065 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4066 4067 if (Shift >= SrcBits && IsZExt) 4068 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4069 4070 // It is not possible to fold a sign-extend into the LShr instruction. In this 4071 // case emit a sign-extend. 
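  // (Once the value has been explicitly sign-extended to the full register
  // width, the logical shift below reduces to a plain UBFM on the widened
  // value.)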
4072 if (!IsZExt) { 4073 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4074 if (!Op0) 4075 return 0; 4076 Op0IsKill = true; 4077 SrcVT = RetVT; 4078 SrcBits = SrcVT.getSizeInBits(); 4079 IsZExt = true; 4080 } 4081 4082 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4083 unsigned ImmS = SrcBits - 1; 4084 static const unsigned OpcTable[2][2] = { 4085 {AArch64::SBFMWri, AArch64::SBFMXri}, 4086 {AArch64::UBFMWri, AArch64::UBFMXri} 4087 }; 4088 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4089 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4090 unsigned TmpReg = MRI.createVirtualRegister(RC); 4091 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4092 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4093 .addImm(0) 4094 .addReg(Op0, getKillRegState(Op0IsKill)) 4095 .addImm(AArch64::sub_32); 4096 Op0 = TmpReg; 4097 Op0IsKill = true; 4098 } 4099 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4100 } 4101 4102 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4103 unsigned Op1Reg, bool Op1IsKill) { 4104 unsigned Opc = 0; 4105 bool NeedTrunc = false; 4106 uint64_t Mask = 0; 4107 switch (RetVT.SimpleTy) { 4108 default: return 0; 4109 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4110 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4111 case MVT::i32: Opc = AArch64::ASRVWr; break; 4112 case MVT::i64: Opc = AArch64::ASRVXr; break; 4113 } 4114 4115 const TargetRegisterClass *RC = 4116 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4117 if (NeedTrunc) { 4118 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false); 4119 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4120 Op0IsKill = Op1IsKill = true; 4121 } 4122 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4123 Op1IsKill); 4124 if (NeedTrunc) 4125 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4126 return ResultReg; 4127 } 4128 4129 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4130 bool Op0IsKill, uint64_t Shift, 4131 bool IsZExt) { 4132 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4133 "Unexpected source/return type pair."); 4134 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4135 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4136 "Unexpected source value type."); 4137 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4138 RetVT == MVT::i64) && "Unexpected return value type."); 4139 4140 bool Is64Bit = (RetVT == MVT::i64); 4141 unsigned RegSize = Is64Bit ? 64 : 32; 4142 unsigned DstBits = RetVT.getSizeInBits(); 4143 unsigned SrcBits = SrcVT.getSizeInBits(); 4144 const TargetRegisterClass *RC = 4145 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4146 4147 // Just emit a copy for "zero" shifts. 4148 if (Shift == 0) { 4149 if (RetVT == SrcVT) { 4150 unsigned ResultReg = createResultReg(RC); 4151 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4152 TII.get(TargetOpcode::COPY), ResultReg) 4153 .addReg(Op0, getKillRegState(Op0IsKill)); 4154 return ResultReg; 4155 } else 4156 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4157 } 4158 4159 // Don't deal with undefined shifts. 4160 if (Shift >= DstBits) 4161 return 0; 4162 4163 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4164 // {S|U}BFM Wd, Wn, #r, #s 4165 // Wd<s-r:0> = Wn<s:r> when r <= s 4166 4167 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4168 // %2 = ashr i16 %1, 4 4169 // Wd<7-4:0> = Wn<7:4> 4170 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4171 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4172 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4173 4174 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4175 // %2 = ashr i16 %1, 8 4176 // Wd<7-7,0> = Wn<7:7> 4177 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4178 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4179 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4180 4181 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4182 // %2 = ashr i16 %1, 12 4183 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4184 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4185 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4186 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4187 4188 if (Shift >= SrcBits && IsZExt) 4189 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4190 4191 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4192 unsigned ImmS = SrcBits - 1; 4193 static const unsigned OpcTable[2][2] = { 4194 {AArch64::SBFMWri, AArch64::SBFMXri}, 4195 {AArch64::UBFMWri, AArch64::UBFMXri} 4196 }; 4197 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4198 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4199 unsigned TmpReg = MRI.createVirtualRegister(RC); 4200 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4201 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4202 .addImm(0) 4203 .addReg(Op0, getKillRegState(Op0IsKill)) 4204 .addImm(AArch64::sub_32); 4205 Op0 = TmpReg; 4206 Op0IsKill = true; 4207 } 4208 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4209 } 4210 4211 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4212 bool IsZExt) { 4213 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4214 4215 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4216 // DestVT are odd things, so test to make sure that they are both types we can 4217 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4218 // bail out to SelectionDAG. 4219 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4220 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4221 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4222 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4223 return 0; 4224 4225 unsigned Opc; 4226 unsigned Imm = 0; 4227 4228 switch (SrcVT.SimpleTy) { 4229 default: 4230 return 0; 4231 case MVT::i1: 4232 return emiti1Ext(SrcReg, DestVT, IsZExt); 4233 case MVT::i8: 4234 if (DestVT == MVT::i64) 4235 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4236 else 4237 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4238 Imm = 7; 4239 break; 4240 case MVT::i16: 4241 if (DestVT == MVT::i64) 4242 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4243 else 4244 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4245 Imm = 15; 4246 break; 4247 case MVT::i32: 4248 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4249 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4250 Imm = 31; 4251 break; 4252 } 4253 4254 // Handle i8 and i16 as i32. 
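  // (The W-register bitfield-move forms already leave the extended i8/i16
  // result in the low bits of a 32-bit register, so those destinations need
  // no extra work.)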
4255 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4256 DestVT = MVT::i32; 4257 else if (DestVT == MVT::i64) { 4258 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4259 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4260 TII.get(AArch64::SUBREG_TO_REG), Src64) 4261 .addImm(0) 4262 .addReg(SrcReg) 4263 .addImm(AArch64::sub_32); 4264 SrcReg = Src64; 4265 } 4266 4267 const TargetRegisterClass *RC = 4268 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4269 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); 4270 } 4271 4272 static bool isZExtLoad(const MachineInstr *LI) { 4273 switch (LI->getOpcode()) { 4274 default: 4275 return false; 4276 case AArch64::LDURBBi: 4277 case AArch64::LDURHHi: 4278 case AArch64::LDURWi: 4279 case AArch64::LDRBBui: 4280 case AArch64::LDRHHui: 4281 case AArch64::LDRWui: 4282 case AArch64::LDRBBroX: 4283 case AArch64::LDRHHroX: 4284 case AArch64::LDRWroX: 4285 case AArch64::LDRBBroW: 4286 case AArch64::LDRHHroW: 4287 case AArch64::LDRWroW: 4288 return true; 4289 } 4290 } 4291 4292 static bool isSExtLoad(const MachineInstr *LI) { 4293 switch (LI->getOpcode()) { 4294 default: 4295 return false; 4296 case AArch64::LDURSBWi: 4297 case AArch64::LDURSHWi: 4298 case AArch64::LDURSBXi: 4299 case AArch64::LDURSHXi: 4300 case AArch64::LDURSWi: 4301 case AArch64::LDRSBWui: 4302 case AArch64::LDRSHWui: 4303 case AArch64::LDRSBXui: 4304 case AArch64::LDRSHXui: 4305 case AArch64::LDRSWui: 4306 case AArch64::LDRSBWroX: 4307 case AArch64::LDRSHWroX: 4308 case AArch64::LDRSBXroX: 4309 case AArch64::LDRSHXroX: 4310 case AArch64::LDRSWroX: 4311 case AArch64::LDRSBWroW: 4312 case AArch64::LDRSHWroW: 4313 case AArch64::LDRSBXroW: 4314 case AArch64::LDRSHXroW: 4315 case AArch64::LDRSWroW: 4316 return true; 4317 } 4318 } 4319 4320 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4321 MVT SrcVT) { 4322 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4323 if (!LI || !LI->hasOneUse()) 4324 return false; 4325 4326 // Check if the load instruction has already been selected. 4327 unsigned Reg = lookUpRegForValue(LI); 4328 if (!Reg) 4329 return false; 4330 4331 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4332 if (!MI) 4333 return false; 4334 4335 // Check if the correct load instruction has been emitted - SelectionDAG might 4336 // have emitted a zero-extending load, but we need a sign-extending load. 4337 bool IsZExt = isa<ZExtInst>(I); 4338 const auto *LoadMI = MI; 4339 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4340 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4341 unsigned LoadReg = MI->getOperand(1).getReg(); 4342 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4343 assert(LoadMI && "Expected valid instruction"); 4344 } 4345 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4346 return false; 4347 4348 // Nothing to be done. 
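  // (Either the extend does not target i64 or the source is already 64 bits
  // wide, so the register produced by the load can be reused directly.)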
4349   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4350     updateValueMap(I, Reg);
4351     return true;
4352   }
4353
4354   if (IsZExt) {
4355     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4356     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4357             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4358         .addImm(0)
4359         .addReg(Reg, getKillRegState(true))
4360         .addImm(AArch64::sub_32);
4361     Reg = Reg64;
4362   } else {
4363     assert((MI->getOpcode() == TargetOpcode::COPY &&
4364             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4365            "Expected copy instruction");
4366     Reg = MI->getOperand(1).getReg();
4367     MI->eraseFromParent();
4368   }
4369   updateValueMap(I, Reg);
4370   return true;
4371 }
4372
4373 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4374   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4375          "Unexpected integer extend instruction.");
4376   MVT RetVT;
4377   MVT SrcVT;
4378   if (!isTypeSupported(I->getType(), RetVT))
4379     return false;
4380
4381   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4382     return false;
4383
4384   // Try to optimize already sign-/zero-extended values from load instructions.
4385   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4386     return true;
4387
4388   unsigned SrcReg = getRegForValue(I->getOperand(0));
4389   if (!SrcReg)
4390     return false;
4391   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4392
4393   // Try to optimize already sign-/zero-extended values from function arguments.
4394   bool IsZExt = isa<ZExtInst>(I);
4395   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4396     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4397       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4398         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4399         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4400                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4401             .addImm(0)
4402             .addReg(SrcReg, getKillRegState(SrcIsKill))
4403             .addImm(AArch64::sub_32);
4404         SrcReg = ResultReg;
4405       }
4406       // Conservatively clear all kill flags from all uses, because we are
4407       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4408       // level. The result of the instruction at IR level might have been
4409       // trivially dead, which is now no longer true.
4410       unsigned UseReg = lookUpRegForValue(I);
4411       if (UseReg)
4412         MRI.clearKillFlags(UseReg);
4413
4414       updateValueMap(I, SrcReg);
4415       return true;
4416     }
4417   }
4418
4419   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4420   if (!ResultReg)
4421     return false;
4422
4423   updateValueMap(I, ResultReg);
4424   return true;
4425 }
4426
4427 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4428   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4429   if (!DestEVT.isSimple())
4430     return false;
4431
4432   MVT DestVT = DestEVT.getSimpleVT();
4433   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4434     return false;
4435
4436   unsigned DivOpc;
4437   bool Is64bit = (DestVT == MVT::i64);
4438   switch (ISDOpcode) {
4439   default:
4440     return false;
4441   case ISD::SREM:
4442     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4443     break;
4444   case ISD::UREM:
4445     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4446     break;
4447   }
4448   unsigned MSubOpc = Is64bit ?
AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4449 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4450 if (!Src0Reg) 4451 return false; 4452 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4453 4454 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4455 if (!Src1Reg) 4456 return false; 4457 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4458 4459 const TargetRegisterClass *RC = 4460 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4461 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false, 4462 Src1Reg, /*IsKill=*/false); 4463 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4464 // The remainder is computed as numerator - (quotient * denominator) using the 4465 // MSUB instruction. 4466 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true, 4467 Src1Reg, Src1IsKill, Src0Reg, 4468 Src0IsKill); 4469 updateValueMap(I, ResultReg); 4470 return true; 4471 } 4472 4473 bool AArch64FastISel::selectMul(const Instruction *I) { 4474 MVT VT; 4475 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4476 return false; 4477 4478 if (VT.isVector()) 4479 return selectBinaryOp(I, ISD::MUL); 4480 4481 const Value *Src0 = I->getOperand(0); 4482 const Value *Src1 = I->getOperand(1); 4483 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4484 if (C->getValue().isPowerOf2()) 4485 std::swap(Src0, Src1); 4486 4487 // Try to simplify to a shift instruction. 4488 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4489 if (C->getValue().isPowerOf2()) { 4490 uint64_t ShiftVal = C->getValue().logBase2(); 4491 MVT SrcVT = VT; 4492 bool IsZExt = true; 4493 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4494 if (!isIntExtFree(ZExt)) { 4495 MVT VT; 4496 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4497 SrcVT = VT; 4498 IsZExt = true; 4499 Src0 = ZExt->getOperand(0); 4500 } 4501 } 4502 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4503 if (!isIntExtFree(SExt)) { 4504 MVT VT; 4505 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4506 SrcVT = VT; 4507 IsZExt = false; 4508 Src0 = SExt->getOperand(0); 4509 } 4510 } 4511 } 4512 4513 unsigned Src0Reg = getRegForValue(Src0); 4514 if (!Src0Reg) 4515 return false; 4516 bool Src0IsKill = hasTrivialKill(Src0); 4517 4518 unsigned ResultReg = 4519 emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); 4520 4521 if (ResultReg) { 4522 updateValueMap(I, ResultReg); 4523 return true; 4524 } 4525 } 4526 4527 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4528 if (!Src0Reg) 4529 return false; 4530 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4531 4532 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4533 if (!Src1Reg) 4534 return false; 4535 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4536 4537 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); 4538 4539 if (!ResultReg) 4540 return false; 4541 4542 updateValueMap(I, ResultReg); 4543 return true; 4544 } 4545 4546 bool AArch64FastISel::selectShift(const Instruction *I) { 4547 MVT RetVT; 4548 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4549 return false; 4550 4551 if (RetVT.isVector()) 4552 return selectOperator(I, I->getOpcode()); 4553 4554 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4555 unsigned ResultReg = 0; 4556 uint64_t ShiftVal = C->getZExtValue(); 4557 MVT SrcVT = RetVT; 4558 bool IsZExt = I->getOpcode() != Instruction::AShr; 4559 const Value *Op0 = I->getOperand(0); 
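    // If the shifted value is itself a zero-/sign-extend, try to fold the
    // extend into the immediate shift; a single UBFM/SBFM can perform both
    // operations.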
4560 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4561 if (!isIntExtFree(ZExt)) { 4562 MVT TmpVT; 4563 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4564 SrcVT = TmpVT; 4565 IsZExt = true; 4566 Op0 = ZExt->getOperand(0); 4567 } 4568 } 4569 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4570 if (!isIntExtFree(SExt)) { 4571 MVT TmpVT; 4572 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4573 SrcVT = TmpVT; 4574 IsZExt = false; 4575 Op0 = SExt->getOperand(0); 4576 } 4577 } 4578 } 4579 4580 unsigned Op0Reg = getRegForValue(Op0); 4581 if (!Op0Reg) 4582 return false; 4583 bool Op0IsKill = hasTrivialKill(Op0); 4584 4585 switch (I->getOpcode()) { 4586 default: llvm_unreachable("Unexpected instruction."); 4587 case Instruction::Shl: 4588 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4589 break; 4590 case Instruction::AShr: 4591 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4592 break; 4593 case Instruction::LShr: 4594 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4595 break; 4596 } 4597 if (!ResultReg) 4598 return false; 4599 4600 updateValueMap(I, ResultReg); 4601 return true; 4602 } 4603 4604 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4605 if (!Op0Reg) 4606 return false; 4607 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4608 4609 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 4610 if (!Op1Reg) 4611 return false; 4612 bool Op1IsKill = hasTrivialKill(I->getOperand(1)); 4613 4614 unsigned ResultReg = 0; 4615 switch (I->getOpcode()) { 4616 default: llvm_unreachable("Unexpected instruction."); 4617 case Instruction::Shl: 4618 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4619 break; 4620 case Instruction::AShr: 4621 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4622 break; 4623 case Instruction::LShr: 4624 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4625 break; 4626 } 4627 4628 if (!ResultReg) 4629 return false; 4630 4631 updateValueMap(I, ResultReg); 4632 return true; 4633 } 4634 4635 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4636 MVT RetVT, SrcVT; 4637 4638 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4639 return false; 4640 if (!isTypeLegal(I->getType(), RetVT)) 4641 return false; 4642 4643 unsigned Opc; 4644 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4645 Opc = AArch64::FMOVWSr; 4646 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4647 Opc = AArch64::FMOVXDr; 4648 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4649 Opc = AArch64::FMOVSWr; 4650 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4651 Opc = AArch64::FMOVDXr; 4652 else 4653 return false; 4654 4655 const TargetRegisterClass *RC = nullptr; 4656 switch (RetVT.SimpleTy) { 4657 default: llvm_unreachable("Unexpected value type."); 4658 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4659 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4660 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4661 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4662 } 4663 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4664 if (!Op0Reg) 4665 return false; 4666 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4667 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill); 4668 4669 if (!ResultReg) 4670 return false; 4671 4672 updateValueMap(I, ResultReg); 4673 return true; 4674 } 4675 4676 bool AArch64FastISel::selectFRem(const Instruction *I) { 4677 MVT RetVT; 
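  // AArch64 has no frem instruction, so the remainder is computed by a
  // libcall to fmodf/fmod (REM_F32/REM_F64).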
4678 if (!isTypeLegal(I->getType(), RetVT)) 4679 return false; 4680 4681 RTLIB::Libcall LC; 4682 switch (RetVT.SimpleTy) { 4683 default: 4684 return false; 4685 case MVT::f32: 4686 LC = RTLIB::REM_F32; 4687 break; 4688 case MVT::f64: 4689 LC = RTLIB::REM_F64; 4690 break; 4691 } 4692 4693 ArgListTy Args; 4694 Args.reserve(I->getNumOperands()); 4695 4696 // Populate the argument list. 4697 for (auto &Arg : I->operands()) { 4698 ArgListEntry Entry; 4699 Entry.Val = Arg; 4700 Entry.Ty = Arg->getType(); 4701 Args.push_back(Entry); 4702 } 4703 4704 CallLoweringInfo CLI; 4705 MCContext &Ctx = MF->getContext(); 4706 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4707 TLI.getLibcallName(LC), std::move(Args)); 4708 if (!lowerCallTo(CLI)) 4709 return false; 4710 updateValueMap(I, CLI.ResultReg); 4711 return true; 4712 } 4713 4714 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4715 MVT VT; 4716 if (!isTypeLegal(I->getType(), VT)) 4717 return false; 4718 4719 if (!isa<ConstantInt>(I->getOperand(1))) 4720 return selectBinaryOp(I, ISD::SDIV); 4721 4722 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4723 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4724 !(C.isPowerOf2() || (-C).isPowerOf2())) 4725 return selectBinaryOp(I, ISD::SDIV); 4726 4727 unsigned Lg2 = C.countTrailingZeros(); 4728 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4729 if (!Src0Reg) 4730 return false; 4731 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4732 4733 if (cast<BinaryOperator>(I)->isExact()) { 4734 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); 4735 if (!ResultReg) 4736 return false; 4737 updateValueMap(I, ResultReg); 4738 return true; 4739 } 4740 4741 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4742 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne); 4743 if (!AddReg) 4744 return false; 4745 4746 // (Src0 < 0) ? Pow2 - 1 : 0; 4747 if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0)) 4748 return false; 4749 4750 unsigned SelectOpc; 4751 const TargetRegisterClass *RC; 4752 if (VT == MVT::i64) { 4753 SelectOpc = AArch64::CSELXr; 4754 RC = &AArch64::GPR64RegClass; 4755 } else { 4756 SelectOpc = AArch64::CSELWr; 4757 RC = &AArch64::GPR32RegClass; 4758 } 4759 unsigned SelectReg = 4760 fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, 4761 Src0IsKill, AArch64CC::LT); 4762 if (!SelectReg) 4763 return false; 4764 4765 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4766 // negate the result. 4767 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4768 unsigned ResultReg; 4769 if (C.isNegative()) 4770 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, 4771 SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); 4772 else 4773 ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); 4774 4775 if (!ResultReg) 4776 return false; 4777 4778 updateValueMap(I, ResultReg); 4779 return true; 4780 } 4781 4782 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4783 /// have to duplicate it for AArch64, because otherwise we would fail during the 4784 /// sign-extend emission. 4785 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4786 unsigned IdxN = getRegForValue(Idx); 4787 if (IdxN == 0) 4788 // Unhandled operand. Halt "fast" selection and bail. 
4789     return std::pair<unsigned, bool>(0, false);
4790
4791   bool IdxNIsKill = hasTrivialKill(Idx);
4792
4793   // If the index is smaller or larger than intptr_t, truncate or extend it.
4794   MVT PtrVT = TLI.getPointerTy(DL);
4795   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4796   if (IdxVT.bitsLT(PtrVT)) {
4797     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
4798     IdxNIsKill = true;
4799   } else if (IdxVT.bitsGT(PtrVT))
4800     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4801   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4802 }
4803
4804 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4805 /// duplicate it for AArch64, because otherwise we would bail out even for
4806 /// simple cases. This is because the standard fastEmit functions don't cover
4807 /// MUL at all and ADD is lowered very inefficiently.
4808 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4809   unsigned N = getRegForValue(I->getOperand(0));
4810   if (!N)
4811     return false;
4812   bool NIsKill = hasTrivialKill(I->getOperand(0));
4813
4814   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4815   // into a single N = N + TotalOffset.
4816   uint64_t TotalOffs = 0;
4817   Type *Ty = I->getOperand(0)->getType();
4818   MVT VT = TLI.getPointerTy(DL);
4819   for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
4820     const Value *Idx = *OI;
4821     if (auto *StTy = dyn_cast<StructType>(Ty)) {
4822       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4823       // N = N + Offset
4824       if (Field)
4825         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4826       Ty = StTy->getElementType(Field);
4827     } else {
4828       Ty = cast<SequentialType>(Ty)->getElementType();
4829       // If this is a constant subscript, handle it quickly.
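      // (Constant indices are accumulated into TotalOffs so that a chain of
      // them is materialized as at most one add below.)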
4830 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 4831 if (CI->isZero()) 4832 continue; 4833 // N = N + Offset 4834 TotalOffs += 4835 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); 4836 continue; 4837 } 4838 if (TotalOffs) { 4839 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 4840 if (!N) 4841 return false; 4842 NIsKill = true; 4843 TotalOffs = 0; 4844 } 4845 4846 // N = N + Idx * ElementSize; 4847 uint64_t ElementSize = DL.getTypeAllocSize(Ty); 4848 std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); 4849 unsigned IdxN = Pair.first; 4850 bool IdxNIsKill = Pair.second; 4851 if (!IdxN) 4852 return false; 4853 4854 if (ElementSize != 1) { 4855 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); 4856 if (!C) 4857 return false; 4858 IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true); 4859 if (!IdxN) 4860 return false; 4861 IdxNIsKill = true; 4862 } 4863 N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); 4864 if (!N) 4865 return false; 4866 } 4867 } 4868 if (TotalOffs) { 4869 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 4870 if (!N) 4871 return false; 4872 } 4873 updateValueMap(I, N); 4874 return true; 4875 } 4876 4877 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 4878 switch (I->getOpcode()) { 4879 default: 4880 break; 4881 case Instruction::Add: 4882 case Instruction::Sub: 4883 return selectAddSub(I); 4884 case Instruction::Mul: 4885 return selectMul(I); 4886 case Instruction::SDiv: 4887 return selectSDiv(I); 4888 case Instruction::SRem: 4889 if (!selectBinaryOp(I, ISD::SREM)) 4890 return selectRem(I, ISD::SREM); 4891 return true; 4892 case Instruction::URem: 4893 if (!selectBinaryOp(I, ISD::UREM)) 4894 return selectRem(I, ISD::UREM); 4895 return true; 4896 case Instruction::Shl: 4897 case Instruction::LShr: 4898 case Instruction::AShr: 4899 return selectShift(I); 4900 case Instruction::And: 4901 case Instruction::Or: 4902 case Instruction::Xor: 4903 return selectLogicalOp(I); 4904 case Instruction::Br: 4905 return selectBranch(I); 4906 case Instruction::IndirectBr: 4907 return selectIndirectBr(I); 4908 case Instruction::BitCast: 4909 if (!FastISel::selectBitCast(I)) 4910 return selectBitCast(I); 4911 return true; 4912 case Instruction::FPToSI: 4913 if (!selectCast(I, ISD::FP_TO_SINT)) 4914 return selectFPToInt(I, /*Signed=*/true); 4915 return true; 4916 case Instruction::FPToUI: 4917 return selectFPToInt(I, /*Signed=*/false); 4918 case Instruction::ZExt: 4919 case Instruction::SExt: 4920 return selectIntExt(I); 4921 case Instruction::Trunc: 4922 if (!selectCast(I, ISD::TRUNCATE)) 4923 return selectTrunc(I); 4924 return true; 4925 case Instruction::FPExt: 4926 return selectFPExt(I); 4927 case Instruction::FPTrunc: 4928 return selectFPTrunc(I); 4929 case Instruction::SIToFP: 4930 if (!selectCast(I, ISD::SINT_TO_FP)) 4931 return selectIntToFP(I, /*Signed=*/true); 4932 return true; 4933 case Instruction::UIToFP: 4934 return selectIntToFP(I, /*Signed=*/false); 4935 case Instruction::Load: 4936 return selectLoad(I); 4937 case Instruction::Store: 4938 return selectStore(I); 4939 case Instruction::FCmp: 4940 case Instruction::ICmp: 4941 return selectCmp(I); 4942 case Instruction::Select: 4943 return selectSelect(I); 4944 case Instruction::Ret: 4945 return selectRet(I); 4946 case Instruction::FRem: 4947 return selectFRem(I); 4948 case Instruction::GetElementPtr: 4949 return selectGetElementPtr(I); 4950 } 4951 4952 // fall-back to target-independent instruction selection. 
4953 return selectOperator(I, I->getOpcode()); 4954 // Silence warnings. 4955 (void)&CC_AArch64_DarwinPCS_VarArg; 4956 } 4957 4958 namespace llvm { 4959 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, 4960 const TargetLibraryInfo *LibInfo) { 4961 return new AArch64FastISel(FuncInfo, LibInfo); 4962 } 4963 } 4964