//===-- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;

namespace {

class AArch64FastISel : public FastISel {

  class Address {
  public:
    typedef enum {
      RegBase,
      FrameIndexBase
    } BaseKind;

  private:
    BaseKind Kind;
    union {
      unsigned Reg;
      int FI;
    } Base;
    int64_t Offset;

  public:
    Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; }
    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }
    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }
    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }
    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }

    bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

private:
  // Selection routines.
  bool SelectLoad(const Instruction *I);
  bool SelectStore(const Instruction *I);
  bool SelectBranch(const Instruction *I);
  bool SelectIndirectBr(const Instruction *I);
  bool SelectCmp(const Instruction *I);
  bool SelectSelect(const Instruction *I);
  bool SelectFPExt(const Instruction *I);
  bool SelectFPTrunc(const Instruction *I);
  bool SelectFPToInt(const Instruction *I, bool Signed);
  bool SelectIntToFP(const Instruction *I, bool Signed);
  bool SelectRem(const Instruction *I, unsigned ISDOpcode);
  bool SelectCall(const Instruction *I, const char *IntrMemName);
  bool SelectIntrinsicCall(const IntrinsicInst &I);
  bool SelectRet(const Instruction *I);
  bool SelectTrunc(const Instruction *I);
  bool SelectIntExt(const Instruction *I);
  bool SelectMul(const Instruction *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
  bool ComputeAddress(const Value *Obj, Address &Addr);
  bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
                       bool UseUnscaled);
  void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            unsigned Flags, bool UseUnscaled);
  bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
  bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  // Emit functions.
  bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
  bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
                bool UseUnscaled = false);
  bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
                 bool UseUnscaled = false);
  unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);

  unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned AArch64MaterializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
                       SmallVectorImpl<unsigned> &ArgRegs,
                       SmallVectorImpl<MVT> &ArgVTs,
                       SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                       SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
                       unsigned &NumBytes);
  bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                  const Instruction *I, CallingConv::ID CC, unsigned &NumBytes);

public:
  // Backend specific FastISel code.
  unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
  unsigned TargetMaterializeConstant(const Constant *C) override;

  explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
    Context = &funcInfo.Fn->getContext();
  }

  bool TargetSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

#include "AArch64GenCallingConv.inc"

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}

unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool is64bit = (VT == MVT::f64);

  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = AArch64_AM::getFP64Imm(Val);
      Opc = AArch64::FMOVDi;
    } else {
      Imm = AArch64_AM::getFP32Imm(Val);
      Opc = AArch64::FMOVSi;
    }
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
        .addImm(Imm);
    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0)
    Align = DL.getTypeAllocSize(CFP->getType());

  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);

  unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}

unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
    return 0;

  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC);
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
        .addImm(0);
  }
  return ResultReg;
}

unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  // FIXME: Handle ConstantInt.
  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return AArch64MaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return AArch64MaterializeGV(GV);

  return 0;
}

// Computes the address to get to an object.
bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast: {
    // Look through bitcasts.
    return ComputeAddress(U->getOperand(0), Addr);
  }
  case Instruction::IntToPtr: {
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return ComputeAddress(U->getOperand(0), Addr);
    break;
  }
  case Instruction::PtrToInt: {
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return ComputeAddress(U->getOperand(0), Addr);
    break;
  }
  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    gep_type_iterator GTI = gep_type_begin(U);
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
         ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (ComputeAddress(U->getOperand(0), Addr))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  }

  // Try to get this in a register if nothing else has worked.
  if (!Addr.isValid())
    Addr.setReg(getRegForValue(Obj));
  return Addr.isValid();
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now. For stores, this reflects truncation.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
                                      int64_t ScaleFactor, bool UseUnscaled) {
  bool needsLowering = false;
  int64_t Offset = Addr.getOffset();
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f32:
  case MVT::f64:
    if (!UseUnscaled)
      // Using scaled, 12-bit, unsigned immediate offsets.
      needsLowering = ((Offset & 0xfff) != Offset);
    else
      // Using unscaled, 9-bit, signed immediate offsets.
      needsLowering = (Offset > 256 || Offset < -256);
    break;
  }

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (needsLowering) {
    uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
    unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
                                      UnscaledOffset, MVT::i64);
    if (ResultReg == 0)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           unsigned Flags, bool UseUnscaled) {
  int64_t Offset = Addr.getOffset();
  // Frame base works a bit differently. Handle it separately.
  if (Addr.getKind() == Address::FrameIndexBase) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.getReg());
    MIB.addImm(Offset);
  }
}

bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
                               bool UseUnscaled) {
  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  if (!UseUnscaled && Addr.getOffset() < 0)
    UseUnscaled = true;

  unsigned Opc;
  const TargetRegisterClass *RC;
  bool VTIsi1 = false;
  int64_t ScaleFactor = 0;
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
    VTIsi1 = true;
    // Intentional fall-through.
  case MVT::i8:
    Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
    RC = &AArch64::GPR32RegClass;
    ScaleFactor = 1;
    break;
  case MVT::i16:
    Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
    RC = &AArch64::GPR32RegClass;
    ScaleFactor = 2;
    break;
  case MVT::i32:
    Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
    RC = &AArch64::GPR32RegClass;
    ScaleFactor = 4;
    break;
  case MVT::i64:
    Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
    RC = &AArch64::GPR64RegClass;
    ScaleFactor = 8;
    break;
  case MVT::f32:
    Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
    RC = TLI.getRegClassFor(VT);
    ScaleFactor = 4;
    break;
  case MVT::f64:
    Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
    RC = TLI.getRegClassFor(VT);
    ScaleFactor = 8;
    break;
  }
  // Scale the offset.
  if (!UseUnscaled) {
    int64_t Offset = Addr.getOffset();
    if (Offset & (ScaleFactor - 1))
      // Retry using an unscaled, 9-bit, signed immediate offset.
      return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);

    Addr.setOffset(Offset / ScaleFactor);
  }

  // Simplify this down to something we can handle.
  if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
    return false;

  // Create the base instruction, then add the operands.
  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);

  // Loading an i1 requires special handling.
  if (VTIsi1) {
    MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
    unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
            ANDReg)
        .addReg(ResultReg)
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    ResultReg = ANDReg;
  }
  return true;
}

bool AArch64FastISel::SelectLoad(const Instruction *I) {
  MVT VT;
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ComputeAddress(I->getOperand(0), Addr))
    return false;

  unsigned ResultReg;
  if (!EmitLoad(VT, ResultReg, Addr))
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
                                bool UseUnscaled) {
  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  if (!UseUnscaled && Addr.getOffset() < 0)
    UseUnscaled = true;

  unsigned StrOpc;
  bool VTIsi1 = false;
  int64_t ScaleFactor = 0;
  // Using scaled, 12-bit, unsigned immediate offsets.
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
    VTIsi1 = true;
    // Intentional fall-through.
  case MVT::i8:
    StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
    ScaleFactor = 1;
    break;
  case MVT::i16:
    StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
    ScaleFactor = 2;
    break;
  case MVT::i32:
    StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
    ScaleFactor = 4;
    break;
  case MVT::i64:
    StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
    ScaleFactor = 8;
    break;
  case MVT::f32:
    StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
    ScaleFactor = 4;
    break;
  case MVT::f64:
    StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
    ScaleFactor = 8;
    break;
  }
  // Scale the offset.
  if (!UseUnscaled) {
    int64_t Offset = Addr.getOffset();
    if (Offset & (ScaleFactor - 1))
      // Retry using an unscaled, 9-bit, signed immediate offset.
      return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);

    Addr.setOffset(Offset / ScaleFactor);
  }

  // Simplify this down to something we can handle.
  if (!SimplifyAddress(Addr, VT, UseUnscaled ?
                       1 : ScaleFactor, UseUnscaled))
    return false;

  // Storing an i1 requires special handling.
  if (VTIsi1) {
    MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
    unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
            ANDReg)
        .addReg(SrcReg)
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
    SrcReg = ANDReg;
  }
  // Create the base instruction, then add the operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(StrOpc)).addReg(SrcReg);
  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
  return true;
}

bool AArch64FastISel::SelectStore(const Instruction *I) {
  MVT VT;
  Value *Op0 = I->getOperand(0);
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
      cast<StoreInst>(I)->isAtomic())
    return false;

  // Get the value to be stored into a register.
  unsigned SrcReg = getRegForValue(Op0);
  if (SrcReg == 0)
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!EmitStore(VT, SrcReg, Addr))
    return false;
  return true;
}

static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
  switch (Pred) {
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UEQ:
  default:
    // AL is our "false" for now. The other two need more compares.
    return AArch64CC::AL;
  case CmpInst::ICMP_EQ:
  case CmpInst::FCMP_OEQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
  case CmpInst::FCMP_OGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
  case CmpInst::FCMP_OGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_UGT:
  case CmpInst::FCMP_UGT:
    return AArch64CC::HI;
  case CmpInst::FCMP_OLT:
    return AArch64CC::MI;
  case CmpInst::ICMP_ULE:
  case CmpInst::FCMP_OLE:
    return AArch64CC::LS;
  case CmpInst::FCMP_ORD:
    return AArch64CC::VC;
  case CmpInst::FCMP_UNO:
    return AArch64CC::VS;
  case CmpInst::FCMP_UGE:
    return AArch64CC::PL;
  case CmpInst::ICMP_SLT:
  case CmpInst::FCMP_ULT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
  case CmpInst::FCMP_ULE:
    return AArch64CC::LE;
  case CmpInst::FCMP_UNE:
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  }
}

bool AArch64FastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
      // We may not handle every CC for now.
      AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
      if (CC == AArch64CC::AL)
        return false;

      // Emit the cmp.
      if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      // Emit the branch.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
          .addImm(CC)
          .addMBB(TBB);
      FuncInfo.MBB->addSuccessor(TBB);

      FastEmitBranch(FBB, DbgLoc);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SrcVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
      unsigned CondReg = getRegForValue(TI->getOperand(0));
      if (CondReg == 0)
        return false;

      // Issue an extract_subreg to get the lower 32-bits.
      if (SrcVT == MVT::i64)
        CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
                                             AArch64::sub_32);

      MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
      unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::ANDWri), ANDReg)
          .addReg(CondReg)
          .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::SUBSWri))
          .addReg(ANDReg)
          .addReg(ANDReg)
          .addImm(0)
          .addImm(0);

      unsigned CC = AArch64CC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CC = AArch64CC::EQ;
      }
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
          .addImm(CC)
          .addMBB(TBB);
      FuncInfo.MBB->addSuccessor(TBB);
      FastEmitBranch(FBB, DbgLoc);
      return true;
    }
  } else if (const ConstantInt *CI =
                 dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
        .addMBB(Target);
    FuncInfo.MBB->addSuccessor(Target);
    return true;
  }

  unsigned CondReg = getRegForValue(BI->getCondition());
  if (CondReg == 0)
    return false;

  // We've been divorced from our compare! Our block was split, and
  // now our compare lives in a predecessor block. We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register. Ergo, we test
  // the one-bit value left in the virtual register.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
          AArch64::WZR)
      .addReg(CondReg)
      .addImm(0)
      .addImm(0);

  unsigned CC = AArch64CC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CC = AArch64CC::EQ;
  }

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
      .addImm(CC)
      .addMBB(TBB);
  FuncInfo.MBB->addSuccessor(TBB);
  FastEmitBranch(FBB, DbgLoc);
  return true;
}

bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
  unsigned AddrReg = getRegForValue(BI->getOperand(0));
  if (AddrReg == 0)
    return false;

  // Emit the indirect branch.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
      .addReg(AddrReg);

  // Make sure the CFG is up-to-date.
  for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);

  return true;
}

bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcEVT = TLI.getValueType(Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  uint64_t Imm;
  bool UseImm = false;
  bool isNegativeImm = false;
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();

      Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
      if (CIVal.isNegative()) {
        isNegativeImm = true;
        Imm = -Imm;
      }
      // FIXME: We can handle more immediates using shifts.
      UseImm = ((Imm & 0xfff) == Imm);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned ZReg;
  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    needsExt = true;
    // Intentional fall-through.
  case MVT::i32:
    ZReg = AArch64::WZR;
    if (UseImm)
      CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
    else
      CmpOpc = AArch64::SUBSWrr;
    break;
  case MVT::i64:
    ZReg = AArch64::XZR;
    if (UseImm)
      CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
    else
      CmpOpc = AArch64::SUBSXrr;
    break;
  case MVT::f32:
    isICmp = false;
    CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
    break;
  case MVT::f64:
    isICmp = false;
    CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
    break;
  }

  unsigned SrcReg1 = getRegForValue(Src1Value);
  if (SrcReg1 == 0)
    return false;

  unsigned SrcReg2;
  if (!UseImm) {
    SrcReg2 = getRegForValue(Src2Value);
    if (SrcReg2 == 0)
      return false;
  }

  // If we have i1, i8, or i16, we need to either zero extend or sign extend.
  if (needsExt) {
    SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
    if (SrcReg1 == 0)
      return false;
    if (!UseImm) {
      SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
      if (SrcReg2 == 0)
        return false;
    }
  }

  if (isICmp) {
    if (UseImm)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
          .addReg(ZReg)
          .addReg(SrcReg1)
          .addImm(Imm)
          .addImm(0);
    else
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
          .addReg(ZReg)
          .addReg(SrcReg1)
          .addReg(SrcReg2);
  } else {
    if (UseImm)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
          .addReg(SrcReg1);
    else
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
          .addReg(SrcReg1)
          .addReg(SrcReg2);
  }
  return true;
}

bool AArch64FastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  // We may not handle every CC for now.
  AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
  if (CC == AArch64CC::AL)
    return false;

  // Emit the cmp.
  if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    return false;

  // Now set a register based on the comparison.
  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
  unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
          ResultReg)
      .addReg(AArch64::WZR)
      .addReg(AArch64::WZR)
      .addImm(invertedCC);

  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::SelectSelect(const Instruction *I) {
  const SelectInst *SI = cast<SelectInst>(I);

  EVT DestEVT = TLI.getValueType(SI->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
      DestVT != MVT::f64)
    return false;

  unsigned CondReg = getRegForValue(SI->getCondition());
  if (CondReg == 0)
    return false;
  unsigned TrueReg = getRegForValue(SI->getTrueValue());
  if (TrueReg == 0)
    return false;
  unsigned FalseReg = getRegForValue(SI->getFalseValue());
  if (FalseReg == 0)
    return false;

  MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
  unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
          ANDReg)
      .addReg(CondReg)
      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
      .addReg(ANDReg)
      .addReg(ANDReg)
      .addImm(0)
      .addImm(0);

  unsigned SelectOpc;
  switch (DestVT.SimpleTy) {
  default:
    return false;
  case MVT::i32:
    SelectOpc = AArch64::CSELWr;
    break;
  case MVT::i64:
    SelectOpc = AArch64::CSELXr;
    break;
  case MVT::f32:
    SelectOpc = AArch64::FCSELSrrr;
    break;
  case MVT::f64:
    SelectOpc = AArch64::FCSELDrrr;
    break;
  }

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
          ResultReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(AArch64CC::NE);

  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::SelectFPExt(const Instruction *I) {
  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
    return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0)
    return false;

  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
          ResultReg).addReg(Op);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
  Value *V = I->getOperand(0);
  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
    return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0)
    return false;

  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
          ResultReg).addReg(Op);
  UpdateValueMap(I, ResultReg);
  return true;
}

// FPToUI and FPToSI
bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
  MVT DestVT;
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    return false;

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (SrcReg == 0)
    return false;

  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
  if (SrcVT == MVT::f128)
    return false;

  unsigned Opc;
  if (SrcVT == MVT::f64) {
    if (Signed)
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
    else
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
  } else {
    if (Signed)
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
    else
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
  }
  unsigned ResultReg = createResultReg(
      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(SrcReg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
  MVT DestVT;
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    return false;
  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
         "Unexpected value type.");

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (SrcReg == 0)
    return false;

  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);

  // Handle sign-extension.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
    SrcReg =
        EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
    if (SrcReg == 0)
      return false;
  }

  MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
                                                  : &AArch64::GPR32RegClass);

  unsigned Opc;
  if (SrcVT == MVT::i64) {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
  } else {
    if (Signed)
      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
    else
      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
  }

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(SrcReg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::ProcessCallArgs(
    SmallVectorImpl<Value *> &Args, SmallVectorImpl<unsigned> &ArgRegs,
    SmallVectorImpl<MVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
    SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
    unsigned &NumBytes) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
      .addImm(NumBytes);

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Handle arg promotion: SExt, ZExt, AExt.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
      if (Arg == 0)
        return false;
      break;
    }
    case CCValAssign::AExt:
      // Intentional fall-through.
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      MVT SrcVT = ArgVT;
      Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
      if (Arg == 0)
        return false;
      break;
    }
    default:
      llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // FIXME: Handle custom args.
      return false;
    } else {
      assert(VA.isMemLoc() && "Assuming store on stack.");

      // Need to store on the stack.
      unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;

      unsigned BEAlign = 0;
      if (ArgSize < 8 && !Subtarget->isLittleEndian())
        BEAlign = 8 - ArgSize;

      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(AArch64::SP);
      Addr.setOffset(VA.getLocMemOffset() + BEAlign);

      if (!EmitStore(ArgVT, Arg, Addr))
        return false;
    }
  }
  return true;
}

bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                                 const Instruction *I, CallingConv::ID CC,
                                 unsigned &NumBytes) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
      .addImm(NumBytes)
      .addImm(0);

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));

    // Only handle a single return value.
    if (RVLocs.size() != 1)
      return false;

    // Copy all of the result registers out of their specified physreg.
    MVT CopyVT = RVLocs[0].getValVT();
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY),
            ResultReg).addReg(RVLocs[0].getLocReg());
    UsedRegs.push_back(RVLocs[0].getLocReg());

    // Finally update the result.
    UpdateValueMap(I, ResultReg);
  }

  return true;
}

bool AArch64FastISel::SelectCall(const Instruction *I,
                                 const char *IntrMemName = nullptr) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Don't handle inline asm or intrinsics.
  if (isa<InlineAsm>(Callee))
    return false;

  // Only handle global variable Callees.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV)
    return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();

  // Let SDISel handle vararg functions.
  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  MVT RetVT;
  Type *RetTy = I->getType();
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // Set up the argument vectors.
  SmallVector<Value *, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgRegs.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());

  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    // If we're lowering a memory intrinsic instead of a regular call, skip the
    // last two arguments, which shouldn't be passed to the underlying function.
    if (IntrMemName && e - i <= 2)
      break;

    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;

    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    MVT ArgVT;
    Type *ArgTy = (*i)->getType();
    if (!isTypeLegal(ArgTy, ArgVT) &&
        !(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16))
      return false;

    // We don't handle vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call.
  MachineInstrBuilder MIB;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL));
  if (!IntrMemName)
    MIB.addGlobalAddress(GV, 0, 0);
  else
    MIB.addExternalSymbol(IntrMemName, 0);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i], RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes))
    return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
  if (Alignment)
    return Len / Alignment <= 4;
  else
    return Len < 32;
}

bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
                                         uint64_t Len, unsigned Alignment) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!IsMemCpySmall(Len, Alignment))
    return false;

  int64_t UnscaledOffset = 0;
  Address OrigDest = Dest;
  Address OrigSrc = Src;

  while (Len) {
    MVT VT;
    if (!Alignment || Alignment >= 8) {
      if (Len >= 8)
        VT = MVT::i64;
      else if (Len >= 4)
        VT = MVT::i32;
      else if (Len >= 2)
        VT = MVT::i16;
      else {
        VT = MVT::i8;
      }
    } else {
      // Bound based on alignment.
      if (Len >= 4 && Alignment == 4)
        VT = MVT::i32;
      else if (Len >= 2 && Alignment == 2)
        VT = MVT::i16;
      else {
        VT = MVT::i8;
      }
    }

    bool RV;
    unsigned ResultReg;
    RV = EmitLoad(VT, ResultReg, Src);
    if (!RV)
      return false;

    RV = EmitStore(VT, ResultReg, Dest);
    if (!RV)
      return false;

    int64_t Size = VT.getSizeInBits() / 8;
    Len -= Size;
    UnscaledOffset += Size;

    // We need to recompute the unscaled offset for each iteration.
    Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
    Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
  }

  return true;
}

bool AArch64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const MemTransferInst &MTI = cast<MemTransferInst>(I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
      // Small memcpy's are common enough that we want to do them without a
      // call if possible.
      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
      unsigned Alignment = MTI.getAlignment();
      if (IsMemCpySmall(Len, Alignment)) {
        Address Dest, Src;
        if (!ComputeAddress(MTI.getRawDest(), Dest) ||
            !ComputeAddress(MTI.getRawSource(), Src))
          return false;
        if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    if (!MTI.getLength()->getType()->isIntegerTy(64))
      return false;

    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
    return SelectCall(&I, IntrMemName);
  }
  case Intrinsic::memset: {
    const MemSetInst &MSI = cast<MemSetInst>(I);
    // Don't handle volatile.
    if (MSI.isVolatile())
      return false;

    if (!MSI.getLength()->getType()->isIntegerTy(64))
      return false;

    if (MSI.getDestAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

    return SelectCall(&I, "memset");
  }
  case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
        .addImm(1);
    return true;
  }
  }
  return false;
}

bool AArch64FastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    CallingConv::ID CC = F.getCallingConv();
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
                   I->getContext());
    CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
                                                     : RetCC_AArch64_AAPCS;
    CCInfo.AnalyzeReturn(Outs, RetCC);

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];
    const Value *RV = Ret->getOperand(0);

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    unsigned DestReg = VA.getLocReg();
    // Avoid a cross-class copy. This is very unlikely.
    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
      return false;

    EVT RVEVT = TLI.getValueType(RV->getType());
    if (!RVEVT.isSimple())
      return false;

    // Vectors (of > 1 lane) in big endian need tricky handling.
    if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
      return false;

    MVT RVVT = RVEVT.getSimpleVT();
    if (RVVT == MVT::f128)
      return false;
    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool isZExt = Outs[0].Flags.isZExt();
      SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
      if (SrcReg == 0)
        return false;
    }

    // Make the copy.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}

bool AArch64FastISel::SelectTrunc(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  Type *SrcTy = Op->getType();

  EVT SrcEVT = TLI.getValueType(SrcTy, true);
  EVT DestEVT = TLI.getValueType(DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
      DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg)
    return false;

  // If we're truncating from i64 to a smaller non-legal type then generate an
  // AND. Otherwise, we know the high bits are undefined and a truncate doesn't
  // generate any code.
  if (SrcVT == MVT::i64) {
    uint64_t Mask = 0;
    switch (DestVT.SimpleTy) {
    default:
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
      return false;
    case MVT::i1:
      Mask = 0x1;
      break;
    case MVT::i8:
      Mask = 0xff;
      break;
    case MVT::i16:
      Mask = 0xffff;
      break;
    }
    // Issue an extract_subreg to get the lower 32-bits.
    unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
                                                AArch64::sub_32);
    MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
    // Create the AND instruction which performs the actual truncation.
    unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
            ANDReg)
        .addReg(Reg32)
        .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
    SrcReg = ANDReg;
  }

  UpdateValueMap(I, SrcReg);
  return true;
}

unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
          DestVT == MVT::i64) &&
         "Unexpected value type.");
  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;

  if (isZExt) {
    MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
    unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
            ResultReg)
        .addReg(SrcReg)
        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));

    if (DestVT == MVT::i64) {
      // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
      // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
      unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::SUBREG_TO_REG), Reg64)
          .addImm(0)
          .addReg(ResultReg)
          .addImm(AArch64::sub_32);
      ResultReg = Reg64;
    }
    return ResultReg;
  } else {
    if (DestVT == MVT::i64) {
      // FIXME: We're SExt i1 to i64.
      return 0;
    }
    unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
            ResultReg)
        .addReg(SrcReg)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }
}

unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool isZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we can
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
  // bail out to SelectionDAG.
  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
      ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
       (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
    return 0;

  unsigned Opc;
  unsigned Imm = 0;

  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    return Emiti1Ext(SrcReg, DestVT, isZExt);
  case MVT::i8:
    if (DestVT == MVT::i64)
      Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 7;
    break;
  case MVT::i16:
    if (DestVT == MVT::i64)
      Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }

  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;
  else if (DestVT == MVT::i64) {
    unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(SrcReg)
      .addImm(0)
      .addImm(Imm);

  return ResultReg;
}

bool AArch64FastISel::SelectIntExt(const Instruction *I) {
  // On AArch64, in general, integer casts don't involve legal types; this code
  // handles promotable integers. The high bits for a type smaller than
  // the register size are assumed to be undefined.
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  bool isZExt = isa<ZExtInst>(I);
  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  EVT SrcEVT = TLI.getValueType(SrcTy, true);
  EVT DestEVT = TLI.getValueType(DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();
  unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  if (ResultReg == 0)
    return false;
  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(I->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
    return false;

  unsigned DivOpc;
  bool is64bit = (DestVT == MVT::i64);
  switch (ISDOpcode) {
  default:
    return false;
  case ISD::SREM:
    DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    break;
  case ISD::UREM:
    DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    break;
  }
  unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  unsigned Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
      .addReg(Src0Reg)
      .addReg(Src1Reg);
  // The remainder is computed as numerator - (quotient * denominator) using
  // the MSUB instruction.
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
      .addReg(QuotReg)
      .addReg(Src1Reg)
      .addReg(Src0Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::SelectMul(const Instruction *I) {
  EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // Must be simple value type. Don't handle vectors.
  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;

  unsigned Opc;
  unsigned ZReg;
  switch (SrcVT.SimpleTy) {
  default:
    return false;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    ZReg = AArch64::WZR;
    Opc = AArch64::MADDWrrr;
    SrcVT = MVT::i32;
    break;
  case MVT::i64:
    ZReg = AArch64::XZR;
    Opc = AArch64::MADDXrrr;
    break;
  }

  unsigned Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  unsigned Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  // Create the base instruction, then add the operands.
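  // On AArch64 a plain MUL is an alias for MADD with the zero register as the
  // accumulator, so emit MADD Rd, Rn, Rm, WZR/XZR directly.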
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(Src0Reg)
      .addReg(Src1Reg)
      .addReg(ZReg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Load:
    return SelectLoad(I);
  case Instruction::Store:
    return SelectStore(I);
  case Instruction::Br:
    return SelectBranch(I);
  case Instruction::IndirectBr:
    return SelectIndirectBr(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return SelectCmp(I);
  case Instruction::Select:
    return SelectSelect(I);
  case Instruction::FPExt:
    return SelectFPExt(I);
  case Instruction::FPTrunc:
    return SelectFPTrunc(I);
  case Instruction::FPToSI:
    return SelectFPToInt(I, /*Signed=*/true);
  case Instruction::FPToUI:
    return SelectFPToInt(I, /*Signed=*/false);
  case Instruction::SIToFP:
    return SelectIntToFP(I, /*Signed=*/true);
  case Instruction::UIToFP:
    return SelectIntToFP(I, /*Signed=*/false);
  case Instruction::SRem:
    return SelectRem(I, ISD::SREM);
  case Instruction::URem:
    return SelectRem(I, ISD::UREM);
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      return SelectIntrinsicCall(*II);
    return SelectCall(I);
  case Instruction::Ret:
    return SelectRet(I);
  case Instruction::Trunc:
    return SelectTrunc(I);
  case Instruction::ZExt:
  case Instruction::SExt:
    return SelectIntExt(I);
  case Instruction::Mul:
    // FIXME: This really should be handled by the target-independent selector.
    return SelectMul(I);
  }
  return false;
  // Silence warnings.
  (void)&CC_AArch64_DarwinPCS_VarArg;
}

namespace llvm {
llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
                                        const TargetLibraryInfo *libInfo) {
  return new AArch64FastISel(funcInfo, libInfo);
}
}