1 //===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the ARM-specific support for the FastISel class. Some 11 // of the target-specific code is generated by tablegen in the file 12 // ARMGenFastISel.inc, which is #included here. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "ARM.h" 17 #include "ARMBaseInstrInfo.h" 18 #include "ARMCallingConv.h" 19 #include "ARMConstantPoolValue.h" 20 #include "ARMSubtarget.h" 21 #include "ARMTargetMachine.h" 22 #include "MCTargetDesc/ARMAddressingModes.h" 23 #include "llvm/CodeGen/Analysis.h" 24 #include "llvm/CodeGen/FastISel.h" 25 #include "llvm/CodeGen/FunctionLoweringInfo.h" 26 #include "llvm/CodeGen/MachineConstantPool.h" 27 #include "llvm/CodeGen/MachineFrameInfo.h" 28 #include "llvm/CodeGen/MachineInstrBuilder.h" 29 #include "llvm/CodeGen/MachineMemOperand.h" 30 #include "llvm/CodeGen/MachineModuleInfo.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/IR/CallingConv.h" 33 #include "llvm/IR/DataLayout.h" 34 #include "llvm/IR/DerivedTypes.h" 35 #include "llvm/IR/GlobalVariable.h" 36 #include "llvm/IR/Instructions.h" 37 #include "llvm/IR/IntrinsicInst.h" 38 #include "llvm/IR/Module.h" 39 #include "llvm/IR/Operator.h" 40 #include "llvm/Support/CallSite.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/GetElementPtrTypeIterator.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetLowering.h" 46 #include "llvm/Target/TargetMachine.h" 47 #include "llvm/Target/TargetOptions.h" 48 using namespace llvm; 49 50 extern cl::opt<bool> EnableARMLongCalls; 51 52 namespace { 53 54 // All possible address modes, plus some. 55 typedef struct Address { 56 enum { 57 RegBase, 58 FrameIndexBase 59 } BaseType; 60 61 union { 62 unsigned Reg; 63 int FI; 64 } Base; 65 66 int Offset; 67 68 // Innocuous defaults for our address. 69 Address() 70 : BaseType(RegBase), Offset(0) { 71 Base.Reg = 0; 72 } 73 } Address; 74 75 class ARMFastISel : public FastISel { 76 77 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 78 /// make the right decision when generating code for different targets. 79 const ARMSubtarget *Subtarget; 80 const TargetMachine &TM; 81 const TargetInstrInfo &TII; 82 const TargetLowering &TLI; 83 ARMFunctionInfo *AFI; 84 85 // Convenience variables to avoid some queries. 86 bool isThumb2; 87 LLVMContext *Context; 88 89 public: 90 explicit ARMFastISel(FunctionLoweringInfo &funcInfo, 91 const TargetLibraryInfo *libInfo) 92 : FastISel(funcInfo, libInfo), 93 TM(funcInfo.MF->getTarget()), 94 TII(*TM.getInstrInfo()), 95 TLI(*TM.getTargetLowering()) { 96 Subtarget = &TM.getSubtarget<ARMSubtarget>(); 97 AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); 98 isThumb2 = AFI->isThumbFunction(); 99 Context = &funcInfo.Fn->getContext(); 100 } 101 102 // Code from FastISel.cpp. 
103 private: 104 unsigned FastEmitInst_(unsigned MachineInstOpcode, 105 const TargetRegisterClass *RC); 106 unsigned FastEmitInst_r(unsigned MachineInstOpcode, 107 const TargetRegisterClass *RC, 108 unsigned Op0, bool Op0IsKill); 109 unsigned FastEmitInst_rr(unsigned MachineInstOpcode, 110 const TargetRegisterClass *RC, 111 unsigned Op0, bool Op0IsKill, 112 unsigned Op1, bool Op1IsKill); 113 unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, 114 const TargetRegisterClass *RC, 115 unsigned Op0, bool Op0IsKill, 116 unsigned Op1, bool Op1IsKill, 117 unsigned Op2, bool Op2IsKill); 118 unsigned FastEmitInst_ri(unsigned MachineInstOpcode, 119 const TargetRegisterClass *RC, 120 unsigned Op0, bool Op0IsKill, 121 uint64_t Imm); 122 unsigned FastEmitInst_rf(unsigned MachineInstOpcode, 123 const TargetRegisterClass *RC, 124 unsigned Op0, bool Op0IsKill, 125 const ConstantFP *FPImm); 126 unsigned FastEmitInst_rri(unsigned MachineInstOpcode, 127 const TargetRegisterClass *RC, 128 unsigned Op0, bool Op0IsKill, 129 unsigned Op1, bool Op1IsKill, 130 uint64_t Imm); 131 unsigned FastEmitInst_i(unsigned MachineInstOpcode, 132 const TargetRegisterClass *RC, 133 uint64_t Imm); 134 unsigned FastEmitInst_ii(unsigned MachineInstOpcode, 135 const TargetRegisterClass *RC, 136 uint64_t Imm1, uint64_t Imm2); 137 138 unsigned FastEmitInst_extractsubreg(MVT RetVT, 139 unsigned Op0, bool Op0IsKill, 140 uint32_t Idx); 141 142 // Backend specific FastISel code. 143 private: 144 virtual bool TargetSelectInstruction(const Instruction *I); 145 virtual unsigned TargetMaterializeConstant(const Constant *C); 146 virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); 147 virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, 148 const LoadInst *LI); 149 virtual bool FastLowerArguments(); 150 private: 151 #include "ARMGenFastISel.inc" 152 153 // Instruction selection routines. 154 private: 155 bool SelectLoad(const Instruction *I); 156 bool SelectStore(const Instruction *I); 157 bool SelectBranch(const Instruction *I); 158 bool SelectIndirectBr(const Instruction *I); 159 bool SelectCmp(const Instruction *I); 160 bool SelectFPExt(const Instruction *I); 161 bool SelectFPTrunc(const Instruction *I); 162 bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); 163 bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode); 164 bool SelectIToFP(const Instruction *I, bool isSigned); 165 bool SelectFPToI(const Instruction *I, bool isSigned); 166 bool SelectDiv(const Instruction *I, bool isSigned); 167 bool SelectRem(const Instruction *I, bool isSigned); 168 bool SelectCall(const Instruction *I, const char *IntrMemName); 169 bool SelectIntrinsicCall(const IntrinsicInst &I); 170 bool SelectSelect(const Instruction *I); 171 bool SelectRet(const Instruction *I); 172 bool SelectTrunc(const Instruction *I); 173 bool SelectIntExt(const Instruction *I); 174 bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy); 175 176 // Utility routines. 
177 private: 178 bool isTypeLegal(Type *Ty, MVT &VT); 179 bool isLoadTypeLegal(Type *Ty, MVT &VT); 180 bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, 181 bool isZExt); 182 bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, 183 unsigned Alignment = 0, bool isZExt = true, 184 bool allocReg = true); 185 bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, 186 unsigned Alignment = 0); 187 bool ARMComputeAddress(const Value *Obj, Address &Addr); 188 void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3); 189 bool ARMIsMemCpySmall(uint64_t Len); 190 bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, 191 unsigned Alignment); 192 unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); 193 unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT); 194 unsigned ARMMaterializeInt(const Constant *C, MVT VT); 195 unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT); 196 unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg); 197 unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg); 198 unsigned ARMSelectCallOp(bool UseReg); 199 unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT); 200 201 // Call handling routines. 202 private: 203 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, 204 bool Return, 205 bool isVarArg); 206 bool ProcessCallArgs(SmallVectorImpl<Value*> &Args, 207 SmallVectorImpl<unsigned> &ArgRegs, 208 SmallVectorImpl<MVT> &ArgVTs, 209 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, 210 SmallVectorImpl<unsigned> &RegArgs, 211 CallingConv::ID CC, 212 unsigned &NumBytes, 213 bool isVarArg); 214 unsigned getLibcallReg(const Twine &Name); 215 bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, 216 const Instruction *I, CallingConv::ID CC, 217 unsigned &NumBytes, bool isVarArg); 218 bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call); 219 220 // OptionalDef handling routines. 221 private: 222 bool isARMNEONPred(const MachineInstr *MI); 223 bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); 224 const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); 225 void AddLoadStoreOperands(MVT VT, Address &Addr, 226 const MachineInstrBuilder &MIB, 227 unsigned Flags, bool useAM3); 228 }; 229 230 } // end anonymous namespace 231 232 #include "ARMGenCallingConv.inc" 233 234 // DefinesOptionalPredicate - This is different from DefinesPredicate in that 235 // we don't care about implicit defs here, just places we'll need to add a 236 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. 237 bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { 238 if (!MI->hasOptionalDef()) 239 return false; 240 241 // Look to see if our OptionalDef is defining CPSR or CCR. 242 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 243 const MachineOperand &MO = MI->getOperand(i); 244 if (!MO.isReg() || !MO.isDef()) continue; 245 if (MO.getReg() == ARM::CPSR) 246 *CPSR = true; 247 } 248 return true; 249 } 250 251 bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { 252 const MCInstrDesc &MCID = MI->getDesc(); 253 254 // If we're a thumb2 or not NEON function we were handled via isPredicable. 
255 if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || 256 AFI->isThumb2Function()) 257 return false; 258 259 for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) 260 if (MCID.OpInfo[i].isPredicate()) 261 return true; 262 263 return false; 264 } 265 266 // If the machine is predicable go ahead and add the predicate operands, if 267 // it needs default CC operands add those. 268 // TODO: If we want to support thumb1 then we'll need to deal with optional 269 // CPSR defs that need to be added before the remaining operands. See s_cc_out 270 // for descriptions why. 271 const MachineInstrBuilder & 272 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { 273 MachineInstr *MI = &*MIB; 274 275 // Do we use a predicate? or... 276 // Are we NEON in ARM mode and have a predicate operand? If so, I know 277 // we're not predicable but add it anyways. 278 if (TII.isPredicable(MI) || isARMNEONPred(MI)) 279 AddDefaultPred(MIB); 280 281 // Do we optionally set a predicate? Preds is size > 0 iff the predicate 282 // defines CPSR. All other OptionalDefines in ARM are the CCR register. 283 bool CPSR = false; 284 if (DefinesOptionalPredicate(MI, &CPSR)) { 285 if (CPSR) 286 AddDefaultT1CC(MIB); 287 else 288 AddDefaultCC(MIB); 289 } 290 return MIB; 291 } 292 293 unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, 294 const TargetRegisterClass* RC) { 295 unsigned ResultReg = createResultReg(RC); 296 const MCInstrDesc &II = TII.get(MachineInstOpcode); 297 298 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)); 299 return ResultReg; 300 } 301 302 unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, 303 const TargetRegisterClass *RC, 304 unsigned Op0, bool Op0IsKill) { 305 unsigned ResultReg = createResultReg(RC); 306 const MCInstrDesc &II = TII.get(MachineInstOpcode); 307 308 if (II.getNumDefs() >= 1) { 309 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 310 .addReg(Op0, Op0IsKill * RegState::Kill)); 311 } else { 312 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 313 .addReg(Op0, Op0IsKill * RegState::Kill)); 314 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 315 TII.get(TargetOpcode::COPY), ResultReg) 316 .addReg(II.ImplicitDefs[0])); 317 } 318 return ResultReg; 319 } 320 321 unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, 322 const TargetRegisterClass *RC, 323 unsigned Op0, bool Op0IsKill, 324 unsigned Op1, bool Op1IsKill) { 325 unsigned ResultReg = createResultReg(RC); 326 const MCInstrDesc &II = TII.get(MachineInstOpcode); 327 328 if (II.getNumDefs() >= 1) { 329 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 330 .addReg(Op0, Op0IsKill * RegState::Kill) 331 .addReg(Op1, Op1IsKill * RegState::Kill)); 332 } else { 333 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 334 .addReg(Op0, Op0IsKill * RegState::Kill) 335 .addReg(Op1, Op1IsKill * RegState::Kill)); 336 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 337 TII.get(TargetOpcode::COPY), ResultReg) 338 .addReg(II.ImplicitDefs[0])); 339 } 340 return ResultReg; 341 } 342 343 unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, 344 const TargetRegisterClass *RC, 345 unsigned Op0, bool Op0IsKill, 346 unsigned Op1, bool Op1IsKill, 347 unsigned Op2, bool Op2IsKill) { 348 unsigned ResultReg = createResultReg(RC); 349 const MCInstrDesc &II = TII.get(MachineInstOpcode); 350 351 if (II.getNumDefs() >= 1) { 352 
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 353 .addReg(Op0, Op0IsKill * RegState::Kill) 354 .addReg(Op1, Op1IsKill * RegState::Kill) 355 .addReg(Op2, Op2IsKill * RegState::Kill)); 356 } else { 357 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 358 .addReg(Op0, Op0IsKill * RegState::Kill) 359 .addReg(Op1, Op1IsKill * RegState::Kill) 360 .addReg(Op2, Op2IsKill * RegState::Kill)); 361 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 362 TII.get(TargetOpcode::COPY), ResultReg) 363 .addReg(II.ImplicitDefs[0])); 364 } 365 return ResultReg; 366 } 367 368 unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, 369 const TargetRegisterClass *RC, 370 unsigned Op0, bool Op0IsKill, 371 uint64_t Imm) { 372 unsigned ResultReg = createResultReg(RC); 373 const MCInstrDesc &II = TII.get(MachineInstOpcode); 374 375 if (II.getNumDefs() >= 1) { 376 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 377 .addReg(Op0, Op0IsKill * RegState::Kill) 378 .addImm(Imm)); 379 } else { 380 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 381 .addReg(Op0, Op0IsKill * RegState::Kill) 382 .addImm(Imm)); 383 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 384 TII.get(TargetOpcode::COPY), ResultReg) 385 .addReg(II.ImplicitDefs[0])); 386 } 387 return ResultReg; 388 } 389 390 unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, 391 const TargetRegisterClass *RC, 392 unsigned Op0, bool Op0IsKill, 393 const ConstantFP *FPImm) { 394 unsigned ResultReg = createResultReg(RC); 395 const MCInstrDesc &II = TII.get(MachineInstOpcode); 396 397 if (II.getNumDefs() >= 1) { 398 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 399 .addReg(Op0, Op0IsKill * RegState::Kill) 400 .addFPImm(FPImm)); 401 } else { 402 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 403 .addReg(Op0, Op0IsKill * RegState::Kill) 404 .addFPImm(FPImm)); 405 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 406 TII.get(TargetOpcode::COPY), ResultReg) 407 .addReg(II.ImplicitDefs[0])); 408 } 409 return ResultReg; 410 } 411 412 unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, 413 const TargetRegisterClass *RC, 414 unsigned Op0, bool Op0IsKill, 415 unsigned Op1, bool Op1IsKill, 416 uint64_t Imm) { 417 unsigned ResultReg = createResultReg(RC); 418 const MCInstrDesc &II = TII.get(MachineInstOpcode); 419 420 if (II.getNumDefs() >= 1) { 421 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 422 .addReg(Op0, Op0IsKill * RegState::Kill) 423 .addReg(Op1, Op1IsKill * RegState::Kill) 424 .addImm(Imm)); 425 } else { 426 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 427 .addReg(Op0, Op0IsKill * RegState::Kill) 428 .addReg(Op1, Op1IsKill * RegState::Kill) 429 .addImm(Imm)); 430 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 431 TII.get(TargetOpcode::COPY), ResultReg) 432 .addReg(II.ImplicitDefs[0])); 433 } 434 return ResultReg; 435 } 436 437 unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, 438 const TargetRegisterClass *RC, 439 uint64_t Imm) { 440 unsigned ResultReg = createResultReg(RC); 441 const MCInstrDesc &II = TII.get(MachineInstOpcode); 442 443 if (II.getNumDefs() >= 1) { 444 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 445 .addImm(Imm)); 446 } else { 447 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 448 .addImm(Imm)); 449 
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 450 TII.get(TargetOpcode::COPY), ResultReg) 451 .addReg(II.ImplicitDefs[0])); 452 } 453 return ResultReg; 454 } 455 456 unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode, 457 const TargetRegisterClass *RC, 458 uint64_t Imm1, uint64_t Imm2) { 459 unsigned ResultReg = createResultReg(RC); 460 const MCInstrDesc &II = TII.get(MachineInstOpcode); 461 462 if (II.getNumDefs() >= 1) { 463 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) 464 .addImm(Imm1).addImm(Imm2)); 465 } else { 466 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) 467 .addImm(Imm1).addImm(Imm2)); 468 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 469 TII.get(TargetOpcode::COPY), 470 ResultReg) 471 .addReg(II.ImplicitDefs[0])); 472 } 473 return ResultReg; 474 } 475 476 unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, 477 unsigned Op0, bool Op0IsKill, 478 uint32_t Idx) { 479 unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); 480 assert(TargetRegisterInfo::isVirtualRegister(Op0) && 481 "Cannot yet extract from physregs"); 482 483 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, 484 DL, TII.get(TargetOpcode::COPY), ResultReg) 485 .addReg(Op0, getKillRegState(Op0IsKill), Idx)); 486 return ResultReg; 487 } 488 489 // TODO: Don't worry about 64-bit now, but when this is fixed remove the 490 // checks from the various callers. 491 unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) { 492 if (VT == MVT::f64) return 0; 493 494 unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); 495 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 496 TII.get(ARM::VMOVSR), MoveReg) 497 .addReg(SrcReg)); 498 return MoveReg; 499 } 500 501 unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) { 502 if (VT == MVT::i64) return 0; 503 504 unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); 505 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 506 TII.get(ARM::VMOVRS), MoveReg) 507 .addReg(SrcReg)); 508 return MoveReg; 509 } 510 511 // For double width floating point we need to materialize two constants 512 // (the high and the low) into integer registers then use a move to get 513 // the combined constant into an FP reg. 514 unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { 515 const APFloat Val = CFP->getValueAPF(); 516 bool is64bit = VT == MVT::f64; 517 518 // This checks to see if we can use VFP3 instructions to materialize 519 // a constant, otherwise we have to go through the constant pool. 520 if (TLI.isFPImmLegal(Val, VT)) { 521 int Imm; 522 unsigned Opc; 523 if (is64bit) { 524 Imm = ARM_AM::getFP64Imm(Val); 525 Opc = ARM::FCONSTD; 526 } else { 527 Imm = ARM_AM::getFP32Imm(Val); 528 Opc = ARM::FCONSTS; 529 } 530 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 531 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 532 DestReg) 533 .addImm(Imm)); 534 return DestReg; 535 } 536 537 // Require VFP2 for loading fp constants. 538 if (!Subtarget->hasVFP2()) return false; 539 540 // MachineConstantPool wants an explicit alignment. 541 unsigned Align = TD.getPrefTypeAlignment(CFP->getType()); 542 if (Align == 0) { 543 // TODO: Figure out if this is correct. 544 Align = TD.getTypeAllocSize(CFP->getType()); 545 } 546 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); 547 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 548 unsigned Opc = is64bit ? 
ARM::VLDRD : ARM::VLDRS; 549 550 // The extra reg is for addrmode5. 551 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 552 DestReg) 553 .addConstantPoolIndex(Idx) 554 .addReg(0)); 555 return DestReg; 556 } 557 558 unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { 559 560 if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) 561 return false; 562 563 // If we can do this in a single instruction without a constant pool entry 564 // do so now. 565 const ConstantInt *CI = cast<ConstantInt>(C); 566 if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) { 567 unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16; 568 const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass : 569 &ARM::GPRRegClass; 570 unsigned ImmReg = createResultReg(RC); 571 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 572 TII.get(Opc), ImmReg) 573 .addImm(CI->getZExtValue())); 574 return ImmReg; 575 } 576 577 // Use MVN to emit negative constants. 578 if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) { 579 unsigned Imm = (unsigned)~(CI->getSExtValue()); 580 bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : 581 (ARM_AM::getSOImmVal(Imm) != -1); 582 if (UseImm) { 583 unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi; 584 unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); 585 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 586 TII.get(Opc), ImmReg) 587 .addImm(Imm)); 588 return ImmReg; 589 } 590 } 591 592 // Load from constant pool. For now 32-bit only. 593 if (VT != MVT::i32) 594 return false; 595 596 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 597 598 // MachineConstantPool wants an explicit alignment. 599 unsigned Align = TD.getPrefTypeAlignment(C->getType()); 600 if (Align == 0) { 601 // TODO: Figure out if this is correct. 602 Align = TD.getTypeAllocSize(C->getType()); 603 } 604 unsigned Idx = MCP.getConstantPoolIndex(C, Align); 605 606 if (isThumb2) 607 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 608 TII.get(ARM::t2LDRpci), DestReg) 609 .addConstantPoolIndex(Idx)); 610 else 611 // The extra immediate is for addrmode2. 612 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 613 TII.get(ARM::LDRcp), DestReg) 614 .addConstantPoolIndex(Idx) 615 .addImm(0)); 616 617 return DestReg; 618 } 619 620 unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { 621 // For now 32-bit only. 622 if (VT != MVT::i32) return 0; 623 624 Reloc::Model RelocM = TM.getRelocationModel(); 625 bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); 626 const TargetRegisterClass *RC = isThumb2 ? 627 (const TargetRegisterClass*)&ARM::rGPRRegClass : 628 (const TargetRegisterClass*)&ARM::GPRRegClass; 629 unsigned DestReg = createResultReg(RC); 630 631 // Use movw+movt when possible, it avoids constant pool entries. 632 // Darwin targets don't support movt with Reloc::Static, see 633 // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support 634 // static movt relocations. 635 if (Subtarget->useMovt() && 636 Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) { 637 unsigned Opc; 638 switch (RelocM) { 639 case Reloc::PIC_: 640 Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel; 641 break; 642 case Reloc::DynamicNoPIC: 643 Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn; 644 break; 645 default: 646 Opc = isThumb2 ? 
ARM::t2MOVi32imm : ARM::MOVi32imm; 647 break; 648 } 649 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 650 DestReg).addGlobalAddress(GV)); 651 } else { 652 // MachineConstantPool wants an explicit alignment. 653 unsigned Align = TD.getPrefTypeAlignment(GV->getType()); 654 if (Align == 0) { 655 // TODO: Figure out if this is correct. 656 Align = TD.getTypeAllocSize(GV->getType()); 657 } 658 659 if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_) 660 return ARMLowerPICELF(GV, Align, VT); 661 662 // Grab index. 663 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : 664 (Subtarget->isThumb() ? 4 : 8); 665 unsigned Id = AFI->createPICLabelUId(); 666 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id, 667 ARMCP::CPValue, 668 PCAdj); 669 unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); 670 671 // Load value. 672 MachineInstrBuilder MIB; 673 if (isThumb2) { 674 unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; 675 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) 676 .addConstantPoolIndex(Idx); 677 if (RelocM == Reloc::PIC_) 678 MIB.addImm(Id); 679 AddOptionalDefs(MIB); 680 } else { 681 // The extra immediate is for addrmode2. 682 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), 683 DestReg) 684 .addConstantPoolIndex(Idx) 685 .addImm(0); 686 AddOptionalDefs(MIB); 687 688 if (RelocM == Reloc::PIC_) { 689 unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD; 690 unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); 691 692 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, 693 DL, TII.get(Opc), NewDestReg) 694 .addReg(DestReg) 695 .addImm(Id); 696 AddOptionalDefs(MIB); 697 return NewDestReg; 698 } 699 } 700 } 701 702 if (IsIndirect) { 703 MachineInstrBuilder MIB; 704 unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); 705 if (isThumb2) 706 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 707 TII.get(ARM::t2LDRi12), NewDestReg) 708 .addReg(DestReg) 709 .addImm(0); 710 else 711 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12), 712 NewDestReg) 713 .addReg(DestReg) 714 .addImm(0); 715 DestReg = NewDestReg; 716 AddOptionalDefs(MIB); 717 } 718 719 return DestReg; 720 } 721 722 unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { 723 EVT CEVT = TLI.getValueType(C->getType(), true); 724 725 // Only handle simple types. 726 if (!CEVT.isSimple()) return 0; 727 MVT VT = CEVT.getSimpleVT(); 728 729 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 730 return ARMMaterializeFP(CFP, VT); 731 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 732 return ARMMaterializeGV(GV, VT); 733 else if (isa<ConstantInt>(C)) 734 return ARMMaterializeInt(C, VT); 735 736 return 0; 737 } 738 739 // TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF); 740 741 unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { 742 // Don't handle dynamic allocas. 743 if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; 744 745 MVT VT; 746 if (!isLoadTypeLegal(AI->getType(), VT)) return 0; 747 748 DenseMap<const AllocaInst*, int>::iterator SI = 749 FuncInfo.StaticAllocaMap.find(AI); 750 751 // This will get lowered later into the correct offsets and registers 752 // via rewriteXFrameIndex. 753 if (SI != FuncInfo.StaticAllocaMap.end()) { 754 const TargetRegisterClass* RC = TLI.getRegClassFor(VT); 755 unsigned ResultReg = createResultReg(RC); 756 unsigned Opc = isThumb2 ? 
ARM::t2ADDri : ARM::ADDri; 757 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 758 TII.get(Opc), ResultReg) 759 .addFrameIndex(SI->second) 760 .addImm(0)); 761 return ResultReg; 762 } 763 764 return 0; 765 } 766 767 bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) { 768 EVT evt = TLI.getValueType(Ty, true); 769 770 // Only handle simple types. 771 if (evt == MVT::Other || !evt.isSimple()) return false; 772 VT = evt.getSimpleVT(); 773 774 // Handle all legal types, i.e. a register that will directly hold this 775 // value. 776 return TLI.isTypeLegal(VT); 777 } 778 779 bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { 780 if (isTypeLegal(Ty, VT)) return true; 781 782 // If this is a type than can be sign or zero-extended to a basic operation 783 // go ahead and accept it now. 784 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) 785 return true; 786 787 return false; 788 } 789 790 // Computes the address to get to an object. 791 bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { 792 // Some boilerplate from the X86 FastISel. 793 const User *U = NULL; 794 unsigned Opcode = Instruction::UserOp1; 795 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 796 // Don't walk into other basic blocks unless the object is an alloca from 797 // another block, otherwise it may not have a virtual register assigned. 798 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || 799 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 800 Opcode = I->getOpcode(); 801 U = I; 802 } 803 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 804 Opcode = C->getOpcode(); 805 U = C; 806 } 807 808 if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) 809 if (Ty->getAddressSpace() > 255) 810 // Fast instruction selection doesn't support the special 811 // address spaces. 812 return false; 813 814 switch (Opcode) { 815 default: 816 break; 817 case Instruction::BitCast: { 818 // Look through bitcasts. 819 return ARMComputeAddress(U->getOperand(0), Addr); 820 } 821 case Instruction::IntToPtr: { 822 // Look past no-op inttoptrs. 823 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 824 return ARMComputeAddress(U->getOperand(0), Addr); 825 break; 826 } 827 case Instruction::PtrToInt: { 828 // Look past no-op ptrtoints. 829 if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) 830 return ARMComputeAddress(U->getOperand(0), Addr); 831 break; 832 } 833 case Instruction::GetElementPtr: { 834 Address SavedAddr = Addr; 835 int TmpOffset = Addr.Offset; 836 837 // Iterate through the GEP folding the constants into offsets where 838 // we can. 839 gep_type_iterator GTI = gep_type_begin(U); 840 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); 841 i != e; ++i, ++GTI) { 842 const Value *Op = *i; 843 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 844 const StructLayout *SL = TD.getStructLayout(STy); 845 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 846 TmpOffset += SL->getElementOffset(Idx); 847 } else { 848 uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); 849 for (;;) { 850 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 851 // Constant-offset addressing. 852 TmpOffset += CI->getSExtValue() * S; 853 break; 854 } 855 if (isa<AddOperator>(Op) && 856 (!isa<Instruction>(Op) || 857 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()] 858 == FuncInfo.MBB) && 859 isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { 860 // An add (in the same block) with a constant operand. 
Fold the 861 // constant. 862 ConstantInt *CI = 863 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 864 TmpOffset += CI->getSExtValue() * S; 865 // Iterate on the other operand. 866 Op = cast<AddOperator>(Op)->getOperand(0); 867 continue; 868 } 869 // Unsupported 870 goto unsupported_gep; 871 } 872 } 873 } 874 875 // Try to grab the base operand now. 876 Addr.Offset = TmpOffset; 877 if (ARMComputeAddress(U->getOperand(0), Addr)) return true; 878 879 // We failed, restore everything and try the other options. 880 Addr = SavedAddr; 881 882 unsupported_gep: 883 break; 884 } 885 case Instruction::Alloca: { 886 const AllocaInst *AI = cast<AllocaInst>(Obj); 887 DenseMap<const AllocaInst*, int>::iterator SI = 888 FuncInfo.StaticAllocaMap.find(AI); 889 if (SI != FuncInfo.StaticAllocaMap.end()) { 890 Addr.BaseType = Address::FrameIndexBase; 891 Addr.Base.FI = SI->second; 892 return true; 893 } 894 break; 895 } 896 } 897 898 // Try to get this in a register if nothing else has worked. 899 if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj); 900 return Addr.Base.Reg != 0; 901 } 902 903 void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) { 904 bool needsLowering = false; 905 switch (VT.SimpleTy) { 906 default: llvm_unreachable("Unhandled load/store type!"); 907 case MVT::i1: 908 case MVT::i8: 909 case MVT::i16: 910 case MVT::i32: 911 if (!useAM3) { 912 // Integer loads/stores handle 12-bit offsets. 913 needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); 914 // Handle negative offsets. 915 if (needsLowering && isThumb2) 916 needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 && 917 Addr.Offset > -256); 918 } else { 919 // ARM halfword load/stores and signed byte loads use +/-imm8 offsets. 920 needsLowering = (Addr.Offset > 255 || Addr.Offset < -255); 921 } 922 break; 923 case MVT::f32: 924 case MVT::f64: 925 // Floating point operands handle 8-bit offsets. 926 needsLowering = ((Addr.Offset & 0xff) != Addr.Offset); 927 break; 928 } 929 930 // If this is a stack pointer and the offset needs to be simplified then 931 // put the alloca address into a register, set the base type back to 932 // register and continue. This should almost never happen. 933 if (needsLowering && Addr.BaseType == Address::FrameIndexBase) { 934 const TargetRegisterClass *RC = isThumb2 ? 935 (const TargetRegisterClass*)&ARM::tGPRRegClass : 936 (const TargetRegisterClass*)&ARM::GPRRegClass; 937 unsigned ResultReg = createResultReg(RC); 938 unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; 939 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 940 TII.get(Opc), ResultReg) 941 .addFrameIndex(Addr.Base.FI) 942 .addImm(0)); 943 Addr.Base.Reg = ResultReg; 944 Addr.BaseType = Address::RegBase; 945 } 946 947 // Since the offset is too large for the load/store instruction 948 // get the reg+offset into a register. 949 if (needsLowering) { 950 Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg, 951 /*Op0IsKill*/false, Addr.Offset, MVT::i32); 952 Addr.Offset = 0; 953 } 954 } 955 956 void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr, 957 const MachineInstrBuilder &MIB, 958 unsigned Flags, bool useAM3) { 959 // addrmode5 output depends on the selection dag addressing dividing the 960 // offset by 4 that it then later multiplies. Do this here as well. 961 if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64) 962 Addr.Offset /= 4; 963 964 // Frame base works a bit differently. Handle it separately. 
965 if (Addr.BaseType == Address::FrameIndexBase) { 966 int FI = Addr.Base.FI; 967 int Offset = Addr.Offset; 968 MachineMemOperand *MMO = 969 FuncInfo.MF->getMachineMemOperand( 970 MachinePointerInfo::getFixedStack(FI, Offset), 971 Flags, 972 MFI.getObjectSize(FI), 973 MFI.getObjectAlignment(FI)); 974 // Now add the rest of the operands. 975 MIB.addFrameIndex(FI); 976 977 // ARM halfword load/stores and signed byte loads need an additional 978 // operand. 979 if (useAM3) { 980 signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; 981 MIB.addReg(0); 982 MIB.addImm(Imm); 983 } else { 984 MIB.addImm(Addr.Offset); 985 } 986 MIB.addMemOperand(MMO); 987 } else { 988 // Now add the rest of the operands. 989 MIB.addReg(Addr.Base.Reg); 990 991 // ARM halfword load/stores and signed byte loads need an additional 992 // operand. 993 if (useAM3) { 994 signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; 995 MIB.addReg(0); 996 MIB.addImm(Imm); 997 } else { 998 MIB.addImm(Addr.Offset); 999 } 1000 } 1001 AddOptionalDefs(MIB); 1002 } 1003 1004 bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, 1005 unsigned Alignment, bool isZExt, bool allocReg) { 1006 unsigned Opc; 1007 bool useAM3 = false; 1008 bool needVMOV = false; 1009 const TargetRegisterClass *RC; 1010 switch (VT.SimpleTy) { 1011 // This is mostly going to be Neon/vector support. 1012 default: return false; 1013 case MVT::i1: 1014 case MVT::i8: 1015 if (isThumb2) { 1016 if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) 1017 Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8; 1018 else 1019 Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12; 1020 } else { 1021 if (isZExt) { 1022 Opc = ARM::LDRBi12; 1023 } else { 1024 Opc = ARM::LDRSB; 1025 useAM3 = true; 1026 } 1027 } 1028 RC = &ARM::GPRRegClass; 1029 break; 1030 case MVT::i16: 1031 if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem()) 1032 return false; 1033 1034 if (isThumb2) { 1035 if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) 1036 Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8; 1037 else 1038 Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12; 1039 } else { 1040 Opc = isZExt ? ARM::LDRH : ARM::LDRSH; 1041 useAM3 = true; 1042 } 1043 RC = &ARM::GPRRegClass; 1044 break; 1045 case MVT::i32: 1046 if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem()) 1047 return false; 1048 1049 if (isThumb2) { 1050 if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) 1051 Opc = ARM::t2LDRi8; 1052 else 1053 Opc = ARM::t2LDRi12; 1054 } else { 1055 Opc = ARM::LDRi12; 1056 } 1057 RC = &ARM::GPRRegClass; 1058 break; 1059 case MVT::f32: 1060 if (!Subtarget->hasVFP2()) return false; 1061 // Unaligned loads need special handling. Floats require word-alignment. 1062 if (Alignment && Alignment < 4) { 1063 needVMOV = true; 1064 VT = MVT::i32; 1065 Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; 1066 RC = &ARM::GPRRegClass; 1067 } else { 1068 Opc = ARM::VLDRS; 1069 RC = TLI.getRegClassFor(VT); 1070 } 1071 break; 1072 case MVT::f64: 1073 if (!Subtarget->hasVFP2()) return false; 1074 // FIXME: Unaligned loads need special handling. Doublewords require 1075 // word-alignment. 1076 if (Alignment && Alignment < 4) 1077 return false; 1078 1079 Opc = ARM::VLDRD; 1080 RC = TLI.getRegClassFor(VT); 1081 break; 1082 } 1083 // Simplify this down to something we can handle. 1084 ARMSimplifyAddress(Addr, VT, useAM3); 1085 1086 // Create the base instruction, then add the operands. 
1087 if (allocReg) 1088 ResultReg = createResultReg(RC); 1089 assert (ResultReg > 255 && "Expected an allocated virtual register."); 1090 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1091 TII.get(Opc), ResultReg); 1092 AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3); 1093 1094 // If we had an unaligned load of a float we've converted it to an regular 1095 // load. Now we must move from the GRP to the FP register. 1096 if (needVMOV) { 1097 unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32)); 1098 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1099 TII.get(ARM::VMOVSR), MoveReg) 1100 .addReg(ResultReg)); 1101 ResultReg = MoveReg; 1102 } 1103 return true; 1104 } 1105 1106 bool ARMFastISel::SelectLoad(const Instruction *I) { 1107 // Atomic loads need special handling. 1108 if (cast<LoadInst>(I)->isAtomic()) 1109 return false; 1110 1111 // Verify we have a legal type before going any further. 1112 MVT VT; 1113 if (!isLoadTypeLegal(I->getType(), VT)) 1114 return false; 1115 1116 // See if we can handle this address. 1117 Address Addr; 1118 if (!ARMComputeAddress(I->getOperand(0), Addr)) return false; 1119 1120 unsigned ResultReg; 1121 if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment())) 1122 return false; 1123 UpdateValueMap(I, ResultReg); 1124 return true; 1125 } 1126 1127 bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, 1128 unsigned Alignment) { 1129 unsigned StrOpc; 1130 bool useAM3 = false; 1131 switch (VT.SimpleTy) { 1132 // This is mostly going to be Neon/vector support. 1133 default: return false; 1134 case MVT::i1: { 1135 unsigned Res = createResultReg(isThumb2 ? 1136 (const TargetRegisterClass*)&ARM::tGPRRegClass : 1137 (const TargetRegisterClass*)&ARM::GPRRegClass); 1138 unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; 1139 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1140 TII.get(Opc), Res) 1141 .addReg(SrcReg).addImm(1)); 1142 SrcReg = Res; 1143 } // Fallthrough here. 1144 case MVT::i8: 1145 if (isThumb2) { 1146 if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) 1147 StrOpc = ARM::t2STRBi8; 1148 else 1149 StrOpc = ARM::t2STRBi12; 1150 } else { 1151 StrOpc = ARM::STRBi12; 1152 } 1153 break; 1154 case MVT::i16: 1155 if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem()) 1156 return false; 1157 1158 if (isThumb2) { 1159 if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) 1160 StrOpc = ARM::t2STRHi8; 1161 else 1162 StrOpc = ARM::t2STRHi12; 1163 } else { 1164 StrOpc = ARM::STRH; 1165 useAM3 = true; 1166 } 1167 break; 1168 case MVT::i32: 1169 if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem()) 1170 return false; 1171 1172 if (isThumb2) { 1173 if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) 1174 StrOpc = ARM::t2STRi8; 1175 else 1176 StrOpc = ARM::t2STRi12; 1177 } else { 1178 StrOpc = ARM::STRi12; 1179 } 1180 break; 1181 case MVT::f32: 1182 if (!Subtarget->hasVFP2()) return false; 1183 // Unaligned stores need special handling. Floats require word-alignment. 1184 if (Alignment && Alignment < 4) { 1185 unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32)); 1186 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1187 TII.get(ARM::VMOVRS), MoveReg) 1188 .addReg(SrcReg)); 1189 SrcReg = MoveReg; 1190 VT = MVT::i32; 1191 StrOpc = isThumb2 ? 
ARM::t2STRi12 : ARM::STRi12; 1192 } else { 1193 StrOpc = ARM::VSTRS; 1194 } 1195 break; 1196 case MVT::f64: 1197 if (!Subtarget->hasVFP2()) return false; 1198 // FIXME: Unaligned stores need special handling. Doublewords require 1199 // word-alignment. 1200 if (Alignment && Alignment < 4) 1201 return false; 1202 1203 StrOpc = ARM::VSTRD; 1204 break; 1205 } 1206 // Simplify this down to something we can handle. 1207 ARMSimplifyAddress(Addr, VT, useAM3); 1208 1209 // Create the base instruction, then add the operands. 1210 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1211 TII.get(StrOpc)) 1212 .addReg(SrcReg); 1213 AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3); 1214 return true; 1215 } 1216 1217 bool ARMFastISel::SelectStore(const Instruction *I) { 1218 Value *Op0 = I->getOperand(0); 1219 unsigned SrcReg = 0; 1220 1221 // Atomic stores need special handling. 1222 if (cast<StoreInst>(I)->isAtomic()) 1223 return false; 1224 1225 // Verify we have a legal type before going any further. 1226 MVT VT; 1227 if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) 1228 return false; 1229 1230 // Get the value to be stored into a register. 1231 SrcReg = getRegForValue(Op0); 1232 if (SrcReg == 0) return false; 1233 1234 // See if we can handle this address. 1235 Address Addr; 1236 if (!ARMComputeAddress(I->getOperand(1), Addr)) 1237 return false; 1238 1239 if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment())) 1240 return false; 1241 return true; 1242 } 1243 1244 static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) { 1245 switch (Pred) { 1246 // Needs two compares... 1247 case CmpInst::FCMP_ONE: 1248 case CmpInst::FCMP_UEQ: 1249 default: 1250 // AL is our "false" for now. The other two need more compares. 1251 return ARMCC::AL; 1252 case CmpInst::ICMP_EQ: 1253 case CmpInst::FCMP_OEQ: 1254 return ARMCC::EQ; 1255 case CmpInst::ICMP_SGT: 1256 case CmpInst::FCMP_OGT: 1257 return ARMCC::GT; 1258 case CmpInst::ICMP_SGE: 1259 case CmpInst::FCMP_OGE: 1260 return ARMCC::GE; 1261 case CmpInst::ICMP_UGT: 1262 case CmpInst::FCMP_UGT: 1263 return ARMCC::HI; 1264 case CmpInst::FCMP_OLT: 1265 return ARMCC::MI; 1266 case CmpInst::ICMP_ULE: 1267 case CmpInst::FCMP_OLE: 1268 return ARMCC::LS; 1269 case CmpInst::FCMP_ORD: 1270 return ARMCC::VC; 1271 case CmpInst::FCMP_UNO: 1272 return ARMCC::VS; 1273 case CmpInst::FCMP_UGE: 1274 return ARMCC::PL; 1275 case CmpInst::ICMP_SLT: 1276 case CmpInst::FCMP_ULT: 1277 return ARMCC::LT; 1278 case CmpInst::ICMP_SLE: 1279 case CmpInst::FCMP_ULE: 1280 return ARMCC::LE; 1281 case CmpInst::FCMP_UNE: 1282 case CmpInst::ICMP_NE: 1283 return ARMCC::NE; 1284 case CmpInst::ICMP_UGE: 1285 return ARMCC::HS; 1286 case CmpInst::ICMP_ULT: 1287 return ARMCC::LO; 1288 } 1289 } 1290 1291 bool ARMFastISel::SelectBranch(const Instruction *I) { 1292 const BranchInst *BI = cast<BranchInst>(I); 1293 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 1294 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 1295 1296 // Simple branch support. 1297 1298 // If we can, avoid recomputing the compare - redoing it could lead to wonky 1299 // behavior. 1300 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 1301 if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { 1302 1303 // Get the compare predicate. 1304 // Try to take advantage of fallthrough opportunities. 
1305 CmpInst::Predicate Predicate = CI->getPredicate(); 1306 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 1307 std::swap(TBB, FBB); 1308 Predicate = CmpInst::getInversePredicate(Predicate); 1309 } 1310 1311 ARMCC::CondCodes ARMPred = getComparePred(Predicate); 1312 1313 // We may not handle every CC for now. 1314 if (ARMPred == ARMCC::AL) return false; 1315 1316 // Emit the compare. 1317 if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 1318 return false; 1319 1320 unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; 1321 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) 1322 .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); 1323 FastEmitBranch(FBB, DL); 1324 FuncInfo.MBB->addSuccessor(TBB); 1325 return true; 1326 } 1327 } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { 1328 MVT SourceVT; 1329 if (TI->hasOneUse() && TI->getParent() == I->getParent() && 1330 (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) { 1331 unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; 1332 unsigned OpReg = getRegForValue(TI->getOperand(0)); 1333 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1334 TII.get(TstOpc)) 1335 .addReg(OpReg).addImm(1)); 1336 1337 unsigned CCMode = ARMCC::NE; 1338 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 1339 std::swap(TBB, FBB); 1340 CCMode = ARMCC::EQ; 1341 } 1342 1343 unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; 1344 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) 1345 .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); 1346 1347 FastEmitBranch(FBB, DL); 1348 FuncInfo.MBB->addSuccessor(TBB); 1349 return true; 1350 } 1351 } else if (const ConstantInt *CI = 1352 dyn_cast<ConstantInt>(BI->getCondition())) { 1353 uint64_t Imm = CI->getZExtValue(); 1354 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 1355 FastEmitBranch(Target, DL); 1356 return true; 1357 } 1358 1359 unsigned CmpReg = getRegForValue(BI->getCondition()); 1360 if (CmpReg == 0) return false; 1361 1362 // We've been divorced from our compare! Our block was split, and 1363 // now our compare lives in a predecessor block. We musn't 1364 // re-compare here, as the children of the compare aren't guaranteed 1365 // live across the block boundary (we *could* check for this). 1366 // Regardless, the compare has been done in the predecessor block, 1367 // and it left a value for us in a virtual register. Ergo, we test 1368 // the one-bit value left in the virtual register. 1369 unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; 1370 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) 1371 .addReg(CmpReg).addImm(1)); 1372 1373 unsigned CCMode = ARMCC::NE; 1374 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 1375 std::swap(TBB, FBB); 1376 CCMode = ARMCC::EQ; 1377 } 1378 1379 unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; 1380 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) 1381 .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); 1382 FastEmitBranch(FBB, DL); 1383 FuncInfo.MBB->addSuccessor(TBB); 1384 return true; 1385 } 1386 1387 bool ARMFastISel::SelectIndirectBr(const Instruction *I) { 1388 unsigned AddrReg = getRegForValue(I->getOperand(0)); 1389 if (AddrReg == 0) return false; 1390 1391 unsigned Opc = isThumb2 ? 
ARM::tBRIND : ARM::BX; 1392 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) 1393 .addReg(AddrReg)); 1394 1395 const IndirectBrInst *IB = cast<IndirectBrInst>(I); 1396 for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) 1397 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]); 1398 1399 return true; 1400 } 1401 1402 bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, 1403 bool isZExt) { 1404 Type *Ty = Src1Value->getType(); 1405 EVT SrcEVT = TLI.getValueType(Ty, true); 1406 if (!SrcEVT.isSimple()) return false; 1407 MVT SrcVT = SrcEVT.getSimpleVT(); 1408 1409 bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); 1410 if (isFloat && !Subtarget->hasVFP2()) 1411 return false; 1412 1413 // Check to see if the 2nd operand is a constant that we can encode directly 1414 // in the compare. 1415 int Imm = 0; 1416 bool UseImm = false; 1417 bool isNegativeImm = false; 1418 // FIXME: At -O0 we don't have anything that canonicalizes operand order. 1419 // Thus, Src1Value may be a ConstantInt, but we're missing it. 1420 if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) { 1421 if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 || 1422 SrcVT == MVT::i1) { 1423 const APInt &CIVal = ConstInt->getValue(); 1424 Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); 1425 // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather 1426 // then a cmn, because there is no way to represent 2147483648 as a 1427 // signed 32-bit int. 1428 if (Imm < 0 && Imm != (int)0x80000000) { 1429 isNegativeImm = true; 1430 Imm = -Imm; 1431 } 1432 UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : 1433 (ARM_AM::getSOImmVal(Imm) != -1); 1434 } 1435 } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) { 1436 if (SrcVT == MVT::f32 || SrcVT == MVT::f64) 1437 if (ConstFP->isZero() && !ConstFP->isNegative()) 1438 UseImm = true; 1439 } 1440 1441 unsigned CmpOpc; 1442 bool isICmp = true; 1443 bool needsExt = false; 1444 switch (SrcVT.SimpleTy) { 1445 default: return false; 1446 // TODO: Verify compares. 1447 case MVT::f32: 1448 isICmp = false; 1449 CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES; 1450 break; 1451 case MVT::f64: 1452 isICmp = false; 1453 CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED; 1454 break; 1455 case MVT::i1: 1456 case MVT::i8: 1457 case MVT::i16: 1458 needsExt = true; 1459 // Intentional fall-through. 1460 case MVT::i32: 1461 if (isThumb2) { 1462 if (!UseImm) 1463 CmpOpc = ARM::t2CMPrr; 1464 else 1465 CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri; 1466 } else { 1467 if (!UseImm) 1468 CmpOpc = ARM::CMPrr; 1469 else 1470 CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri; 1471 } 1472 break; 1473 } 1474 1475 unsigned SrcReg1 = getRegForValue(Src1Value); 1476 if (SrcReg1 == 0) return false; 1477 1478 unsigned SrcReg2 = 0; 1479 if (!UseImm) { 1480 SrcReg2 = getRegForValue(Src2Value); 1481 if (SrcReg2 == 0) return false; 1482 } 1483 1484 // We have i1, i8, or i16, we need to either zero extend or sign extend. 
1485 if (needsExt) { 1486 SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); 1487 if (SrcReg1 == 0) return false; 1488 if (!UseImm) { 1489 SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); 1490 if (SrcReg2 == 0) return false; 1491 } 1492 } 1493 1494 if (!UseImm) { 1495 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1496 TII.get(CmpOpc)) 1497 .addReg(SrcReg1).addReg(SrcReg2)); 1498 } else { 1499 MachineInstrBuilder MIB; 1500 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 1501 .addReg(SrcReg1); 1502 1503 // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0. 1504 if (isICmp) 1505 MIB.addImm(Imm); 1506 AddOptionalDefs(MIB); 1507 } 1508 1509 // For floating point we need to move the result to a comparison register 1510 // that we can then use for branches. 1511 if (Ty->isFloatTy() || Ty->isDoubleTy()) 1512 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1513 TII.get(ARM::FMSTAT))); 1514 return true; 1515 } 1516 1517 bool ARMFastISel::SelectCmp(const Instruction *I) { 1518 const CmpInst *CI = cast<CmpInst>(I); 1519 1520 // Get the compare predicate. 1521 ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); 1522 1523 // We may not handle every CC for now. 1524 if (ARMPred == ARMCC::AL) return false; 1525 1526 // Emit the compare. 1527 if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 1528 return false; 1529 1530 // Now set a register based on the comparison. Explicitly set the predicates 1531 // here. 1532 unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; 1533 const TargetRegisterClass *RC = isThumb2 ? 1534 (const TargetRegisterClass*)&ARM::rGPRRegClass : 1535 (const TargetRegisterClass*)&ARM::GPRRegClass; 1536 unsigned DestReg = createResultReg(RC); 1537 Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); 1538 unsigned ZeroReg = TargetMaterializeConstant(Zero); 1539 // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR. 1540 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) 1541 .addReg(ZeroReg).addImm(1) 1542 .addImm(ARMPred).addReg(ARM::CPSR); 1543 1544 UpdateValueMap(I, DestReg); 1545 return true; 1546 } 1547 1548 bool ARMFastISel::SelectFPExt(const Instruction *I) { 1549 // Make sure we have VFP and that we're extending float to double. 1550 if (!Subtarget->hasVFP2()) return false; 1551 1552 Value *V = I->getOperand(0); 1553 if (!I->getType()->isDoubleTy() || 1554 !V->getType()->isFloatTy()) return false; 1555 1556 unsigned Op = getRegForValue(V); 1557 if (Op == 0) return false; 1558 1559 unsigned Result = createResultReg(&ARM::DPRRegClass); 1560 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1561 TII.get(ARM::VCVTDS), Result) 1562 .addReg(Op)); 1563 UpdateValueMap(I, Result); 1564 return true; 1565 } 1566 1567 bool ARMFastISel::SelectFPTrunc(const Instruction *I) { 1568 // Make sure we have VFP and that we're truncating double to float. 
1569 if (!Subtarget->hasVFP2()) return false; 1570 1571 Value *V = I->getOperand(0); 1572 if (!(I->getType()->isFloatTy() && 1573 V->getType()->isDoubleTy())) return false; 1574 1575 unsigned Op = getRegForValue(V); 1576 if (Op == 0) return false; 1577 1578 unsigned Result = createResultReg(&ARM::SPRRegClass); 1579 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1580 TII.get(ARM::VCVTSD), Result) 1581 .addReg(Op)); 1582 UpdateValueMap(I, Result); 1583 return true; 1584 } 1585 1586 bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { 1587 // Make sure we have VFP. 1588 if (!Subtarget->hasVFP2()) return false; 1589 1590 MVT DstVT; 1591 Type *Ty = I->getType(); 1592 if (!isTypeLegal(Ty, DstVT)) 1593 return false; 1594 1595 Value *Src = I->getOperand(0); 1596 EVT SrcEVT = TLI.getValueType(Src->getType(), true); 1597 if (!SrcEVT.isSimple()) 1598 return false; 1599 MVT SrcVT = SrcEVT.getSimpleVT(); 1600 if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) 1601 return false; 1602 1603 unsigned SrcReg = getRegForValue(Src); 1604 if (SrcReg == 0) return false; 1605 1606 // Handle sign-extension. 1607 if (SrcVT == MVT::i16 || SrcVT == MVT::i8) { 1608 SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32, 1609 /*isZExt*/!isSigned); 1610 if (SrcReg == 0) return false; 1611 } 1612 1613 // The conversion routine works on fp-reg to fp-reg and the operand above 1614 // was an integer, move it to the fp registers if possible. 1615 unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg); 1616 if (FP == 0) return false; 1617 1618 unsigned Opc; 1619 if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS; 1620 else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD; 1621 else return false; 1622 1623 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); 1624 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 1625 ResultReg) 1626 .addReg(FP)); 1627 UpdateValueMap(I, ResultReg); 1628 return true; 1629 } 1630 1631 bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { 1632 // Make sure we have VFP. 1633 if (!Subtarget->hasVFP2()) return false; 1634 1635 MVT DstVT; 1636 Type *RetTy = I->getType(); 1637 if (!isTypeLegal(RetTy, DstVT)) 1638 return false; 1639 1640 unsigned Op = getRegForValue(I->getOperand(0)); 1641 if (Op == 0) return false; 1642 1643 unsigned Opc; 1644 Type *OpTy = I->getOperand(0)->getType(); 1645 if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS; 1646 else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD; 1647 else return false; 1648 1649 // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg. 1650 unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); 1651 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 1652 ResultReg) 1653 .addReg(Op)); 1654 1655 // This result needs to be in an integer register, but the conversion only 1656 // takes place in fp-regs. 1657 unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg); 1658 if (IntReg == 0) return false; 1659 1660 UpdateValueMap(I, IntReg); 1661 return true; 1662 } 1663 1664 bool ARMFastISel::SelectSelect(const Instruction *I) { 1665 MVT VT; 1666 if (!isTypeLegal(I->getType(), VT)) 1667 return false; 1668 1669 // Things need to be register sized for register moves. 
1670 if (VT != MVT::i32) return false; 1671 1672 unsigned CondReg = getRegForValue(I->getOperand(0)); 1673 if (CondReg == 0) return false; 1674 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 1675 if (Op1Reg == 0) return false; 1676 1677 // Check to see if we can use an immediate in the conditional move. 1678 int Imm = 0; 1679 bool UseImm = false; 1680 bool isNegativeImm = false; 1681 if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) { 1682 assert (VT == MVT::i32 && "Expecting an i32."); 1683 Imm = (int)ConstInt->getValue().getZExtValue(); 1684 if (Imm < 0) { 1685 isNegativeImm = true; 1686 Imm = ~Imm; 1687 } 1688 UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : 1689 (ARM_AM::getSOImmVal(Imm) != -1); 1690 } 1691 1692 unsigned Op2Reg = 0; 1693 if (!UseImm) { 1694 Op2Reg = getRegForValue(I->getOperand(2)); 1695 if (Op2Reg == 0) return false; 1696 } 1697 1698 unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri; 1699 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 1700 .addReg(CondReg).addImm(0)); 1701 1702 unsigned MovCCOpc; 1703 const TargetRegisterClass *RC; 1704 if (!UseImm) { 1705 RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass; 1706 MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr; 1707 } else { 1708 RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass; 1709 if (!isNegativeImm) 1710 MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; 1711 else 1712 MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi; 1713 } 1714 unsigned ResultReg = createResultReg(RC); 1715 if (!UseImm) 1716 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) 1717 .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR); 1718 else 1719 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) 1720 .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR); 1721 UpdateValueMap(I, ResultReg); 1722 return true; 1723 } 1724 1725 bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) { 1726 MVT VT; 1727 Type *Ty = I->getType(); 1728 if (!isTypeLegal(Ty, VT)) 1729 return false; 1730 1731 // If we have integer div support we should have selected this automagically. 1732 // In case we have a real miss go ahead and return false and we'll pick 1733 // it up later. 1734 if (Subtarget->hasDivide()) return false; 1735 1736 // Otherwise emit a libcall. 1737 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; 1738 if (VT == MVT::i8) 1739 LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8; 1740 else if (VT == MVT::i16) 1741 LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16; 1742 else if (VT == MVT::i32) 1743 LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32; 1744 else if (VT == MVT::i64) 1745 LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64; 1746 else if (VT == MVT::i128) 1747 LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128; 1748 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); 1749 1750 return ARMEmitLibcall(I, LC); 1751 } 1752 1753 bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) { 1754 MVT VT; 1755 Type *Ty = I->getType(); 1756 if (!isTypeLegal(Ty, VT)) 1757 return false; 1758 1759 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; 1760 if (VT == MVT::i8) 1761 LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8; 1762 else if (VT == MVT::i16) 1763 LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16; 1764 else if (VT == MVT::i32) 1765 LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32; 1766 else if (VT == MVT::i64) 1767 LC = isSigned ? 
RTLIB::SREM_I64 : RTLIB::UREM_I64; 1768 else if (VT == MVT::i128) 1769 LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128; 1770 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); 1771 1772 return ARMEmitLibcall(I, LC); 1773 } 1774 1775 bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { 1776 EVT DestVT = TLI.getValueType(I->getType(), true); 1777 1778 // We can get here in the case when we have a binary operation on a non-legal 1779 // type and the target independent selector doesn't know how to handle it. 1780 if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) 1781 return false; 1782 1783 unsigned Opc; 1784 switch (ISDOpcode) { 1785 default: return false; 1786 case ISD::ADD: 1787 Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr; 1788 break; 1789 case ISD::OR: 1790 Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr; 1791 break; 1792 case ISD::SUB: 1793 Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr; 1794 break; 1795 } 1796 1797 unsigned SrcReg1 = getRegForValue(I->getOperand(0)); 1798 if (SrcReg1 == 0) return false; 1799 1800 // TODO: Often the 2nd operand is an immediate, which can be encoded directly 1801 // in the instruction, rather than materializing the value in a register. 1802 unsigned SrcReg2 = getRegForValue(I->getOperand(1)); 1803 if (SrcReg2 == 0) return false; 1804 1805 unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); 1806 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1807 TII.get(Opc), ResultReg) 1808 .addReg(SrcReg1).addReg(SrcReg2)); 1809 UpdateValueMap(I, ResultReg); 1810 return true; 1811 } 1812 1813 bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { 1814 EVT FPVT = TLI.getValueType(I->getType(), true); 1815 if (!FPVT.isSimple()) return false; 1816 MVT VT = FPVT.getSimpleVT(); 1817 1818 // We can get here in the case when we want to use NEON for our fp 1819 // operations, but can't figure out how to do so. Just use the vfp instructions 1820 // if we have them. 1821 // FIXME: It'd be nice to use NEON instructions. 1822 Type *Ty = I->getType(); 1823 bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); 1824 if (isFloat && !Subtarget->hasVFP2()) 1825 return false; 1826 1827 unsigned Opc; 1828 bool is64bit = VT == MVT::f64 || VT == MVT::i64; 1829 switch (ISDOpcode) { 1830 default: return false; 1831 case ISD::FADD: 1832 Opc = is64bit ? ARM::VADDD : ARM::VADDS; 1833 break; 1834 case ISD::FSUB: 1835 Opc = is64bit ? ARM::VSUBD : ARM::VSUBS; 1836 break; 1837 case ISD::FMUL: 1838 Opc = is64bit ? ARM::VMULD : ARM::VMULS; 1839 break; 1840 } 1841 unsigned Op1 = getRegForValue(I->getOperand(0)); 1842 if (Op1 == 0) return false; 1843 1844 unsigned Op2 = getRegForValue(I->getOperand(1)); 1845 if (Op2 == 0) return false; 1846 1847 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy)); 1848 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1849 TII.get(Opc), ResultReg) 1850 .addReg(Op1).addReg(Op2)); 1851 UpdateValueMap(I, ResultReg); 1852 return true; 1853 } 1854 1855 // Call Handling Code 1856 1857 // This is largely taken directly from CCAssignFnForNode 1858 // TODO: We may not support all of this. 1859 CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, 1860 bool Return, 1861 bool isVarArg) { 1862 switch (CC) { 1863 default: 1864 llvm_unreachable("Unsupported calling convention"); 1865 case CallingConv::Fast: 1866 if (Subtarget->hasVFP2() && !isVarArg) { 1867 if (!Subtarget->isAAPCS_ABI()) 1868 return (Return ?
RetFastCC_ARM_APCS : FastCC_ARM_APCS); 1869 // For AAPCS ABI targets, just use the VFP variant of the calling convention. 1870 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1871 } 1872 // Fallthrough 1873 case CallingConv::C: 1874 // Use target triple & subtarget features to do actual dispatch. 1875 if (Subtarget->isAAPCS_ABI()) { 1876 if (Subtarget->hasVFP2() && 1877 TM.Options.FloatABIType == FloatABI::Hard && !isVarArg) 1878 return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); 1879 else 1880 return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); 1881 } else 1882 return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); 1883 case CallingConv::ARM_AAPCS_VFP: 1884 if (!isVarArg) 1885 return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); 1886 // Fall through to the soft float variant, since variadic functions don't 1887 // use the hard floating point ABI. 1888 case CallingConv::ARM_AAPCS: 1889 return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); 1890 case CallingConv::ARM_APCS: 1891 return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); 1892 case CallingConv::GHC: 1893 if (Return) 1894 llvm_unreachable("Can't return in GHC call convention"); 1895 else 1896 return CC_ARM_APCS_GHC; 1897 } 1898 } 1899 1900 bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, 1901 SmallVectorImpl<unsigned> &ArgRegs, 1902 SmallVectorImpl<MVT> &ArgVTs, 1903 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, 1904 SmallVectorImpl<unsigned> &RegArgs, 1905 CallingConv::ID CC, 1906 unsigned &NumBytes, 1907 bool isVarArg) { 1908 SmallVector<CCValAssign, 16> ArgLocs; 1909 CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); 1910 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, 1911 CCAssignFnForCall(CC, false, isVarArg)); 1912 1913 // Check that we can handle all of the arguments. If we can't, then bail out 1914 // now before we add code to the MBB. 1915 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1916 CCValAssign &VA = ArgLocs[i]; 1917 MVT ArgVT = ArgVTs[VA.getValNo()]; 1918 1919 // We don't handle NEON/vector parameters yet. 1920 if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) 1921 return false; 1922 1923 // Make sure we can handle this argument in its assigned location. 1924 if (VA.isRegLoc() && !VA.needsCustom()) { 1925 continue; 1926 } else if (VA.needsCustom()) { 1927 // TODO: We need custom lowering for vector (v2f64) args. 1928 if (VA.getLocVT() != MVT::f64 || 1929 // TODO: Only handle register args for now. 1930 !VA.isRegLoc() || !ArgLocs[++i].isRegLoc()) 1931 return false; 1932 } else { 1933 switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) { 1934 default: 1935 return false; 1936 case MVT::i1: 1937 case MVT::i8: 1938 case MVT::i16: 1939 case MVT::i32: 1940 break; 1941 case MVT::f32: 1942 if (!Subtarget->hasVFP2()) 1943 return false; 1944 break; 1945 case MVT::f64: 1946 if (!Subtarget->hasVFP2()) 1947 return false; 1948 break; 1949 } 1950 } 1951 } 1952 1953 // At this point, we are able to handle the call's arguments in fast isel. 1954 1955 // Get a count of how many bytes are to be pushed on the stack. 1956 NumBytes = CCInfo.getNextStackOffset(); 1957 1958 // Issue CALLSEQ_START 1959 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 1960 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1961 TII.get(AdjStackDown)) 1962 .addImm(NumBytes)); 1963 1964 // Process the args.
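  // Each argument is first adjusted to its assigned location type (sext/zext/
  // aext to i32, or a bitcast), then either copied into its assigned physical
  // register or stored to the outgoing-argument area at [sp + LocMemOffset].
  // An f64 assigned to a GPR pair is split with VMOVRRD.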
1965 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1966 CCValAssign &VA = ArgLocs[i]; 1967 unsigned Arg = ArgRegs[VA.getValNo()]; 1968 MVT ArgVT = ArgVTs[VA.getValNo()]; 1969 1970 assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) && 1971 "We don't handle NEON/vector parameters yet."); 1972 1973 // Handle arg promotion, etc. 1974 switch (VA.getLocInfo()) { 1975 case CCValAssign::Full: break; 1976 case CCValAssign::SExt: { 1977 MVT DestVT = VA.getLocVT(); 1978 Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false); 1979 assert (Arg != 0 && "Failed to emit a sext"); 1980 ArgVT = DestVT; 1981 break; 1982 } 1983 case CCValAssign::AExt: 1984 // Intentional fall-through. Handle AExt and ZExt. 1985 case CCValAssign::ZExt: { 1986 MVT DestVT = VA.getLocVT(); 1987 Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); 1988 assert (Arg != 0 && "Failed to emit a zext"); 1989 ArgVT = DestVT; 1990 break; 1991 } 1992 case CCValAssign::BCvt: { 1993 unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg, 1994 /*TODO: Kill=*/false); 1995 assert(BC != 0 && "Failed to emit a bitcast!"); 1996 Arg = BC; 1997 ArgVT = VA.getLocVT(); 1998 break; 1999 } 2000 default: llvm_unreachable("Unknown arg promotion!"); 2001 } 2002 2003 // Now copy/store arg to correct locations. 2004 if (VA.isRegLoc() && !VA.needsCustom()) { 2005 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 2006 VA.getLocReg()) 2007 .addReg(Arg); 2008 RegArgs.push_back(VA.getLocReg()); 2009 } else if (VA.needsCustom()) { 2010 // TODO: We need custom lowering for vector (v2f64) args. 2011 assert(VA.getLocVT() == MVT::f64 && 2012 "Custom lowering for v2f64 args not available"); 2013 2014 CCValAssign &NextVA = ArgLocs[++i]; 2015 2016 assert(VA.isRegLoc() && NextVA.isRegLoc() && 2017 "We only handle register args!"); 2018 2019 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 2020 TII.get(ARM::VMOVRRD), VA.getLocReg()) 2021 .addReg(NextVA.getLocReg(), RegState::Define) 2022 .addReg(Arg)); 2023 RegArgs.push_back(VA.getLocReg()); 2024 RegArgs.push_back(NextVA.getLocReg()); 2025 } else { 2026 assert(VA.isMemLoc()); 2027 // Need to store on the stack. 2028 Address Addr; 2029 Addr.BaseType = Address::RegBase; 2030 Addr.Base.Reg = ARM::SP; 2031 Addr.Offset = VA.getLocMemOffset(); 2032 2033 bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet; 2034 assert(EmitRet && "Could not emit a store for argument!"); 2035 } 2036 } 2037 2038 return true; 2039 } 2040 2041 bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, 2042 const Instruction *I, CallingConv::ID CC, 2043 unsigned &NumBytes, bool isVarArg) { 2044 // Issue CALLSEQ_END 2045 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 2046 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 2047 TII.get(AdjStackUp)) 2048 .addImm(NumBytes).addImm(0)); 2049 2050 // Now the return value. 2051 if (RetVT != MVT::isVoid) { 2052 SmallVector<CCValAssign, 16> RVLocs; 2053 CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context); 2054 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg)); 2055 2056 // Copy all of the result registers out of their specified physreg. 2057 if (RVLocs.size() == 2 && RetVT == MVT::f64) { 2058 // For this move we copy into two registers and then move into the 2059 // double fp reg we want.
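      // With a soft-float calling convention an f64 comes back in two GPRs
      // (typically r0/r1); VMOVDRR packs them back into one D register, e.g.
      // (register names illustrative only): vmov d0, r0, r1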
2060 MVT DestVT = RVLocs[0].getValVT(); 2061 const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); 2062 unsigned ResultReg = createResultReg(DstRC); 2063 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 2064 TII.get(ARM::VMOVDRR), ResultReg) 2065 .addReg(RVLocs[0].getLocReg()) 2066 .addReg(RVLocs[1].getLocReg())); 2067 2068 UsedRegs.push_back(RVLocs[0].getLocReg()); 2069 UsedRegs.push_back(RVLocs[1].getLocReg()); 2070 2071 // Finally update the result. 2072 UpdateValueMap(I, ResultReg); 2073 } else { 2074 assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); 2075 MVT CopyVT = RVLocs[0].getValVT(); 2076 2077 // Special handling for extended integers. 2078 if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16) 2079 CopyVT = MVT::i32; 2080 2081 const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); 2082 2083 unsigned ResultReg = createResultReg(DstRC); 2084 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 2085 ResultReg).addReg(RVLocs[0].getLocReg()); 2086 UsedRegs.push_back(RVLocs[0].getLocReg()); 2087 2088 // Finally update the result. 2089 UpdateValueMap(I, ResultReg); 2090 } 2091 } 2092 2093 return true; 2094 } 2095 2096 bool ARMFastISel::SelectRet(const Instruction *I) { 2097 const ReturnInst *Ret = cast<ReturnInst>(I); 2098 const Function &F = *I->getParent()->getParent(); 2099 2100 if (!FuncInfo.CanLowerReturn) 2101 return false; 2102 2103 // Build a list of return value registers. 2104 SmallVector<unsigned, 4> RetRegs; 2105 2106 CallingConv::ID CC = F.getCallingConv(); 2107 if (Ret->getNumOperands() > 0) { 2108 SmallVector<ISD::OutputArg, 4> Outs; 2109 GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); 2110 2111 // Analyze operands of the call, assigning locations to each operand. 2112 SmallVector<CCValAssign, 16> ValLocs; 2113 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext()); 2114 CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */, 2115 F.isVarArg())); 2116 2117 const Value *RV = Ret->getOperand(0); 2118 unsigned Reg = getRegForValue(RV); 2119 if (Reg == 0) 2120 return false; 2121 2122 // Only handle a single return value for now. 2123 if (ValLocs.size() != 1) 2124 return false; 2125 2126 CCValAssign &VA = ValLocs[0]; 2127 2128 // Don't bother handling odd stuff for now. 2129 if (VA.getLocInfo() != CCValAssign::Full) 2130 return false; 2131 // Only handle register returns for now. 2132 if (!VA.isRegLoc()) 2133 return false; 2134 2135 unsigned SrcReg = Reg + VA.getValNo(); 2136 EVT RVEVT = TLI.getValueType(RV->getType()); 2137 if (!RVEVT.isSimple()) return false; 2138 MVT RVVT = RVEVT.getSimpleVT(); 2139 MVT DestVT = VA.getValVT(); 2140 // Special handling for extended integers. 2141 if (RVVT != DestVT) { 2142 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 2143 return false; 2144 2145 assert(DestVT == MVT::i32 && "ARM should always ext to i32"); 2146 2147 // Perform extension if flagged as either zext or sext. Otherwise, do 2148 // nothing. 2149 if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) { 2150 SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt()); 2151 if (SrcReg == 0) return false; 2152 } 2153 } 2154 2155 // Make the copy. 2156 unsigned DstReg = VA.getLocReg(); 2157 const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); 2158 // Avoid a cross-class copy. This is very unlikely. 
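    // If the physical return register is not in the source vreg's register
    // class, return false and let SelectionDAG handle the return rather than
    // emit a cross-bank copy here.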
2159 if (!SrcRC->contains(DstReg)) 2160 return false; 2161 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 2162 DstReg).addReg(SrcReg); 2163 2164 // Add register to return instruction. 2165 RetRegs.push_back(VA.getLocReg()); 2166 } 2167 2168 unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET; 2169 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 2170 TII.get(RetOpc)); 2171 AddOptionalDefs(MIB); 2172 for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) 2173 MIB.addReg(RetRegs[i], RegState::Implicit); 2174 return true; 2175 } 2176 2177 unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { 2178 if (UseReg) 2179 return isThumb2 ? ARM::tBLXr : ARM::BLX; 2180 else 2181 return isThumb2 ? ARM::tBL : ARM::BL; 2182 } 2183 2184 unsigned ARMFastISel::getLibcallReg(const Twine &Name) { 2185 GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, 2186 GlobalValue::ExternalLinkage, 0, Name); 2187 EVT LCREVT = TLI.getValueType(GV->getType()); 2188 if (!LCREVT.isSimple()) return 0; 2189 return ARMMaterializeGV(GV, LCREVT.getSimpleVT()); 2190 } 2191 2192 // A quick function that will emit a call for a named libcall in F with the 2193 // vector of passed arguments for the Instruction in I. We can assume that we 2194 // can emit a call for any libcall we can produce. This is an abridged version 2195 // of the full call infrastructure since we won't need to worry about things 2196 // like computed function pointers or strange arguments at call sites. 2197 // TODO: Try to unify this and the normal call bits for ARM, then try to unify 2198 // with X86. 2199 bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { 2200 CallingConv::ID CC = TLI.getLibcallCallingConv(Call); 2201 2202 // Handle *simple* calls for now. 2203 Type *RetTy = I->getType(); 2204 MVT RetVT; 2205 if (RetTy->isVoidTy()) 2206 RetVT = MVT::isVoid; 2207 else if (!isTypeLegal(RetTy, RetVT)) 2208 return false; 2209 2210 // Can't handle non-double multi-reg retvals. 2211 if (RetVT != MVT::isVoid && RetVT != MVT::i32) { 2212 SmallVector<CCValAssign, 16> RVLocs; 2213 CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context); 2214 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false)); 2215 if (RVLocs.size() >= 2 && RetVT != MVT::f64) 2216 return false; 2217 } 2218 2219 // Set up the argument vectors. 2220 SmallVector<Value*, 8> Args; 2221 SmallVector<unsigned, 8> ArgRegs; 2222 SmallVector<MVT, 8> ArgVTs; 2223 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; 2224 Args.reserve(I->getNumOperands()); 2225 ArgRegs.reserve(I->getNumOperands()); 2226 ArgVTs.reserve(I->getNumOperands()); 2227 ArgFlags.reserve(I->getNumOperands()); 2228 for (unsigned i = 0; i < I->getNumOperands(); ++i) { 2229 Value *Op = I->getOperand(i); 2230 unsigned Arg = getRegForValue(Op); 2231 if (Arg == 0) return false; 2232 2233 Type *ArgTy = Op->getType(); 2234 MVT ArgVT; 2235 if (!isTypeLegal(ArgTy, ArgVT)) return false; 2236 2237 ISD::ArgFlagsTy Flags; 2238 unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); 2239 Flags.setOrigAlign(OriginalAlignment); 2240 2241 Args.push_back(Op); 2242 ArgRegs.push_back(Arg); 2243 ArgVTs.push_back(ArgVT); 2244 ArgFlags.push_back(Flags); 2245 } 2246 2247 // Handle the arguments now that we've gotten them. 
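  // From here the path mirrors SelectCall below: ProcessCallArgs sets up the
  // call frame and the argument registers/stack slots, the BL/BLX itself is
  // emitted (the callee name comes from TLI.getLibcallName), and FinishCall
  // tears the frame down and copies out any result.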
2248 SmallVector<unsigned, 4> RegArgs; 2249 unsigned NumBytes; 2250 if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, 2251 RegArgs, CC, NumBytes, false)) 2252 return false; 2253 2254 unsigned CalleeReg = 0; 2255 if (EnableARMLongCalls) { 2256 CalleeReg = getLibcallReg(TLI.getLibcallName(Call)); 2257 if (CalleeReg == 0) return false; 2258 } 2259 2260 // Issue the call. 2261 unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls); 2262 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, 2263 DL, TII.get(CallOpc)); 2264 // BL / BLX don't take a predicate, but tBL / tBLX do. 2265 if (isThumb2) 2266 AddDefaultPred(MIB); 2267 if (EnableARMLongCalls) 2268 MIB.addReg(CalleeReg); 2269 else 2270 MIB.addExternalSymbol(TLI.getLibcallName(Call)); 2271 2272 // Add implicit physical register uses to the call. 2273 for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) 2274 MIB.addReg(RegArgs[i], RegState::Implicit); 2275 2276 // Add a register mask with the call-preserved registers. 2277 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 2278 MIB.addRegMask(TRI.getCallPreservedMask(CC)); 2279 2280 // Finish off the call including any return values. 2281 SmallVector<unsigned, 4> UsedRegs; 2282 if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false; 2283 2284 // Set all unused physreg defs as dead. 2285 static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); 2286 2287 return true; 2288 } 2289 2290 bool ARMFastISel::SelectCall(const Instruction *I, 2291 const char *IntrMemName = 0) { 2292 const CallInst *CI = cast<CallInst>(I); 2293 const Value *Callee = CI->getCalledValue(); 2294 2295 // Can't handle inline asm. 2296 if (isa<InlineAsm>(Callee)) return false; 2297 2298 // Allow SelectionDAG isel to handle tail calls. 2299 if (CI->isTailCall()) return false; 2300 2301 // Check the calling convention. 2302 ImmutableCallSite CS(CI); 2303 CallingConv::ID CC = CS.getCallingConv(); 2304 2305 // TODO: Avoid some calling conventions? 2306 2307 PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 2308 FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 2309 bool isVarArg = FTy->isVarArg(); 2310 2311 // Handle *simple* calls for now. 2312 Type *RetTy = I->getType(); 2313 MVT RetVT; 2314 if (RetTy->isVoidTy()) 2315 RetVT = MVT::isVoid; 2316 else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && 2317 RetVT != MVT::i8 && RetVT != MVT::i1) 2318 return false; 2319 2320 // Can't handle non-double multi-reg retvals. 2321 if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 && 2322 RetVT != MVT::i16 && RetVT != MVT::i32) { 2323 SmallVector<CCValAssign, 16> RVLocs; 2324 CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context); 2325 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg)); 2326 if (RVLocs.size() >= 2 && RetVT != MVT::f64) 2327 return false; 2328 } 2329 2330 // Set up the argument vectors. 2331 SmallVector<Value*, 8> Args; 2332 SmallVector<unsigned, 8> ArgRegs; 2333 SmallVector<MVT, 8> ArgVTs; 2334 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; 2335 unsigned arg_size = CS.arg_size(); 2336 Args.reserve(arg_size); 2337 ArgRegs.reserve(arg_size); 2338 ArgVTs.reserve(arg_size); 2339 ArgFlags.reserve(arg_size); 2340 for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); 2341 i != e; ++i) { 2342 // If we're lowering a memory intrinsic instead of a regular call, skip the 2343 // last two arguments, which shouldn't be passed to the underlying function. 
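    // (The trailing two operands of these memory intrinsics are the alignment
    // and isvolatile flags, which the C library routines do not take.)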
2344 if (IntrMemName && e-i <= 2) 2345 break; 2346 2347 ISD::ArgFlagsTy Flags; 2348 unsigned AttrInd = i - CS.arg_begin() + 1; 2349 if (CS.paramHasAttr(AttrInd, Attribute::SExt)) 2350 Flags.setSExt(); 2351 if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) 2352 Flags.setZExt(); 2353 2354 // FIXME: Only handle *easy* calls for now. 2355 if (CS.paramHasAttr(AttrInd, Attribute::InReg) || 2356 CS.paramHasAttr(AttrInd, Attribute::StructRet) || 2357 CS.paramHasAttr(AttrInd, Attribute::Nest) || 2358 CS.paramHasAttr(AttrInd, Attribute::ByVal)) 2359 return false; 2360 2361 Type *ArgTy = (*i)->getType(); 2362 MVT ArgVT; 2363 if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 && 2364 ArgVT != MVT::i1) 2365 return false; 2366 2367 unsigned Arg = getRegForValue(*i); 2368 if (Arg == 0) 2369 return false; 2370 2371 unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); 2372 Flags.setOrigAlign(OriginalAlignment); 2373 2374 Args.push_back(*i); 2375 ArgRegs.push_back(Arg); 2376 ArgVTs.push_back(ArgVT); 2377 ArgFlags.push_back(Flags); 2378 } 2379 2380 // Handle the arguments now that we've gotten them. 2381 SmallVector<unsigned, 4> RegArgs; 2382 unsigned NumBytes; 2383 if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, 2384 RegArgs, CC, NumBytes, isVarArg)) 2385 return false; 2386 2387 bool UseReg = false; 2388 const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); 2389 if (!GV || EnableARMLongCalls) UseReg = true; 2390 2391 unsigned CalleeReg = 0; 2392 if (UseReg) { 2393 if (IntrMemName) 2394 CalleeReg = getLibcallReg(IntrMemName); 2395 else 2396 CalleeReg = getRegForValue(Callee); 2397 2398 if (CalleeReg == 0) return false; 2399 } 2400 2401 // Issue the call. 2402 unsigned CallOpc = ARMSelectCallOp(UseReg); 2403 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, 2404 DL, TII.get(CallOpc)); 2405 2406 // ARM calls don't take a predicate, but tBL / tBLX do. 2407 if(isThumb2) 2408 AddDefaultPred(MIB); 2409 if (UseReg) 2410 MIB.addReg(CalleeReg); 2411 else if (!IntrMemName) 2412 MIB.addGlobalAddress(GV, 0, 0); 2413 else 2414 MIB.addExternalSymbol(IntrMemName, 0); 2415 2416 // Add implicit physical register uses to the call. 2417 for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) 2418 MIB.addReg(RegArgs[i], RegState::Implicit); 2419 2420 // Add a register mask with the call-preserved registers. 2421 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 2422 MIB.addRegMask(TRI.getCallPreservedMask(CC)); 2423 2424 // Finish off the call including any return values. 2425 SmallVector<unsigned, 4> UsedRegs; 2426 if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg)) 2427 return false; 2428 2429 // Set all unused physreg defs as dead. 2430 static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); 2431 2432 return true; 2433 } 2434 2435 bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) { 2436 return Len <= 16; 2437 } 2438 2439 bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, 2440 uint64_t Len, unsigned Alignment) { 2441 // Make sure we don't bloat code by inlining very large memcpy's. 2442 if (!ARMIsMemCpySmall(Len)) 2443 return false; 2444 2445 while (Len) { 2446 MVT VT; 2447 if (!Alignment || Alignment >= 4) { 2448 if (Len >= 4) 2449 VT = MVT::i32; 2450 else if (Len >= 2) 2451 VT = MVT::i16; 2452 else { 2453 assert (Len == 1 && "Expected a length of 1!"); 2454 VT = MVT::i8; 2455 } 2456 } else { 2457 // Bound based on alignment. 
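      // With only 2-byte alignment the widest safe chunk is an i16; anything
      // less (or unknown) falls back to byte copies. E.g. a 7-byte copy with
      // 4-byte alignment above becomes i32 + i16 + i8 load/store pairs.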
2458 if (Len >= 2 && Alignment == 2) 2459 VT = MVT::i16; 2460 else { 2461 VT = MVT::i8; 2462 } 2463 } 2464 2465 bool RV; 2466 unsigned ResultReg; 2467 RV = ARMEmitLoad(VT, ResultReg, Src); 2468 assert (RV == true && "Should be able to handle this load."); 2469 RV = ARMEmitStore(VT, ResultReg, Dest); 2470 assert (RV == true && "Should be able to handle this store."); 2471 (void)RV; 2472 2473 unsigned Size = VT.getSizeInBits()/8; 2474 Len -= Size; 2475 Dest.Offset += Size; 2476 Src.Offset += Size; 2477 } 2478 2479 return true; 2480 } 2481 2482 bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { 2483 // FIXME: Handle more intrinsics. 2484 switch (I.getIntrinsicID()) { 2485 default: return false; 2486 case Intrinsic::frameaddress: { 2487 MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); 2488 MFI->setFrameAddressIsTaken(true); 2489 2490 unsigned LdrOpc; 2491 const TargetRegisterClass *RC; 2492 if (isThumb2) { 2493 LdrOpc = ARM::t2LDRi12; 2494 RC = (const TargetRegisterClass*)&ARM::tGPRRegClass; 2495 } else { 2496 LdrOpc = ARM::LDRi12; 2497 RC = (const TargetRegisterClass*)&ARM::GPRRegClass; 2498 } 2499 2500 const ARMBaseRegisterInfo *RegInfo = 2501 static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo()); 2502 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 2503 unsigned SrcReg = FramePtr; 2504 2505 // Recursively load frame address 2506 // ldr r0 [fp] 2507 // ldr r0 [r0] 2508 // ldr r0 [r0] 2509 // ... 2510 unsigned DestReg; 2511 unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue(); 2512 while (Depth--) { 2513 DestReg = createResultReg(RC); 2514 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 2515 TII.get(LdrOpc), DestReg) 2516 .addReg(SrcReg).addImm(0)); 2517 SrcReg = DestReg; 2518 } 2519 UpdateValueMap(&I, SrcReg); 2520 return true; 2521 } 2522 case Intrinsic::memcpy: 2523 case Intrinsic::memmove: { 2524 const MemTransferInst &MTI = cast<MemTransferInst>(I); 2525 // Don't handle volatile. 2526 if (MTI.isVolatile()) 2527 return false; 2528 2529 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 2530 // we would emit dead code because we don't currently handle memmoves. 2531 bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy); 2532 if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) { 2533 // Small memcpy's are common enough that we want to do them without a call 2534 // if possible. 2535 uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue(); 2536 if (ARMIsMemCpySmall(Len)) { 2537 Address Dest, Src; 2538 if (!ARMComputeAddress(MTI.getRawDest(), Dest) || 2539 !ARMComputeAddress(MTI.getRawSource(), Src)) 2540 return false; 2541 unsigned Alignment = MTI.getAlignment(); 2542 if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 2543 return true; 2544 } 2545 } 2546 2547 if (!MTI.getLength()->getType()->isIntegerTy(32)) 2548 return false; 2549 2550 if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) 2551 return false; 2552 2553 const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove"; 2554 return SelectCall(&I, IntrMemName); 2555 } 2556 case Intrinsic::memset: { 2557 const MemSetInst &MSI = cast<MemSetInst>(I); 2558 // Don't handle volatile. 
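    // Unlike small memcpys above, there is no inline expansion here; memset is
    // always lowered to a call to the library memset once the checks below pass.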
2559 if (MSI.isVolatile()) 2560 return false; 2561 2562 if (!MSI.getLength()->getType()->isIntegerTy(32)) 2563 return false; 2564 2565 if (MSI.getDestAddressSpace() > 255) 2566 return false; 2567 2568 return SelectCall(&I, "memset"); 2569 } 2570 case Intrinsic::trap: { 2571 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get( 2572 Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP)); 2573 return true; 2574 } 2575 } 2576 } 2577 2578 bool ARMFastISel::SelectTrunc(const Instruction *I) { 2579 // The high bits for a type smaller than the register size are assumed to be 2580 // undefined. 2581 Value *Op = I->getOperand(0); 2582 2583 EVT SrcVT, DestVT; 2584 SrcVT = TLI.getValueType(Op->getType(), true); 2585 DestVT = TLI.getValueType(I->getType(), true); 2586 2587 if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) 2588 return false; 2589 if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) 2590 return false; 2591 2592 unsigned SrcReg = getRegForValue(Op); 2593 if (!SrcReg) return false; 2594 2595 // Because the high bits are undefined, a truncate doesn't generate 2596 // any code. 2597 UpdateValueMap(I, SrcReg); 2598 return true; 2599 } 2600 2601 unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 2602 bool isZExt) { 2603 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) 2604 return 0; 2605 2606 unsigned Opc; 2607 bool isBoolZext = false; 2608 const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); 2609 switch (SrcVT.SimpleTy) { 2610 default: return 0; 2611 case MVT::i16: 2612 if (!Subtarget->hasV6Ops()) return 0; 2613 RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; 2614 if (isZExt) 2615 Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH; 2616 else 2617 Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; 2618 break; 2619 case MVT::i8: 2620 if (!Subtarget->hasV6Ops()) return 0; 2621 RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; 2622 if (isZExt) 2623 Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB; 2624 else 2625 Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; 2626 break; 2627 case MVT::i1: 2628 if (isZExt) { 2629 RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass; 2630 Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; 2631 isBoolZext = true; 2632 break; 2633 } 2634 return 0; 2635 } 2636 2637 unsigned ResultReg = createResultReg(RC); 2638 MachineInstrBuilder MIB; 2639 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) 2640 .addReg(SrcReg); 2641 if (isBoolZext) 2642 MIB.addImm(1); 2643 else 2644 MIB.addImm(0); 2645 AddOptionalDefs(MIB); 2646 return ResultReg; 2647 } 2648 2649 bool ARMFastISel::SelectIntExt(const Instruction *I) { 2650 // On ARM, in general, integer casts don't involve legal types; this code 2651 // handles promotable integers. 
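  // E.g. a zext/sext of i8 or i16 to i32 becomes a single uxtb/sxtb/uxth/sxth
  // on v6+, and an i1 zero-extension becomes an AND with 1; see ARMEmitIntExt
  // above.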
2652 Type *DestTy = I->getType(); 2653 Value *Src = I->getOperand(0); 2654 Type *SrcTy = Src->getType(); 2655 2656 bool isZExt = isa<ZExtInst>(I); 2657 unsigned SrcReg = getRegForValue(Src); 2658 if (!SrcReg) return false; 2659 2660 EVT SrcEVT, DestEVT; 2661 SrcEVT = TLI.getValueType(SrcTy, true); 2662 DestEVT = TLI.getValueType(DestTy, true); 2663 if (!SrcEVT.isSimple()) return false; 2664 if (!DestEVT.isSimple()) return false; 2665 2666 MVT SrcVT = SrcEVT.getSimpleVT(); 2667 MVT DestVT = DestEVT.getSimpleVT(); 2668 unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt); 2669 if (ResultReg == 0) return false; 2670 UpdateValueMap(I, ResultReg); 2671 return true; 2672 } 2673 2674 bool ARMFastISel::SelectShift(const Instruction *I, 2675 ARM_AM::ShiftOpc ShiftTy) { 2676 // We handle thumb2 mode by target independent selector 2677 // or SelectionDAG ISel. 2678 if (isThumb2) 2679 return false; 2680 2681 // Only handle i32 now. 2682 EVT DestVT = TLI.getValueType(I->getType(), true); 2683 if (DestVT != MVT::i32) 2684 return false; 2685 2686 unsigned Opc = ARM::MOVsr; 2687 unsigned ShiftImm; 2688 Value *Src2Value = I->getOperand(1); 2689 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) { 2690 ShiftImm = CI->getZExtValue(); 2691 2692 // Fall back to selection DAG isel if the shift amount 2693 // is zero or greater than the width of the value type. 2694 if (ShiftImm == 0 || ShiftImm >=32) 2695 return false; 2696 2697 Opc = ARM::MOVsi; 2698 } 2699 2700 Value *Src1Value = I->getOperand(0); 2701 unsigned Reg1 = getRegForValue(Src1Value); 2702 if (Reg1 == 0) return false; 2703 2704 unsigned Reg2 = 0; 2705 if (Opc == ARM::MOVsr) { 2706 Reg2 = getRegForValue(Src2Value); 2707 if (Reg2 == 0) return false; 2708 } 2709 2710 unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); 2711 if(ResultReg == 0) return false; 2712 2713 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 2714 TII.get(Opc), ResultReg) 2715 .addReg(Reg1); 2716 2717 if (Opc == ARM::MOVsi) 2718 MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm)); 2719 else if (Opc == ARM::MOVsr) { 2720 MIB.addReg(Reg2); 2721 MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0)); 2722 } 2723 2724 AddOptionalDefs(MIB); 2725 UpdateValueMap(I, ResultReg); 2726 return true; 2727 } 2728 2729 // TODO: SoftFP support. 
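// TargetSelectInstruction is the FastISel entry point for this target.
// Returning false for an opcode (or from any of the Select* routines) makes
// FastISel fall back to SelectionDAG for that one instruction.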
2730 bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { 2731 2732 switch (I->getOpcode()) { 2733 case Instruction::Load: 2734 return SelectLoad(I); 2735 case Instruction::Store: 2736 return SelectStore(I); 2737 case Instruction::Br: 2738 return SelectBranch(I); 2739 case Instruction::IndirectBr: 2740 return SelectIndirectBr(I); 2741 case Instruction::ICmp: 2742 case Instruction::FCmp: 2743 return SelectCmp(I); 2744 case Instruction::FPExt: 2745 return SelectFPExt(I); 2746 case Instruction::FPTrunc: 2747 return SelectFPTrunc(I); 2748 case Instruction::SIToFP: 2749 return SelectIToFP(I, /*isSigned*/ true); 2750 case Instruction::UIToFP: 2751 return SelectIToFP(I, /*isSigned*/ false); 2752 case Instruction::FPToSI: 2753 return SelectFPToI(I, /*isSigned*/ true); 2754 case Instruction::FPToUI: 2755 return SelectFPToI(I, /*isSigned*/ false); 2756 case Instruction::Add: 2757 return SelectBinaryIntOp(I, ISD::ADD); 2758 case Instruction::Or: 2759 return SelectBinaryIntOp(I, ISD::OR); 2760 case Instruction::Sub: 2761 return SelectBinaryIntOp(I, ISD::SUB); 2762 case Instruction::FAdd: 2763 return SelectBinaryFPOp(I, ISD::FADD); 2764 case Instruction::FSub: 2765 return SelectBinaryFPOp(I, ISD::FSUB); 2766 case Instruction::FMul: 2767 return SelectBinaryFPOp(I, ISD::FMUL); 2768 case Instruction::SDiv: 2769 return SelectDiv(I, /*isSigned*/ true); 2770 case Instruction::UDiv: 2771 return SelectDiv(I, /*isSigned*/ false); 2772 case Instruction::SRem: 2773 return SelectRem(I, /*isSigned*/ true); 2774 case Instruction::URem: 2775 return SelectRem(I, /*isSigned*/ false); 2776 case Instruction::Call: 2777 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) 2778 return SelectIntrinsicCall(*II); 2779 return SelectCall(I); 2780 case Instruction::Select: 2781 return SelectSelect(I); 2782 case Instruction::Ret: 2783 return SelectRet(I); 2784 case Instruction::Trunc: 2785 return SelectTrunc(I); 2786 case Instruction::ZExt: 2787 case Instruction::SExt: 2788 return SelectIntExt(I); 2789 case Instruction::Shl: 2790 return SelectShift(I, ARM_AM::lsl); 2791 case Instruction::LShr: 2792 return SelectShift(I, ARM_AM::lsr); 2793 case Instruction::AShr: 2794 return SelectShift(I, ARM_AM::asr); 2795 default: break; 2796 } 2797 return false; 2798 } 2799 2800 /// TryToFoldLoad - The specified machine instr operand is a vreg, and that 2801 /// vreg is being provided by the specified load instruction. If possible, 2802 /// try to fold the load as an operand to the instruction, returning true if 2803 /// successful. 2804 bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, 2805 const LoadInst *LI) { 2806 // Verify we have a legal type before going any further. 2807 MVT VT; 2808 if (!isLoadTypeLegal(LI->getType(), VT)) 2809 return false; 2810 2811 // Combine load followed by zero- or sign-extend. 2812 // ldrb r1, [r0] ldrb r1, [r0] 2813 // uxtb r2, r1 => 2814 // mov r3, r2 mov r3, r1 2815 bool isZExt = true; 2816 switch(MI->getOpcode()) { 2817 default: return false; 2818 case ARM::SXTH: 2819 case ARM::t2SXTH: 2820 isZExt = false; 2821 case ARM::UXTH: 2822 case ARM::t2UXTH: 2823 if (VT != MVT::i16) 2824 return false; 2825 break; 2826 case ARM::SXTB: 2827 case ARM::t2SXTB: 2828 isZExt = false; 2829 case ARM::UXTB: 2830 case ARM::t2UXTB: 2831 if (VT != MVT::i8) 2832 return false; 2833 break; 2834 } 2835 // See if we can handle this address. 
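  // The fold is performed by re-emitting the load with the extension baked in
  // (ARMEmitLoad with isZExt) directly into the extend's result register and
  // then erasing the extend instruction.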
2836 Address Addr; 2837 if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; 2838 2839 unsigned ResultReg = MI->getOperand(0).getReg(); 2840 if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false)) 2841 return false; 2842 MI->eraseFromParent(); 2843 return true; 2844 } 2845 2846 unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, 2847 unsigned Align, MVT VT) { 2848 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2849 ARMConstantPoolConstant *CPV = 2850 ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 2851 unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); 2852 2853 unsigned Opc; 2854 unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT)); 2855 // Load value. 2856 if (isThumb2) { 2857 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 2858 TII.get(ARM::t2LDRpci), DestReg1) 2859 .addConstantPoolIndex(Idx)); 2860 Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs; 2861 } else { 2862 // The extra immediate is for addrmode2. 2863 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, 2864 DL, TII.get(ARM::LDRcp), DestReg1) 2865 .addConstantPoolIndex(Idx).addImm(0)); 2866 Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs; 2867 } 2868 2869 unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); 2870 if (GlobalBaseReg == 0) { 2871 GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT)); 2872 AFI->setGlobalBaseReg(GlobalBaseReg); 2873 } 2874 2875 unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT)); 2876 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, 2877 DL, TII.get(Opc), DestReg2) 2878 .addReg(DestReg1) 2879 .addReg(GlobalBaseReg); 2880 if (!UseGOTOFF) 2881 MIB.addImm(0); 2882 AddOptionalDefs(MIB); 2883 2884 return DestReg2; 2885 } 2886 2887 bool ARMFastISel::FastLowerArguments() { 2888 if (!FuncInfo.CanLowerReturn) 2889 return false; 2890 2891 const Function *F = FuncInfo.Fn; 2892 if (F->isVarArg()) 2893 return false; 2894 2895 CallingConv::ID CC = F->getCallingConv(); 2896 switch (CC) { 2897 default: 2898 return false; 2899 case CallingConv::Fast: 2900 case CallingConv::C: 2901 case CallingConv::ARM_AAPCS_VFP: 2902 case CallingConv::ARM_AAPCS: 2903 case CallingConv::ARM_APCS: 2904 break; 2905 } 2906 2907 // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments 2908 // which are passed in r0 - r3. 
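  // E.g. define i32 @f(i32 %a, i32 %b) qualifies (%a arrives in r0, %b in r1);
  // more than four arguments, arguments that are not i8/i16/i32 scalars, or
  // inreg/sret/byval attributes all fall back to the normal lowering path.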
2909 unsigned Idx = 1; 2910 for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); 2911 I != E; ++I, ++Idx) { 2912 if (Idx > 4) 2913 return false; 2914 2915 if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) || 2916 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || 2917 F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) 2918 return false; 2919 2920 Type *ArgTy = I->getType(); 2921 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) 2922 return false; 2923 2924 EVT ArgVT = TLI.getValueType(ArgTy); 2925 if (!ArgVT.isSimple()) return false; 2926 switch (ArgVT.getSimpleVT().SimpleTy) { 2927 case MVT::i8: 2928 case MVT::i16: 2929 case MVT::i32: 2930 break; 2931 default: 2932 return false; 2933 } 2934 } 2935 2936 2937 static const uint16_t GPRArgRegs[] = { 2938 ARM::R0, ARM::R1, ARM::R2, ARM::R3 2939 }; 2940 2941 const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); 2942 Idx = 0; 2943 for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); 2944 I != E; ++I, ++Idx) { 2945 if (I->use_empty()) 2946 continue; 2947 unsigned SrcReg = GPRArgRegs[Idx]; 2948 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 2949 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 2950 // Without this, EmitLiveInCopies may eliminate the livein if its only 2951 // use is a bitcast (which isn't turned into an instruction). 2952 unsigned ResultReg = createResultReg(RC); 2953 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 2954 ResultReg).addReg(DstReg, getKillRegState(true)); 2955 UpdateValueMap(I, ResultReg); 2956 } 2957 2958 return true; 2959 } 2960 2961 namespace llvm { 2962 FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, 2963 const TargetLibraryInfo *libInfo) { 2964 // Completely untested on non-iOS. 2965 const TargetMachine &TM = funcInfo.MF->getTarget(); 2966 2967 // iOS only for now, and not Thumb1 (ARM and Thumb2 modes are supported). 2968 const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); 2969 if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only()) 2970 return new ARMFastISel(funcInfo, libInfo); 2971 return 0; 2972 } 2973 } 2974