//===-- X86FastISel.cpp - X86 FastISel implementation --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

class X86FastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating-point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  bool TargetSelectInstruction(const Instruction *I) override;

  /// \brief The specified machine instr operand is a vreg, and that vreg is
  /// being provided by the specified load instruction. If possible, try to
  /// fold the load as an operand to the instruction, returning true on
  /// success.
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;

  bool FastLowerArguments() override;

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);

  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
                       unsigned &ResultReg);

  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                        const X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectDivRem(const Instruction *I);

  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);

  bool X86VisitIntrinsicCall(const IntrinsicInst &I);
  bool X86SelectCall(const Instruction *I);

  bool DoSelectCall(const Instruction *I, const char *MemIntName);

  const X86InstrInfo *getInstrInfo() const {
    return getTargetMachine()->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);

  unsigned TargetMaterializeConstant(const Constant *C) override;

  unsigned TargetMaterializeAlloca(const AllocaInst *C) override;

  unsigned TargetMaterializeFloatZero(const ConstantFP *CF) override;

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
  }

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);

  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                            const Value *Cond);
};

} // end anonymous namespace.

static CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) {
  // If both operands are the same, then try to optimize or fold the cmp.
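  // Comparing a value against itself folds to a constant for the integer
  // predicates (a value always equals itself), while the FP predicates reduce
  // to FCMP_FALSE, FCMP_TRUE, FCMP_ORD, or FCMP_UNO, since the only remaining
  // question is whether the operand is a NaN.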
  CmpInst::Predicate Predicate = CI->getPredicate();
  if (CI->getOperand(0) != CI->getOperand(1))
    return Predicate;

  switch (Predicate) {
  default: llvm_unreachable("Invalid predicate!");
  case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
  case CmpInst::FCMP_OEQ:   Predicate = CmpInst::FCMP_ORD;   break;
  case CmpInst::FCMP_OGT:   Predicate = CmpInst::FCMP_FALSE; break;
  case CmpInst::FCMP_OGE:   Predicate = CmpInst::FCMP_ORD;   break;
  case CmpInst::FCMP_OLT:   Predicate = CmpInst::FCMP_FALSE; break;
  case CmpInst::FCMP_OLE:   Predicate = CmpInst::FCMP_ORD;   break;
  case CmpInst::FCMP_ONE:   Predicate = CmpInst::FCMP_FALSE; break;
  case CmpInst::FCMP_ORD:   Predicate = CmpInst::FCMP_ORD;   break;
  case CmpInst::FCMP_UNO:   Predicate = CmpInst::FCMP_UNO;   break;
  case CmpInst::FCMP_UEQ:   Predicate = CmpInst::FCMP_TRUE;  break;
  case CmpInst::FCMP_UGT:   Predicate = CmpInst::FCMP_UNO;   break;
  case CmpInst::FCMP_UGE:   Predicate = CmpInst::FCMP_TRUE;  break;
  case CmpInst::FCMP_ULT:   Predicate = CmpInst::FCMP_UNO;   break;
  case CmpInst::FCMP_ULE:   Predicate = CmpInst::FCMP_TRUE;  break;
  case CmpInst::FCMP_UNE:   Predicate = CmpInst::FCMP_UNO;   break;
  case CmpInst::FCMP_TRUE:  Predicate = CmpInst::FCMP_TRUE;  break;

  case CmpInst::ICMP_EQ:    Predicate = CmpInst::FCMP_TRUE;  break;
  case CmpInst::ICMP_NE:    Predicate = CmpInst::FCMP_FALSE; break;
  case CmpInst::ICMP_UGT:   Predicate = CmpInst::FCMP_FALSE; break;
  case CmpInst::ICMP_UGE:   Predicate = CmpInst::FCMP_TRUE;  break;
  case CmpInst::ICMP_ULT:   Predicate = CmpInst::FCMP_FALSE; break;
  case CmpInst::ICMP_ULE:   Predicate = CmpInst::FCMP_TRUE;  break;
  case CmpInst::ICMP_SGT:   Predicate = CmpInst::FCMP_FALSE; break;
  case CmpInst::ICMP_SGE:   Predicate = CmpInst::FCMP_TRUE;  break;
  case CmpInst::ICMP_SLT:   Predicate = CmpInst::FCMP_FALSE; break;
  case CmpInst::ICMP_SLE:   Predicate = CmpInst::FCMP_TRUE;  break;
  }

  return Predicate;
}

static std::pair<X86::CondCode, bool>
getX86ConditionCode(CmpInst::Predicate Predicate) {
  X86::CondCode CC = X86::COND_INVALID;
  bool NeedSwap = false;
  switch (Predicate) {
  default: break;
  // Floating-point Predicates
  case CmpInst::FCMP_UEQ: CC = X86::COND_E;       break;
  case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OGT: CC = X86::COND_A;       break;
  case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OGE: CC = X86::COND_AE;      break;
  case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_ULT: CC = X86::COND_B;       break;
  case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_ULE: CC = X86::COND_BE;      break;
  case CmpInst::FCMP_ONE: CC = X86::COND_NE;      break;
  case CmpInst::FCMP_UNO: CC = X86::COND_P;       break;
  case CmpInst::FCMP_ORD: CC = X86::COND_NP;      break;
  case CmpInst::FCMP_OEQ: // fall-through
  case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;

  // Integer Predicates
  case CmpInst::ICMP_EQ:  CC = X86::COND_E;       break;
  case CmpInst::ICMP_NE:  CC = X86::COND_NE;      break;
  case CmpInst::ICMP_UGT: CC = X86::COND_A;       break;
  case CmpInst::ICMP_UGE: CC = X86::COND_AE;      break;
  case CmpInst::ICMP_ULT: CC = X86::COND_B;       break;
  case CmpInst::ICMP_ULE: CC = X86::COND_BE;      break;
  case CmpInst::ICMP_SGT: CC = X86::COND_G;       break;
  case CmpInst::ICMP_SGE: CC = X86::COND_GE;      break;
  case CmpInst::ICMP_SLT: CC = X86::COND_L;       break;
  case CmpInst::ICMP_SLE: CC = X86::COND_LE;      break;
  }

  return std::make_pair(CC, NeedSwap);
}

static std::pair<unsigned, bool>
getX86SSEConditionCode(CmpInst::Predicate Predicate) {
  unsigned CC;
  bool NeedSwap = false;

  // SSE Condition code mapping:
  //  0 - EQ
  //  1 - LT
  //  2 - LE
  //  3 - UNORD
  //  4 - NEQ
  //  5 - NLT
  //  6 - NLE
  //  7 - ORD
  switch (Predicate) {
  default: llvm_unreachable("Unexpected predicate");
  case CmpInst::FCMP_OEQ: CC = 0;          break;
  case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OLT: CC = 1;          break;
  case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OLE: CC = 2;          break;
  case CmpInst::FCMP_UNO: CC = 3;          break;
  case CmpInst::FCMP_UNE: CC = 4;          break;
  case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_UGE: CC = 5;          break;
  case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_UGT: CC = 6;          break;
  case CmpInst::FCMP_ORD: CC = 7;          break;
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_ONE: CC = 8;          break;
  }

  return std::make_pair(CC, NeedSwap);
}

/// \brief Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                                       const Value *Cond) {
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  Type *RetTy =
    cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  X86::CondCode TmpCC;
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
  }

  // Check if both instructions are in the same basic block.
  if (II->getParent() != I->getParent())
    return false;

  // Make sure nothing is in the way.
  BasicBlock::const_iterator Start = I;
  BasicBlock::const_iterator End = II;
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  CC = TmpCC;
  return true;
}

bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  VT = evt.getSimpleVT();
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
                                  MachineMemOperand *MMO, unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = &X86::GR8RegClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = &X86::GR16RegClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = &X86::GR32RegClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = &X86::GR64RegClass;
    break;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
      RC  = &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
      RC  = &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  addFullAddress(MIB, AM);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base ptr Ptr
/// and a displacement offset, or a GlobalAddress, i.e. V.
/// Return true if it is possible.
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                                   const X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    unsigned AndResult = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(X86::AND8ri), AndResult)
      .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
    ValReg = AndResult;
  }
  // FALLTHROUGH, handling i1 as i8.
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32: Opc = X86::MOV32mr; break;
  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
  case MVT::f32:
    Opc = X86ScalarSSEf32 ?
          (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
    break;
  case MVT::f64:
    Opc = X86ScalarSSEf64 ?
          (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
    break;
  case MVT::v4f32:
    if (Aligned)
      Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    break;
  case MVT::v2f64:
    if (Aligned)
      Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (Aligned)
      Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
    break;
  }

  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);

  return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   const X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if (isInt<32>(CI->getSExtValue()))
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      MachineInstrBuilder MIB =
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
      addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
                                            : CI->getZExtValue());
      if (MMO)
        MIB->addMemOperand(*FuncInfo.MF, MMO);
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  bool ValKill = hasTrivialKill(Val);
  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);
  if (RR == 0)
    return false;

  ResultReg = RR;
  return true;
}

bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // Can't handle TLS yet.
    if (GV->isThreadLocal())
      return false;

    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
    if (!Subtarget->isPICStyleRIPRel() ||
        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
      // Okay, we've committed to selecting this global. Set up the address.
      AM.GV = GV;

      // Allow the subtarget to classify the global.
      unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);

      // If this reference is relative to the pic base, set it now.
      if (isGlobalRelativeToPICBase(GVFlags)) {
        // FIXME: How do we know Base.Reg is free??
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      }

      // Unless the ABI requires an extra load, return a direct reference to
      // the global.
      if (!isGlobalStubReference(GVFlags)) {
        if (Subtarget->isPICStyleRIPRel()) {
          // Use rip-relative addressing if we can. Above we verified that the
          // base and index registers are unused.
          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
          AM.Base.Reg = X86::RIP;
        }
        AM.GVOpFlags = GVFlags;
        return true;
      }

      // Ok, we need to do a load from a stub. If we've already loaded from
      // this stub, reuse the loaded pointer, otherwise emit the load now.
      DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
      unsigned LoadReg;
      if (I != LocalValueMap.end() && I->second != 0) {
        LoadReg = I->second;
      } else {
        // Issue load from stub.
        unsigned Opc = 0;
        const TargetRegisterClass *RC = nullptr;
        X86AddressMode StubAM;
        StubAM.Base.Reg = AM.Base.Reg;
        StubAM.GV = GV;
        StubAM.GVOpFlags = GVFlags;

        // Prepare for inserting code in the local-value area.
        SavePoint SaveInsertPt = enterLocalValueArea();

        if (TLI.getPointerTy() == MVT::i64) {
          Opc = X86::MOV64rm;
          RC  = &X86::GR64RegClass;

          if (Subtarget->isPICStyleRIPRel())
            StubAM.Base.Reg = X86::RIP;
        } else {
          Opc = X86::MOV32rm;
          RC  = &X86::GR32RegClass;
        }

        LoadReg = createResultReg(RC);
        MachineInstrBuilder LoadMI =
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
        addFullAddress(LoadMI, StubAM);

        // Ok, back to normal mode.
        leaveLocalValueArea(SaveInsertPt);

        // Prevent loading GV stub multiple times in same MBB.
        LocalValueMap[V] = LoadReg;
      }

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = LoadReg;
      AM.GV = nullptr;
      return true;
    }
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  SmallVector<const Value *, 32> GEPs;
redo_gep:
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = DL.getStructLayout(STy);
        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
        continue;
      }

      // An array/variable index is always of the form i*S where S is the
      // constant scale size. See if we can push the scale into immediates.
      uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
      for (;;) {
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
          break;
        }
        if (canFoldAddIntoGEP(U, Op)) {
          // A compatible add with a constant operand. Fold the constant.
          ConstantInt *CI =
            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          Disp += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
          continue;
        }
        if (IndexReg == 0 &&
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op).first;
          if (IndexReg == 0)
            return false;
          break;
        }
        // Unsupported.
        goto unsupported_gep;
      }
    }

    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;

    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    GEPs.push_back(V);

    if (const GetElementPtrInst *GEP =
          dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
      // Ok, the GEP indices were covered by constant-offset and scaled-index
      // addressing. Update the address state and move on to examining the base.
      V = GEP;
      goto redo_gep;
    } else if (X86SelectAddress(U->getOperand(0), AM)) {
      return true;
    }

    // If we couldn't merge the gep value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;

    for (SmallVectorImpl<const Value *>::reverse_iterator
           I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
      if (handleConstantAddresses(*I, AM))
        return true;

    return false;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  return handleConstantAddresses(V, AM);
}

/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  const Instruction *I = dyn_cast<Instruction>(V);
  // Record if the value is defined in the same basic block.
  //
  // This information is crucial to know whether or not folding an
  // operand is valid.
  // Indeed, FastISel generates or reuses a virtual register for all
  // operands of all instructions it selects. Obviously, the definition and
  // its uses must use the same virtual register, otherwise the produced
  // code is incorrect.
  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
  // registers for values that are alive across basic blocks. This ensures
  // that the values are consistently assigned across basic blocks, even
  // if different instruction selection mechanisms are used (e.g., a mix of
  // SDISel and FastISel).
  // For values local to a basic block, the instruction selection process
  // generates these virtual registers with whatever method is appropriate
  // for its needs. In particular, FastISel and SDISel do not share the way
  // local virtual registers are set.
  // Therefore, it is impossible (or at least unsafe) to share values
  // between basic blocks unless they use the same instruction selection
  // method, which is not guaranteed for X86.
  // Moreover, things like hasOneUse cannot be used accurately if we
  // allow references to values across basic blocks when they are not
  // alive across basic blocks initially.
  bool InMBB = true;
  if (I) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle DLL Import.
    if (GV->hasDLLImportStorageClass())
      return false;

    // Can't handle TLS.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // No ABI requires an extra load for anything other than DLLImport, which
    // we rejected above. Return a direct reference to the global.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can. Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else if (Subtarget->isPICStyleStubPIC()) {
      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
    } else if (Subtarget->isPICStyleGOT()) {
      AM.GVOpFlags = X86II::MO_GOTOFF;
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}


/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
  // Atomic stores need special handling.
  const StoreInst *S = cast<StoreInst>(I);

  if (S->isAtomic())
    return false;

  const Value *Val = S->getValueOperand();
  const Value *Ptr = S->getPointerOperand();

  MVT VT;
  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
    return false;

  unsigned Alignment = S->getAlignment();
  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
  if (Alignment == 0) // Ensure that codegen never sees alignment 0.
    Alignment = ABIAlignment;
  bool Aligned = Alignment >= ABIAlignment;

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
}

/// X86SelectRet - Select and emit code to implement ret instructions.
bool X86FastISel::X86SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();
  const X86MachineFunctionInfo *X86MFInfo =
    FuncInfo.MF->getInfo<X86MachineFunctionInfo>();

  if (!FuncInfo.CanLowerReturn)
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall &&
      CC != CallingConv::X86_64_SysV)
    return false;

  if (Subtarget->isCallingConvWin64(CC))
    return false;

  // Don't handle popping bytes on return for now.
  if (X86MFInfo->getBytesToPopOnReturn() != 0)
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  if (F.isVarArg())
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
                   I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_X86);

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    // The calling-convention tables for x87 returns don't tell
    // the whole story.
    if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT SrcVT = TLI.getValueType(RV->getType());
    EVT DstVT = VA.getValVT();
    // Special handling for extended integers.
    if (SrcVT != DstVT) {
      if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      assert(DstVT == MVT::i32 && "X86 should always ext to i32");

      if (SrcVT == MVT::i1) {
        if (Outs[0].Flags.isSExt())
          return false;
        SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
        SrcVT = MVT::i8;
      }
      unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
                                             ISD::SIGN_EXTEND;
      SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
                          SrcReg, /*TODO: Kill=*/false);
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. We saved the argument into
  // a virtual register in the entry block, so now we copy the value out
  // and into %rax. We also do the same with %eax for Win32.
  if (F.hasStructRetAttr() &&
      (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
    unsigned Reg = X86MFInfo->getSRetReturnReg();
    assert(Reg &&
           "SRetReturnReg should have been set in LowerFormalArguments()!");
    unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
            RetReg).addReg(Reg);
    RetRegs.push_back(RetReg);
  }

  // Now emit the RET.
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
  const LoadInst *LI = cast<LoadInst>(I);

  // Atomic loads need special handling.
  if (LI->isAtomic())
    return false;

  MVT VT;
  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
    return false;

  const Value *Ptr = LI->getPointerOperand();

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  unsigned ResultReg = 0;
  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}

static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
  bool HasAVX = Subtarget->hasAVX();
  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
  bool X86ScalarSSEf64 = Subtarget->hasSSE2();

  switch (VT.getSimpleVT().SimpleTy) {
  default: return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32:
    return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
  case MVT::f64:
    return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
  }
}

/// X86ChooseCmpImmediateOpcode - If we have a comparison whose RHS is the
/// constant RHSC, return an opcode that can fold the constant into the
/// compare as an immediate (e.g. CMP32ri); otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default: return 0;
  case MVT::i8:  return X86::CMP8ri;
  case MVT::i16: return X86::CMP16ri;
  case MVT::i32: return X86::CMP32ri;
  case MVT::i64:
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
      return X86::CMP64ri32;
    return 0;
  }
}

bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
                                     EVT VT) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate. If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareImmOpc))
        .addReg(Op0Reg)
        .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareOpc))
    .addReg(Op0Reg)
    .addReg(Op1Reg);

  return true;
}

bool X86FastISel::X86SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Try to optimize or fold the cmp.
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  unsigned ResultReg = 0;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_FALSE: {
    ResultReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
            ResultReg);
    ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
                                           X86::sub_8bit);
    if (!ResultReg)
      return false;
    break;
  }
  case CmpInst::FCMP_TRUE: {
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
            ResultReg).addImm(1);
    break;
  }
  }

  if (ResultReg) {
    UpdateValueMap(I, ResultReg);
    return true;
  }

  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just
  // use %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *RHSC = dyn_cast<ConstantFP>(RHS);
    if (RHSC && RHSC->isNullValue())
      RHS = LHS;
  }

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
  static unsigned SETFOpcTable[2][3] = {
    { X86::SETEr,  X86::SETNPr, X86::AND8rr },
    { X86::SETNEr, X86::SETPr,  X86::OR8rr  }
  };
  unsigned *SETFOpc = nullptr;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
  }

  ResultReg = createResultReg(&X86::GR8RegClass);
  if (SETFOpc) {
    if (!X86FastEmitCompare(LHS, RHS, VT))
      return false;

    unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
    unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
            FlagReg1);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
            FlagReg2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
            ResultReg).addReg(FlagReg1).addReg(FlagReg2);
    UpdateValueMap(I, ResultReg);
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
  unsigned Opc = X86::getSETFromCond(CC);

  if (SwapArgs)
    std::swap(LHS, RHS);

  // Emit a compare of LHS/RHS.
  if (!X86FastEmitCompare(LHS, RHS, VT))
    return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectZExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  unsigned ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Handle zero-extension from i1 to i8, which is common.
  MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
  if (SrcVT.SimpleTy == MVT::i1) {
    // Set the high bits to zero.
    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
    SrcVT = MVT::i8;

    if (ResultReg == 0)
      return false;
  }

  if (DstVT == MVT::i64) {
    // Handle extension to 64-bits via sub-register shenanigans.
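    // On x86-64, a write to a 32-bit register implicitly zeroes the upper 32
    // bits of the containing 64-bit register, so a 32-bit zero-extending move
    // followed by SUBREG_TO_REG is sufficient here.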
    unsigned MovInst;

    switch (SrcVT.SimpleTy) {
    case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
    case MVT::i16: MovInst = X86::MOVZX32rr16; break;
    case MVT::i32: MovInst = X86::MOV32rr;     break;
    default: llvm_unreachable("Unexpected zext to i64 source type");
    }

    unsigned Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
      .addReg(ResultReg);

    ResultReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
      .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
  } else if (DstVT != MVT::i8) {
    ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
                           ResultReg, /*Kill=*/true);
    if (ResultReg == 0)
      return false;
  }

  UpdateValueMap(I, ResultReg);
  return true;
}


bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison
  // in the same block (values defined on other blocks may not have
  // initialized registers).
  X86::CondCode CC;
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
      EVT VT = TLI.getValueType(CI->getOperand(0)->getType());

      // Try to optimize or fold the cmp.
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_FALSE: FastEmitBranch(FalseMBB, DbgLoc); return true;
      case CmpInst::FCMP_TRUE:  FastEmitBranch(TrueMBB, DbgLoc);  return true;
      }

      const Value *CmpLHS = CI->getOperand(0);
      const Value *CmpRHS = CI->getOperand(1);

      // The optimizer might have replaced fcmp oeq %x, %x with
      // fcmp ord %x, 0.0. We don't have to materialize a zero constant for
      // this case and can just use %x again on the RHS.
      if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
        const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
        if (CmpRHSC && CmpRHSC->isNullValue())
          CmpRHS = CmpLHS;
      }

      // Try to take advantage of fallthrough opportunities.
      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
      // code check. Instead two branch instructions are required to check all
      // the flags. First we change the predicate to a supported condition
      // code, which will be the first branch. Later on we will emit the second
      // branch.
      bool NeedExtraBranch = false;
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB); // fall-through
      case CmpInst::FCMP_UNE:
        NeedExtraBranch = true;
        Predicate = CmpInst::FCMP_ONE;
        break;
      }

      bool SwapArgs;
      unsigned BranchOpc;
      std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
      assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

      BranchOpc = X86::GetCondBranchFromCond(CC);
      if (SwapArgs)
        std::swap(CmpLHS, CmpRHS);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT))
        return false;

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
        .addMBB(TrueMBB);

      // X86 requires a second branch to handle UNE (and OEQ, which is mapped
      // to UNE above).
      if (NeedExtraBranch) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_4))
          .addMBB(TrueMBB);
      }

      // Obtain the branch weight and add the TrueBB to the successor list.
      uint32_t BranchWeight = 0;
      if (FuncInfo.BPI)
        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                   TrueMBB->getBasicBlock());
      FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);

      // Emits an unconditional branch to the FalseBB, obtains the branch
      // weight, and adds it to the successor list.
      FastEmitBranch(FalseMBB, DbgLoc);

      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
    // typically happen for _Bool and C++ bools.
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
      unsigned TestOpc = 0;
      switch (SourceVT.SimpleTy) {
      default: break;
      case MVT::i8:  TestOpc = X86::TEST8ri;    break;
      case MVT::i16: TestOpc = X86::TEST16ri;   break;
      case MVT::i32: TestOpc = X86::TEST32ri;   break;
      case MVT::i64: TestOpc = X86::TEST64ri32; break;
      }
      if (TestOpc) {
        unsigned OpReg = getRegForValue(TI->getOperand(0));
        if (OpReg == 0) return false;
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
          .addReg(OpReg).addImm(1);

        unsigned JmpOpc = X86::JNE_4;
        if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
          std::swap(TrueMBB, FalseMBB);
          JmpOpc = X86::JE_4;
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
          .addMBB(TrueMBB);
        FastEmitBranch(FalseMBB, DbgLoc);
        uint32_t BranchWeight = 0;
        if (FuncInfo.BPI)
          BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                     TrueMBB->getBasicBlock());
        FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
        return true;
      }
    }
  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
    unsigned TmpReg = getRegForValue(BI->getCondition());
    if (TmpReg == 0)
      return false;

    unsigned BranchOpc = X86::GetCondBranchFromCond(CC);

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
      .addMBB(TrueMBB);
    FastEmitBranch(FalseMBB, DbgLoc);
    uint32_t BranchWeight = 0;
    if (FuncInfo.BPI)
      BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                 TrueMBB->getBasicBlock());
    FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
    return true;
  }

  // Otherwise do a clumsy setcc and re-test it.
  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
  // in an explicit cast, so make sure to handle that correctly.
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
    .addReg(OpReg).addImm(1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_4))
    .addMBB(TrueMBB);
  FastEmitBranch(FalseMBB, DbgLoc);
  uint32_t BranchWeight = 0;
  if (FuncInfo.BPI)
    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                               TrueMBB->getBasicBlock());
  FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
  return true;
}

bool X86FastISel::X86SelectShift(const Instruction *I) {
  unsigned CReg = 0, OpReg = 0;
  const TargetRegisterClass *RC = nullptr;
  if (I->getType()->isIntegerTy(8)) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(16)) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR16rCL; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(32)) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR32rCL; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(64)) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR64rCL; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
    default: return false;
    }
  } else {
    return false;
  }

  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
          CReg).addReg(Op1Reg);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
  if (CReg != X86::CL)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::KILL), X86::CL)
      .addReg(CReg, RegState::Kill);

  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
    .addReg(Op0Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectDivRem(const Instruction *I) {
  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps   = 4; // SDiv, SRem, UDiv, URem
  const static bool S = true;         // IsSigned
  const static bool U = false;        // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;
  // For the X86 DIV/IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended or zero-extended
  // into highreg. The exception is i8, where the dividend is defined
  // as a single register rather than a register pair, and we
  // therefore directly sign-extend or zero-extend the dividend into
  // lowreg, instead of copying, and ignore the highreg.
  const static struct DivRemEntry {
    // The following portion depends only on the data type.
    const TargetRegisterClass *RC;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct DivRemResult {
      unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
      unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
                                // highreg, or copying a zero into highreg.
      unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
                                // zero/sign-extending into lowreg for i8.
      unsigned DivRemResultReg; // Register containing the desired result.
      bool IsOpSigned;          // Whether to use signed or unsigned form.
1597 } ResultTable[NumOps]; 1598 } OpTable[NumTypes] = { 1599 { &X86::GR8RegClass, X86::AX, 0, { 1600 { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv 1601 { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem 1602 { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv 1603 { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem 1604 } 1605 }, // i8 1606 { &X86::GR16RegClass, X86::AX, X86::DX, { 1607 { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv 1608 { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem 1609 { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv 1610 { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem 1611 } 1612 }, // i16 1613 { &X86::GR32RegClass, X86::EAX, X86::EDX, { 1614 { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv 1615 { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem 1616 { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv 1617 { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem 1618 } 1619 }, // i32 1620 { &X86::GR64RegClass, X86::RAX, X86::RDX, { 1621 { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv 1622 { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem 1623 { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv 1624 { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem 1625 } 1626 }, // i64 1627 }; 1628 1629 MVT VT; 1630 if (!isTypeLegal(I->getType(), VT)) 1631 return false; 1632 1633 unsigned TypeIndex, OpIndex; 1634 switch (VT.SimpleTy) { 1635 default: return false; 1636 case MVT::i8: TypeIndex = 0; break; 1637 case MVT::i16: TypeIndex = 1; break; 1638 case MVT::i32: TypeIndex = 2; break; 1639 case MVT::i64: TypeIndex = 3; 1640 if (!Subtarget->is64Bit()) 1641 return false; 1642 break; 1643 } 1644 1645 switch (I->getOpcode()) { 1646 default: llvm_unreachable("Unexpected div/rem opcode"); 1647 case Instruction::SDiv: OpIndex = 0; break; 1648 case Instruction::SRem: OpIndex = 1; break; 1649 case Instruction::UDiv: OpIndex = 2; break; 1650 case Instruction::URem: OpIndex = 3; break; 1651 } 1652 1653 const DivRemEntry &TypeEntry = OpTable[TypeIndex]; 1654 const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex]; 1655 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 1656 if (Op0Reg == 0) 1657 return false; 1658 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 1659 if (Op1Reg == 0) 1660 return false; 1661 1662 // Move op0 into low-order input register. 1663 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1664 TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg); 1665 // Zero-extend or sign-extend into high-order input register. 1666 if (OpEntry.OpSignExtend) { 1667 if (OpEntry.IsOpSigned) 1668 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1669 TII.get(OpEntry.OpSignExtend)); 1670 else { 1671 unsigned Zero32 = createResultReg(&X86::GR32RegClass); 1672 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1673 TII.get(X86::MOV32r0), Zero32); 1674 1675 // Copy the zero into the appropriate sub/super/identical physical 1676 // register. Unfortunately the operations needed are not uniform enough to 1677 // fit neatly into the table above. 
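  // Concretely: an unsigned i16 divide copies the low 16 bits (sub_16bit) of
  // the MOV32r0 result into DX, an i32 divide copies the full 32-bit zero into
  // EDX, and an i64 divide wraps it with SUBREG_TO_REG to zero RDX. The i8
  // case never reaches this point because its OpSignExtend entry is 0.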
1678 if (VT.SimpleTy == MVT::i16) { 1679 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1680 TII.get(Copy), TypeEntry.HighInReg) 1681 .addReg(Zero32, 0, X86::sub_16bit); 1682 } else if (VT.SimpleTy == MVT::i32) { 1683 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1684 TII.get(Copy), TypeEntry.HighInReg) 1685 .addReg(Zero32); 1686 } else if (VT.SimpleTy == MVT::i64) { 1687 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1688 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg) 1689 .addImm(0).addReg(Zero32).addImm(X86::sub_32bit); 1690 } 1691 } 1692 } 1693 // Generate the DIV/IDIV instruction. 1694 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1695 TII.get(OpEntry.OpDivRem)).addReg(Op1Reg); 1696 // For i8 remainder, we can't reference AH directly, as we'll end 1697 // up with bogus copies like %R9B = COPY %AH. Reference AX 1698 // instead to prevent AH references in a REX instruction. 1699 // 1700 // The current assumption of the fast register allocator is that isel 1701 // won't generate explicit references to the GPR8_NOREX registers. If 1702 // the allocator and/or the backend get enhanced to be more robust in 1703 // that regard, this can be, and should be, removed. 1704 unsigned ResultReg = 0; 1705 if ((I->getOpcode() == Instruction::SRem || 1706 I->getOpcode() == Instruction::URem) && 1707 OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) { 1708 unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass); 1709 unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass); 1710 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1711 TII.get(Copy), SourceSuperReg).addReg(X86::AX); 1712 1713 // Shift AX right by 8 bits instead of using AH. 1714 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri), 1715 ResultSuperReg).addReg(SourceSuperReg).addImm(8); 1716 1717 // Now reference the 8-bit subreg of the result. 1718 ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg, 1719 /*Kill=*/true, X86::sub_8bit); 1720 } 1721 // Copy the result out of the physreg if we haven't already. 1722 if (!ResultReg) { 1723 ResultReg = createResultReg(TypeEntry.RC); 1724 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg) 1725 .addReg(OpEntry.DivRemResultReg); 1726 } 1727 UpdateValueMap(I, ResultReg); 1728 1729 return true; 1730 } 1731 1732 /// \brief Emit a conditional move instruction (if the are supported) to lower 1733 /// the select. 1734 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { 1735 // Check if the subtarget supports these instructions. 1736 if (!Subtarget->hasCMov()) 1737 return false; 1738 1739 // FIXME: Add support for i8. 1740 if (RetVT < MVT::i16 || RetVT > MVT::i64) 1741 return false; 1742 1743 const Value *Cond = I->getOperand(0); 1744 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); 1745 bool NeedTest = true; 1746 X86::CondCode CC = X86::COND_NE; 1747 1748 // Optimize conditions coming from a compare if both instructions are in the 1749 // same basic block (values defined in other basic blocks may not have 1750 // initialized registers). 1751 const auto *CI = dyn_cast<CmpInst>(Cond); 1752 if (CI && (CI->getParent() == I->getParent())) { 1753 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 1754 1755 // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 
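    // For example, FCMP_OEQ holds only when the compare is ordered and the
    // operands are equal (PF == 0 and ZF == 1), so it is rebuilt below from
    // SETNP/SETE combined with TEST8rr; FCMP_UNE (PF == 1 or ZF == 0) uses
    // SETP/SETNE combined with OR8rr. The predicate is then treated as
    // ICMP_NE so the CMOV keys off the combined (non-zero) flag result.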
1756 static unsigned SETFOpcTable[2][3] = { 1757 { X86::SETNPr, X86::SETEr , X86::TEST8rr }, 1758 { X86::SETPr, X86::SETNEr, X86::OR8rr } 1759 }; 1760 unsigned *SETFOpc = nullptr; 1761 switch (Predicate) { 1762 default: break; 1763 case CmpInst::FCMP_OEQ: 1764 SETFOpc = &SETFOpcTable[0][0]; 1765 Predicate = CmpInst::ICMP_NE; 1766 break; 1767 case CmpInst::FCMP_UNE: 1768 SETFOpc = &SETFOpcTable[1][0]; 1769 Predicate = CmpInst::ICMP_NE; 1770 break; 1771 } 1772 1773 bool NeedSwap; 1774 std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate); 1775 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); 1776 1777 const Value *CmpLHS = CI->getOperand(0); 1778 const Value *CmpRHS = CI->getOperand(1); 1779 if (NeedSwap) 1780 std::swap(CmpLHS, CmpRHS); 1781 1782 EVT CmpVT = TLI.getValueType(CmpLHS->getType()); 1783 // Emit a compare of the LHS and RHS, setting the flags. 1784 if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT)) 1785 return false; 1786 1787 if (SETFOpc) { 1788 unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); 1789 unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); 1790 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), 1791 FlagReg1); 1792 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), 1793 FlagReg2); 1794 auto const &II = TII.get(SETFOpc[2]); 1795 if (II.getNumDefs()) { 1796 unsigned TmpReg = createResultReg(&X86::GR8RegClass); 1797 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg) 1798 .addReg(FlagReg2).addReg(FlagReg1); 1799 } else { 1800 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 1801 .addReg(FlagReg2).addReg(FlagReg1); 1802 } 1803 } 1804 NeedTest = false; 1805 } else if (foldX86XALUIntrinsic(CC, I, Cond)) { 1806 // Fake request the condition, otherwise the intrinsic might be completely 1807 // optimized away. 1808 unsigned TmpReg = getRegForValue(Cond); 1809 if (TmpReg == 0) 1810 return false; 1811 1812 NeedTest = false; 1813 } 1814 1815 if (NeedTest) { 1816 // Selects operate on i1, however, CondReg is 8 bits width and may contain 1817 // garbage. Indeed, only the less significant bit is supposed to be 1818 // accurate. If we read more than the lsb, we may see non-zero values 1819 // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for 1820 // the select. This is achieved by performing TEST against 1. 1821 unsigned CondReg = getRegForValue(Cond); 1822 if (CondReg == 0) 1823 return false; 1824 bool CondIsKill = hasTrivialKill(Cond); 1825 1826 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) 1827 .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1); 1828 } 1829 1830 const Value *LHS = I->getOperand(1); 1831 const Value *RHS = I->getOperand(2); 1832 1833 unsigned RHSReg = getRegForValue(RHS); 1834 bool RHSIsKill = hasTrivialKill(RHS); 1835 1836 unsigned LHSReg = getRegForValue(LHS); 1837 bool LHSIsKill = hasTrivialKill(LHS); 1838 1839 if (!LHSReg || !RHSReg) 1840 return false; 1841 1842 unsigned Opc = X86::getCMovFromCond(CC, RC->getSize()); 1843 unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, 1844 LHSReg, LHSIsKill); 1845 UpdateValueMap(I, ResultReg); 1846 return true; 1847 } 1848 1849 /// \brief Emit SSE instructions to lower the select. 1850 /// 1851 /// Try to use SSE1/SSE2 instructions to simulate a select without branches. 1852 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary 1853 /// SSE instructions are available. 
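/// Roughly, the condition is evaluated with CMPSS/CMPSD into an all-ones or
/// all-zeros mask, and "select %cc, %a, %b" becomes (mask & %a) | (~mask & %b)
/// via FsANDPS/FsANDNPS/FsORPS (or their VEX forms when AVX is available).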
1854 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
1855   // Optimize conditions coming from a compare if both instructions are in the
1856   // same basic block (values defined in other basic blocks may not have
1857   // initialized registers).
1858   const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
1859   if (!CI || (CI->getParent() != I->getParent()))
1860     return false;
1861
1862   if (I->getType() != CI->getOperand(0)->getType() ||
1863       !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
1864         (Subtarget->hasSSE2() && RetVT == MVT::f64) ))
1865     return false;
1866
1867   const Value *CmpLHS = CI->getOperand(0);
1868   const Value *CmpRHS = CI->getOperand(1);
1869   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1870
1871   // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1872   // We don't have to materialize a zero constant for this case and can just use
1873   // %x again on the RHS.
1874   if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1875     const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1876     if (CmpRHSC && CmpRHSC->isNullValue())
1877       CmpRHS = CmpLHS;
1878   }
1879
1880   unsigned CC;
1881   bool NeedSwap;
1882   std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
1883   if (CC > 7)
1884     return false;
1885
1886   if (NeedSwap)
1887     std::swap(CmpLHS, CmpRHS);
1888
1889   static unsigned OpcTable[2][2][4] = {
1890     { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
1891       { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
1892     { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
1893       { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
1894   };
1895
1896   bool HasAVX = Subtarget->hasAVX();
1897   unsigned *Opc = nullptr;
1898   switch (RetVT.SimpleTy) {
1899   default: return false;
1900   case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
1901   case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
1902   }
1903
1904   const Value *LHS = I->getOperand(1);
1905   const Value *RHS = I->getOperand(2);
1906
1907   unsigned LHSReg = getRegForValue(LHS);
1908   bool LHSIsKill = hasTrivialKill(LHS);
1909
1910   unsigned RHSReg = getRegForValue(RHS);
1911   bool RHSIsKill = hasTrivialKill(RHS);
1912
1913   unsigned CmpLHSReg = getRegForValue(CmpLHS);
1914   bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
1915
1916   unsigned CmpRHSReg = getRegForValue(CmpRHS);
1917   bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
1918
1919   if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
1920     return false;
1921
1922   const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
1923   unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
1924                                      CmpRHSReg, CmpRHSIsKill, CC);
1925   unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
1926                                     LHSReg, LHSIsKill);
1927   unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
1928                                      RHSReg, RHSIsKill);
1929   unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
1930                                        AndReg, /*IsKill=*/true);
1931   UpdateValueMap(I, ResultReg);
1932   return true;
1933 }
1934
1935 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
1936   // These are pseudo CMOV instructions and will be later expanded into control-
1937   // flow.
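  // For example, an i8 select (which has no real CMOVcc encoding) uses the
  // CMOV_GR8 pseudo below; the pseudo carries the condition code and is
  // expanded after fast-isel into branch-based control flow.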
1938 unsigned Opc; 1939 switch (RetVT.SimpleTy) { 1940 default: return false; 1941 case MVT::i8: Opc = X86::CMOV_GR8; break; 1942 case MVT::i16: Opc = X86::CMOV_GR16; break; 1943 case MVT::i32: Opc = X86::CMOV_GR32; break; 1944 case MVT::f32: Opc = X86::CMOV_FR32; break; 1945 case MVT::f64: Opc = X86::CMOV_FR64; break; 1946 } 1947 1948 const Value *Cond = I->getOperand(0); 1949 X86::CondCode CC = X86::COND_NE; 1950 1951 // Optimize conditions coming from a compare if both instructions are in the 1952 // same basic block (values defined in other basic blocks may not have 1953 // initialized registers). 1954 const auto *CI = dyn_cast<CmpInst>(Cond); 1955 if (CI && (CI->getParent() == I->getParent())) { 1956 bool NeedSwap; 1957 std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate()); 1958 if (CC > X86::LAST_VALID_COND) 1959 return false; 1960 1961 const Value *CmpLHS = CI->getOperand(0); 1962 const Value *CmpRHS = CI->getOperand(1); 1963 1964 if (NeedSwap) 1965 std::swap(CmpLHS, CmpRHS); 1966 1967 EVT CmpVT = TLI.getValueType(CmpLHS->getType()); 1968 if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT)) 1969 return false; 1970 } else { 1971 unsigned CondReg = getRegForValue(Cond); 1972 if (CondReg == 0) 1973 return false; 1974 bool CondIsKill = hasTrivialKill(Cond); 1975 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) 1976 .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1); 1977 } 1978 1979 const Value *LHS = I->getOperand(1); 1980 const Value *RHS = I->getOperand(2); 1981 1982 unsigned LHSReg = getRegForValue(LHS); 1983 bool LHSIsKill = hasTrivialKill(LHS); 1984 1985 unsigned RHSReg = getRegForValue(RHS); 1986 bool RHSIsKill = hasTrivialKill(RHS); 1987 1988 if (!LHSReg || !RHSReg) 1989 return false; 1990 1991 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); 1992 1993 unsigned ResultReg = 1994 FastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC); 1995 UpdateValueMap(I, ResultReg); 1996 return true; 1997 } 1998 1999 bool X86FastISel::X86SelectSelect(const Instruction *I) { 2000 MVT RetVT; 2001 if (!isTypeLegal(I->getType(), RetVT)) 2002 return false; 2003 2004 // Check if we can fold the select. 2005 if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) { 2006 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2007 const Value *Opnd = nullptr; 2008 switch (Predicate) { 2009 default: break; 2010 case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break; 2011 case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break; 2012 } 2013 // No need for a select anymore - this is an unconditional move. 2014 if (Opnd) { 2015 unsigned OpReg = getRegForValue(Opnd); 2016 if (OpReg == 0) 2017 return false; 2018 bool OpIsKill = hasTrivialKill(Opnd); 2019 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); 2020 unsigned ResultReg = createResultReg(RC); 2021 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2022 TII.get(TargetOpcode::COPY), ResultReg) 2023 .addReg(OpReg, getKillRegState(OpIsKill)); 2024 UpdateValueMap(I, ResultReg); 2025 return true; 2026 } 2027 } 2028 2029 // First try to use real conditional move instructions. 2030 if (X86FastEmitCMoveSelect(RetVT, I)) 2031 return true; 2032 2033 // Try to use a sequence of SSE instructions to simulate a conditional move. 2034 if (X86FastEmitSSESelect(RetVT, I)) 2035 return true; 2036 2037 // Fall-back to pseudo conditional move instructions, which will be later 2038 // converted to control-flow. 
2039 if (X86FastEmitPseudoSelect(RetVT, I)) 2040 return true; 2041 2042 return false; 2043 } 2044 2045 bool X86FastISel::X86SelectFPExt(const Instruction *I) { 2046 // fpext from float to double. 2047 if (X86ScalarSSEf64 && 2048 I->getType()->isDoubleTy()) { 2049 const Value *V = I->getOperand(0); 2050 if (V->getType()->isFloatTy()) { 2051 unsigned OpReg = getRegForValue(V); 2052 if (OpReg == 0) return false; 2053 unsigned ResultReg = createResultReg(&X86::FR64RegClass); 2054 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2055 TII.get(X86::CVTSS2SDrr), ResultReg) 2056 .addReg(OpReg); 2057 UpdateValueMap(I, ResultReg); 2058 return true; 2059 } 2060 } 2061 2062 return false; 2063 } 2064 2065 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { 2066 if (X86ScalarSSEf64) { 2067 if (I->getType()->isFloatTy()) { 2068 const Value *V = I->getOperand(0); 2069 if (V->getType()->isDoubleTy()) { 2070 unsigned OpReg = getRegForValue(V); 2071 if (OpReg == 0) return false; 2072 unsigned ResultReg = createResultReg(&X86::FR32RegClass); 2073 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2074 TII.get(X86::CVTSD2SSrr), ResultReg) 2075 .addReg(OpReg); 2076 UpdateValueMap(I, ResultReg); 2077 return true; 2078 } 2079 } 2080 } 2081 2082 return false; 2083 } 2084 2085 bool X86FastISel::X86SelectTrunc(const Instruction *I) { 2086 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); 2087 EVT DstVT = TLI.getValueType(I->getType()); 2088 2089 // This code only handles truncation to byte. 2090 if (DstVT != MVT::i8 && DstVT != MVT::i1) 2091 return false; 2092 if (!TLI.isTypeLegal(SrcVT)) 2093 return false; 2094 2095 unsigned InputReg = getRegForValue(I->getOperand(0)); 2096 if (!InputReg) 2097 // Unhandled operand. Halt "fast" selection and bail. 2098 return false; 2099 2100 if (SrcVT == MVT::i8) { 2101 // Truncate from i8 to i1; no code needed. 2102 UpdateValueMap(I, InputReg); 2103 return true; 2104 } 2105 2106 if (!Subtarget->is64Bit()) { 2107 // If we're on x86-32; we can't extract an i8 from a general register. 2108 // First issue a copy to GR16_ABCD or GR32_ABCD. 2109 const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ? 2110 (const TargetRegisterClass*)&X86::GR16_ABCDRegClass : 2111 (const TargetRegisterClass*)&X86::GR32_ABCDRegClass; 2112 unsigned CopyReg = createResultReg(CopyRC); 2113 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 2114 CopyReg).addReg(InputReg); 2115 InputReg = CopyReg; 2116 } 2117 2118 // Issue an extract_subreg. 2119 unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, 2120 InputReg, /*Kill=*/true, 2121 X86::sub_8bit); 2122 if (!ResultReg) 2123 return false; 2124 2125 UpdateValueMap(I, ResultReg); 2126 return true; 2127 } 2128 2129 bool X86FastISel::IsMemcpySmall(uint64_t Len) { 2130 return Len <= (Subtarget->is64Bit() ? 32 : 16); 2131 } 2132 2133 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, 2134 X86AddressMode SrcAM, uint64_t Len) { 2135 2136 // Make sure we don't bloat code by inlining very large memcpy's. 2137 if (!IsMemcpySmall(Len)) 2138 return false; 2139 2140 bool i64Legal = Subtarget->is64Bit(); 2141 2142 // We don't care about alignment here since we just emit integer accesses. 
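  // For example, on x86-64 an 11-byte copy becomes one i64, one i16 and one
  // i8 load/store pair, advancing both displacements after each chunk.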
2143 while (Len) { 2144 MVT VT; 2145 if (Len >= 8 && i64Legal) 2146 VT = MVT::i64; 2147 else if (Len >= 4) 2148 VT = MVT::i32; 2149 else if (Len >= 2) 2150 VT = MVT::i16; 2151 else { 2152 VT = MVT::i8; 2153 } 2154 2155 unsigned Reg; 2156 bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg); 2157 RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM); 2158 assert(RV && "Failed to emit load or store??"); 2159 2160 unsigned Size = VT.getSizeInBits()/8; 2161 Len -= Size; 2162 DestAM.Disp += Size; 2163 SrcAM.Disp += Size; 2164 } 2165 2166 return true; 2167 } 2168 2169 static bool isCommutativeIntrinsic(IntrinsicInst const &I) { 2170 switch (I.getIntrinsicID()) { 2171 case Intrinsic::sadd_with_overflow: 2172 case Intrinsic::uadd_with_overflow: 2173 case Intrinsic::smul_with_overflow: 2174 case Intrinsic::umul_with_overflow: 2175 return true; 2176 default: 2177 return false; 2178 } 2179 } 2180 2181 bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { 2182 // FIXME: Handle more intrinsics. 2183 switch (I.getIntrinsicID()) { 2184 default: return false; 2185 case Intrinsic::frameaddress: { 2186 Type *RetTy = I.getCalledFunction()->getReturnType(); 2187 2188 MVT VT; 2189 if (!isTypeLegal(RetTy, VT)) 2190 return false; 2191 2192 unsigned Opc; 2193 const TargetRegisterClass *RC = nullptr; 2194 2195 switch (VT.SimpleTy) { 2196 default: llvm_unreachable("Invalid result type for frameaddress."); 2197 case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break; 2198 case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break; 2199 } 2200 2201 // This needs to be set before we call getFrameRegister, otherwise we get 2202 // the wrong frame register. 2203 MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); 2204 MFI->setFrameAddressIsTaken(true); 2205 2206 const X86RegisterInfo *RegInfo = 2207 static_cast<const X86RegisterInfo*>(TM.getRegisterInfo()); 2208 unsigned FrameReg = RegInfo->getFrameRegister(*(FuncInfo.MF)); 2209 assert(((FrameReg == X86::RBP && VT == MVT::i64) || 2210 (FrameReg == X86::EBP && VT == MVT::i32)) && 2211 "Invalid Frame Register!"); 2212 2213 // Always make a copy of the frame register to to a vreg first, so that we 2214 // never directly reference the frame register (the TwoAddressInstruction- 2215 // Pass doesn't like that). 2216 unsigned SrcReg = createResultReg(RC); 2217 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2218 TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg); 2219 2220 // Now recursively load from the frame address. 2221 // movq (%rbp), %rax 2222 // movq (%rax), %rax 2223 // movq (%rax), %rax 2224 // ... 2225 unsigned DestReg; 2226 unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue(); 2227 while (Depth--) { 2228 DestReg = createResultReg(RC); 2229 addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2230 TII.get(Opc), DestReg), SrcReg); 2231 SrcReg = DestReg; 2232 } 2233 2234 UpdateValueMap(&I, SrcReg); 2235 return true; 2236 } 2237 case Intrinsic::memcpy: { 2238 const MemCpyInst &MCI = cast<MemCpyInst>(I); 2239 // Don't handle volatile or variable length memcpys. 2240 if (MCI.isVolatile()) 2241 return false; 2242 2243 if (isa<ConstantInt>(MCI.getLength())) { 2244 // Small memcpy's are common enough that we want to do them 2245 // without a call if possible. 
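      // A constant length of at most 32 bytes (16 on x86-32, see
      // IsMemcpySmall) is expanded inline below; larger or non-constant
      // lengths fall through to a library call via DoSelectCall.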
2246 uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue(); 2247 if (IsMemcpySmall(Len)) { 2248 X86AddressMode DestAM, SrcAM; 2249 if (!X86SelectAddress(MCI.getRawDest(), DestAM) || 2250 !X86SelectAddress(MCI.getRawSource(), SrcAM)) 2251 return false; 2252 TryEmitSmallMemcpy(DestAM, SrcAM, Len); 2253 return true; 2254 } 2255 } 2256 2257 unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32; 2258 if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth)) 2259 return false; 2260 2261 if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255) 2262 return false; 2263 2264 return DoSelectCall(&I, "memcpy"); 2265 } 2266 case Intrinsic::memset: { 2267 const MemSetInst &MSI = cast<MemSetInst>(I); 2268 2269 if (MSI.isVolatile()) 2270 return false; 2271 2272 unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32; 2273 if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth)) 2274 return false; 2275 2276 if (MSI.getDestAddressSpace() > 255) 2277 return false; 2278 2279 return DoSelectCall(&I, "memset"); 2280 } 2281 case Intrinsic::stackprotector: { 2282 // Emit code to store the stack guard onto the stack. 2283 EVT PtrTy = TLI.getPointerTy(); 2284 2285 const Value *Op1 = I.getArgOperand(0); // The guard's value. 2286 const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); 2287 2288 MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]); 2289 2290 // Grab the frame index. 2291 X86AddressMode AM; 2292 if (!X86SelectAddress(Slot, AM)) return false; 2293 if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; 2294 return true; 2295 } 2296 case Intrinsic::dbg_declare: { 2297 const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); 2298 X86AddressMode AM; 2299 assert(DI->getAddress() && "Null address should be checked earlier!"); 2300 if (!X86SelectAddress(DI->getAddress(), AM)) 2301 return false; 2302 const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); 2303 // FIXME may need to add RegState::Debug to any registers produced, 2304 // although ESP/EBP should be the only ones at the moment. 2305 addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM). 2306 addImm(0).addMetadata(DI->getVariable()); 2307 return true; 2308 } 2309 case Intrinsic::trap: { 2310 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP)); 2311 return true; 2312 } 2313 case Intrinsic::sqrt: { 2314 if (!Subtarget->hasSSE1()) 2315 return false; 2316 2317 Type *RetTy = I.getCalledFunction()->getReturnType(); 2318 2319 MVT VT; 2320 if (!isTypeLegal(RetTy, VT)) 2321 return false; 2322 2323 // Unfortunately we can't use FastEmit_r, because the AVX version of FSQRT 2324 // is not generated by FastISel yet. 2325 // FIXME: Update this code once tablegen can handle it. 
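    // For example, llvm.sqrt.f64 becomes SQRTSDr with plain SSE2 and
    // VSQRTSDr with AVX; the AVX form takes an extra source register, which
    // is satisfied below with an IMPLICIT_DEF.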
2326 static const unsigned SqrtOpc[2][2] = { 2327 {X86::SQRTSSr, X86::VSQRTSSr}, 2328 {X86::SQRTSDr, X86::VSQRTSDr} 2329 }; 2330 bool HasAVX = Subtarget->hasAVX(); 2331 unsigned Opc; 2332 const TargetRegisterClass *RC; 2333 switch (VT.SimpleTy) { 2334 default: return false; 2335 case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break; 2336 case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break; 2337 } 2338 2339 const Value *SrcVal = I.getArgOperand(0); 2340 unsigned SrcReg = getRegForValue(SrcVal); 2341 2342 if (SrcReg == 0) 2343 return false; 2344 2345 unsigned ImplicitDefReg = 0; 2346 if (HasAVX) { 2347 ImplicitDefReg = createResultReg(RC); 2348 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2349 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); 2350 } 2351 2352 unsigned ResultReg = createResultReg(RC); 2353 MachineInstrBuilder MIB; 2354 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), 2355 ResultReg); 2356 2357 if (ImplicitDefReg) 2358 MIB.addReg(ImplicitDefReg); 2359 2360 MIB.addReg(SrcReg); 2361 2362 UpdateValueMap(&I, ResultReg); 2363 return true; 2364 } 2365 case Intrinsic::sadd_with_overflow: 2366 case Intrinsic::uadd_with_overflow: 2367 case Intrinsic::ssub_with_overflow: 2368 case Intrinsic::usub_with_overflow: 2369 case Intrinsic::smul_with_overflow: 2370 case Intrinsic::umul_with_overflow: { 2371 // This implements the basic lowering of the xalu with overflow intrinsics 2372 // into add/sub/mul followed by either seto or setb. 2373 const Function *Callee = I.getCalledFunction(); 2374 auto *Ty = cast<StructType>(Callee->getReturnType()); 2375 Type *RetTy = Ty->getTypeAtIndex(0U); 2376 Type *CondTy = Ty->getTypeAtIndex(1); 2377 2378 MVT VT; 2379 if (!isTypeLegal(RetTy, VT)) 2380 return false; 2381 2382 if (VT < MVT::i8 || VT > MVT::i64) 2383 return false; 2384 2385 const Value *LHS = I.getArgOperand(0); 2386 const Value *RHS = I.getArgOperand(1); 2387 2388 // Canonicalize immediate to the RHS. 2389 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 2390 isCommutativeIntrinsic(I)) 2391 std::swap(LHS, RHS); 2392 2393 unsigned BaseOpc, CondOpc; 2394 switch (I.getIntrinsicID()) { 2395 default: llvm_unreachable("Unexpected intrinsic!"); 2396 case Intrinsic::sadd_with_overflow: 2397 BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break; 2398 case Intrinsic::uadd_with_overflow: 2399 BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break; 2400 case Intrinsic::ssub_with_overflow: 2401 BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break; 2402 case Intrinsic::usub_with_overflow: 2403 BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break; 2404 case Intrinsic::smul_with_overflow: 2405 BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break; 2406 case Intrinsic::umul_with_overflow: 2407 BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break; 2408 } 2409 2410 unsigned LHSReg = getRegForValue(LHS); 2411 if (LHSReg == 0) 2412 return false; 2413 bool LHSIsKill = hasTrivialKill(LHS); 2414 2415 unsigned ResultReg = 0; 2416 // Check if we have an immediate version. 
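    // For example, llvm.sadd.with.overflow.i32 with a constant RHS folds to
    // an ADD32ri here (ADD32rr below otherwise); the overflow bit is then
    // captured with SETOr (SETBr for the unsigned add) into the next
    // consecutive result register.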
2417 if (auto const *C = dyn_cast<ConstantInt>(RHS)) { 2418 ResultReg = FastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill, 2419 C->getZExtValue()); 2420 } 2421 2422 unsigned RHSReg; 2423 bool RHSIsKill; 2424 if (!ResultReg) { 2425 RHSReg = getRegForValue(RHS); 2426 if (RHSReg == 0) 2427 return false; 2428 RHSIsKill = hasTrivialKill(RHS); 2429 ResultReg = FastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg, 2430 RHSIsKill); 2431 } 2432 2433 // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit 2434 // it manually. 2435 if (BaseOpc == X86ISD::UMUL && !ResultReg) { 2436 static const unsigned MULOpc[] = 2437 { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r }; 2438 static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX }; 2439 // First copy the first operand into RAX, which is an implicit input to 2440 // the X86::MUL*r instruction. 2441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2442 TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8]) 2443 .addReg(LHSReg, getKillRegState(LHSIsKill)); 2444 ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8], 2445 TLI.getRegClassFor(VT), RHSReg, RHSIsKill); 2446 } else if (BaseOpc == X86ISD::SMUL && !ResultReg) { 2447 static const unsigned MULOpc[] = 2448 { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr }; 2449 if (VT == MVT::i8) { 2450 // Copy the first operand into AL, which is an implicit input to the 2451 // X86::IMUL8r instruction. 2452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2453 TII.get(TargetOpcode::COPY), X86::AL) 2454 .addReg(LHSReg, getKillRegState(LHSIsKill)); 2455 ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg, 2456 RHSIsKill); 2457 } else 2458 ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8], 2459 TLI.getRegClassFor(VT), LHSReg, LHSIsKill, 2460 RHSReg, RHSIsKill); 2461 } 2462 2463 if (!ResultReg) 2464 return false; 2465 2466 unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy); 2467 assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers."); 2468 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc), 2469 ResultReg2); 2470 2471 UpdateValueMap(&I, ResultReg, 2); 2472 return true; 2473 } 2474 case Intrinsic::x86_sse_cvttss2si: 2475 case Intrinsic::x86_sse_cvttss2si64: 2476 case Intrinsic::x86_sse2_cvttsd2si: 2477 case Intrinsic::x86_sse2_cvttsd2si64: { 2478 bool IsInputDouble; 2479 switch (I.getIntrinsicID()) { 2480 default: llvm_unreachable("Unexpected intrinsic."); 2481 case Intrinsic::x86_sse_cvttss2si: 2482 case Intrinsic::x86_sse_cvttss2si64: 2483 if (!Subtarget->hasSSE1()) 2484 return false; 2485 IsInputDouble = false; 2486 break; 2487 case Intrinsic::x86_sse2_cvttsd2si: 2488 case Intrinsic::x86_sse2_cvttsd2si64: 2489 if (!Subtarget->hasSSE2()) 2490 return false; 2491 IsInputDouble = true; 2492 break; 2493 } 2494 2495 Type *RetTy = I.getCalledFunction()->getReturnType(); 2496 MVT VT; 2497 if (!isTypeLegal(RetTy, VT)) 2498 return false; 2499 2500 static const unsigned CvtOpc[2][2][2] = { 2501 { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr }, 2502 { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } }, 2503 { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr }, 2504 { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } } 2505 }; 2506 bool HasAVX = Subtarget->hasAVX(); 2507 unsigned Opc; 2508 switch (VT.SimpleTy) { 2509 default: llvm_unreachable("Unexpected result type."); 2510 case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break; 2511 case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break; 2512 } 2513 2514 // Check if we can fold insertelement 
instructions into the convert. 2515 const Value *Op = I.getArgOperand(0); 2516 while (auto *IE = dyn_cast<InsertElementInst>(Op)) { 2517 const Value *Index = IE->getOperand(2); 2518 if (!isa<ConstantInt>(Index)) 2519 break; 2520 unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); 2521 2522 if (Idx == 0) { 2523 Op = IE->getOperand(1); 2524 break; 2525 } 2526 Op = IE->getOperand(0); 2527 } 2528 2529 unsigned Reg = getRegForValue(Op); 2530 if (Reg == 0) 2531 return false; 2532 2533 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 2534 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 2535 .addReg(Reg); 2536 2537 UpdateValueMap(&I, ResultReg); 2538 return true; 2539 } 2540 } 2541 } 2542 2543 bool X86FastISel::FastLowerArguments() { 2544 if (!FuncInfo.CanLowerReturn) 2545 return false; 2546 2547 const Function *F = FuncInfo.Fn; 2548 if (F->isVarArg()) 2549 return false; 2550 2551 CallingConv::ID CC = F->getCallingConv(); 2552 if (CC != CallingConv::C) 2553 return false; 2554 2555 if (Subtarget->isCallingConvWin64(CC)) 2556 return false; 2557 2558 if (!Subtarget->is64Bit()) 2559 return false; 2560 2561 // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. 2562 unsigned GPRCnt = 0; 2563 unsigned FPRCnt = 0; 2564 unsigned Idx = 0; 2565 for (auto const &Arg : F->args()) { 2566 // The first argument is at index 1. 2567 ++Idx; 2568 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || 2569 F->getAttributes().hasAttribute(Idx, Attribute::InReg) || 2570 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || 2571 F->getAttributes().hasAttribute(Idx, Attribute::Nest)) 2572 return false; 2573 2574 Type *ArgTy = Arg.getType(); 2575 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) 2576 return false; 2577 2578 EVT ArgVT = TLI.getValueType(ArgTy); 2579 if (!ArgVT.isSimple()) return false; 2580 switch (ArgVT.getSimpleVT().SimpleTy) { 2581 default: return false; 2582 case MVT::i32: 2583 case MVT::i64: 2584 ++GPRCnt; 2585 break; 2586 case MVT::f32: 2587 case MVT::f64: 2588 if (!Subtarget->hasSSE1()) 2589 return false; 2590 ++FPRCnt; 2591 break; 2592 } 2593 2594 if (GPRCnt > 6) 2595 return false; 2596 2597 if (FPRCnt > 8) 2598 return false; 2599 } 2600 2601 static const MCPhysReg GPR32ArgRegs[] = { 2602 X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D 2603 }; 2604 static const MCPhysReg GPR64ArgRegs[] = { 2605 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9 2606 }; 2607 static const MCPhysReg XMMArgRegs[] = { 2608 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 2609 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 2610 }; 2611 2612 unsigned GPRIdx = 0; 2613 unsigned FPRIdx = 0; 2614 for (auto const &Arg : F->args()) { 2615 MVT VT = TLI.getSimpleValueType(Arg.getType()); 2616 const TargetRegisterClass *RC = TLI.getRegClassFor(VT); 2617 unsigned SrcReg; 2618 switch (VT.SimpleTy) { 2619 default: llvm_unreachable("Unexpected value type."); 2620 case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break; 2621 case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break; 2622 case MVT::f32: // fall-through 2623 case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break; 2624 } 2625 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 2626 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 2627 // Without this, EmitLiveInCopies may eliminate the livein if its only 2628 // use is a bitcast (which isn't turned into an instruction). 
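    // For example, a first argument of type i64 arrives in RDI
    // (GPR64ArgRegs[0]); it is registered as a live-in above and the COPY
    // below gives it a vreg that the rest of fast-isel can refer to.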
2629 unsigned ResultReg = createResultReg(RC); 2630 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2631 TII.get(TargetOpcode::COPY), ResultReg) 2632 .addReg(DstReg, getKillRegState(true)); 2633 UpdateValueMap(&Arg, ResultReg); 2634 } 2635 return true; 2636 } 2637 2638 bool X86FastISel::X86SelectCall(const Instruction *I) { 2639 const CallInst *CI = cast<CallInst>(I); 2640 const Value *Callee = CI->getCalledValue(); 2641 2642 // Can't handle inline asm yet. 2643 if (isa<InlineAsm>(Callee)) 2644 return false; 2645 2646 // Handle intrinsic calls. 2647 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) 2648 return X86VisitIntrinsicCall(*II); 2649 2650 // Allow SelectionDAG isel to handle tail calls. 2651 if (cast<CallInst>(I)->isTailCall()) 2652 return false; 2653 2654 return DoSelectCall(I, nullptr); 2655 } 2656 2657 static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget, 2658 const ImmutableCallSite &CS) { 2659 if (Subtarget.is64Bit()) 2660 return 0; 2661 if (Subtarget.getTargetTriple().isOSMSVCRT()) 2662 return 0; 2663 CallingConv::ID CC = CS.getCallingConv(); 2664 if (CC == CallingConv::Fast || CC == CallingConv::GHC) 2665 return 0; 2666 if (!CS.paramHasAttr(1, Attribute::StructRet)) 2667 return 0; 2668 if (CS.paramHasAttr(1, Attribute::InReg)) 2669 return 0; 2670 return 4; 2671 } 2672 2673 // Select either a call, or an llvm.memcpy/memmove/memset intrinsic 2674 bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { 2675 const CallInst *CI = cast<CallInst>(I); 2676 const Value *Callee = CI->getCalledValue(); 2677 2678 // Handle only C and fastcc calling conventions for now. 2679 ImmutableCallSite CS(CI); 2680 CallingConv::ID CC = CS.getCallingConv(); 2681 bool isWin64 = Subtarget->isCallingConvWin64(CC); 2682 if (CC != CallingConv::C && CC != CallingConv::Fast && 2683 CC != CallingConv::X86_FastCall && CC != CallingConv::X86_64_Win64 && 2684 CC != CallingConv::X86_64_SysV) 2685 return false; 2686 2687 // fastcc with -tailcallopt is intended to provide a guaranteed 2688 // tail call optimization. Fastisel doesn't know how to do that. 2689 if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) 2690 return false; 2691 2692 PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 2693 FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 2694 bool isVarArg = FTy->isVarArg(); 2695 2696 // Don't know how to handle Win64 varargs yet. Nothing special needed for 2697 // x86-32. Special handling for x86-64 is implemented. 2698 if (isVarArg && isWin64) 2699 return false; 2700 2701 // Don't know about inalloca yet. 2702 if (CS.hasInAllocaArgument()) 2703 return false; 2704 2705 // Fast-isel doesn't know about callee-pop yet. 2706 if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg, 2707 TM.Options.GuaranteedTailCallOpt)) 2708 return false; 2709 2710 // Check whether the function can return without sret-demotion. 2711 SmallVector<ISD::OutputArg, 4> Outs; 2712 GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI); 2713 bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), 2714 *FuncInfo.MF, FTy->isVarArg(), 2715 Outs, FTy->getContext()); 2716 if (!CanLowerReturn) 2717 return false; 2718 2719 // Materialize callee address in a register. FIXME: GV address can be 2720 // handled with a CALLpcrel32 instead. 
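  // For example, a call to a known function keeps its GlobalValue and is
  // emitted later as CALL64pcrel32/CALLpcrel32, while a call through a
  // function pointer materializes the pointer into a register and uses
  // CALL64r/CALL32r instead.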
2721 X86AddressMode CalleeAM; 2722 if (!X86SelectCallAddress(Callee, CalleeAM)) 2723 return false; 2724 unsigned CalleeOp = 0; 2725 const GlobalValue *GV = nullptr; 2726 if (CalleeAM.GV != nullptr) { 2727 GV = CalleeAM.GV; 2728 } else if (CalleeAM.Base.Reg != 0) { 2729 CalleeOp = CalleeAM.Base.Reg; 2730 } else 2731 return false; 2732 2733 // Deal with call operands first. 2734 SmallVector<const Value *, 8> ArgVals; 2735 SmallVector<unsigned, 8> Args; 2736 SmallVector<MVT, 8> ArgVTs; 2737 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; 2738 unsigned arg_size = CS.arg_size(); 2739 Args.reserve(arg_size); 2740 ArgVals.reserve(arg_size); 2741 ArgVTs.reserve(arg_size); 2742 ArgFlags.reserve(arg_size); 2743 for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); 2744 i != e; ++i) { 2745 // If we're lowering a mem intrinsic instead of a regular call, skip the 2746 // last two arguments, which should not passed to the underlying functions. 2747 if (MemIntName && e-i <= 2) 2748 break; 2749 Value *ArgVal = *i; 2750 ISD::ArgFlagsTy Flags; 2751 unsigned AttrInd = i - CS.arg_begin() + 1; 2752 if (CS.paramHasAttr(AttrInd, Attribute::SExt)) 2753 Flags.setSExt(); 2754 if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) 2755 Flags.setZExt(); 2756 2757 if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) { 2758 PointerType *Ty = cast<PointerType>(ArgVal->getType()); 2759 Type *ElementTy = Ty->getElementType(); 2760 unsigned FrameSize = DL.getTypeAllocSize(ElementTy); 2761 unsigned FrameAlign = CS.getParamAlignment(AttrInd); 2762 if (!FrameAlign) 2763 FrameAlign = TLI.getByValTypeAlignment(ElementTy); 2764 Flags.setByVal(); 2765 Flags.setByValSize(FrameSize); 2766 Flags.setByValAlign(FrameAlign); 2767 if (!IsMemcpySmall(FrameSize)) 2768 return false; 2769 } 2770 2771 if (CS.paramHasAttr(AttrInd, Attribute::InReg)) 2772 Flags.setInReg(); 2773 if (CS.paramHasAttr(AttrInd, Attribute::Nest)) 2774 Flags.setNest(); 2775 2776 // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra 2777 // instruction. This is safe because it is common to all fastisel supported 2778 // calling conventions on x86. 2779 if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) { 2780 if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 || 2781 CI->getBitWidth() == 16) { 2782 if (Flags.isSExt()) 2783 ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext())); 2784 else 2785 ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext())); 2786 } 2787 } 2788 2789 unsigned ArgReg; 2790 2791 // Passing bools around ends up doing a trunc to i1 and passing it. 2792 // Codegen this as an argument + "and 1". 
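  // For example, for a call like "call void @g(i1 %b)" where %b is
  // "trunc i8 %x to i1" in this block with a single use, we pass %x through
  // an AND with 1 rather than materializing the truncation separately.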
2793 if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) && 2794 cast<TruncInst>(ArgVal)->getParent() == I->getParent() && 2795 ArgVal->hasOneUse()) { 2796 ArgVal = cast<TruncInst>(ArgVal)->getOperand(0); 2797 ArgReg = getRegForValue(ArgVal); 2798 if (ArgReg == 0) return false; 2799 2800 MVT ArgVT; 2801 if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false; 2802 2803 ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg, 2804 ArgVal->hasOneUse(), 1); 2805 } else { 2806 ArgReg = getRegForValue(ArgVal); 2807 } 2808 2809 if (ArgReg == 0) return false; 2810 2811 Type *ArgTy = ArgVal->getType(); 2812 MVT ArgVT; 2813 if (!isTypeLegal(ArgTy, ArgVT)) 2814 return false; 2815 if (ArgVT == MVT::x86mmx) 2816 return false; 2817 unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); 2818 Flags.setOrigAlign(OriginalAlignment); 2819 2820 Args.push_back(ArgReg); 2821 ArgVals.push_back(ArgVal); 2822 ArgVTs.push_back(ArgVT); 2823 ArgFlags.push_back(Flags); 2824 } 2825 2826 // Analyze operands of the call, assigning locations to each operand. 2827 SmallVector<CCValAssign, 16> ArgLocs; 2828 CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, 2829 I->getParent()->getContext()); 2830 2831 // Allocate shadow area for Win64 2832 if (isWin64) 2833 CCInfo.AllocateStack(32, 8); 2834 2835 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86); 2836 2837 // Get a count of how many bytes are to be pushed on the stack. 2838 unsigned NumBytes = CCInfo.getNextStackOffset(); 2839 2840 // Issue CALLSEQ_START 2841 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 2842 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 2843 .addImm(NumBytes); 2844 2845 // Process argument: walk the register/memloc assignments, inserting 2846 // copies / loads. 2847 SmallVector<unsigned, 4> RegArgs; 2848 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2849 CCValAssign &VA = ArgLocs[i]; 2850 unsigned Arg = Args[VA.getValNo()]; 2851 EVT ArgVT = ArgVTs[VA.getValNo()]; 2852 2853 // Promote the value if needed. 
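    // For example, a small integer argument that the calling convention
    // assigns to a 32-bit location with the SExt or ZExt flag is widened in
    // place via X86FastEmitExtend before the COPY into its argument register
    // or the store to its stack slot below.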
2854 switch (VA.getLocInfo()) { 2855 case CCValAssign::Full: break; 2856 case CCValAssign::SExt: { 2857 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && 2858 "Unexpected extend"); 2859 bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), 2860 Arg, ArgVT, Arg); 2861 assert(Emitted && "Failed to emit a sext!"); (void)Emitted; 2862 ArgVT = VA.getLocVT(); 2863 break; 2864 } 2865 case CCValAssign::ZExt: { 2866 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && 2867 "Unexpected extend"); 2868 bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), 2869 Arg, ArgVT, Arg); 2870 assert(Emitted && "Failed to emit a zext!"); (void)Emitted; 2871 ArgVT = VA.getLocVT(); 2872 break; 2873 } 2874 case CCValAssign::AExt: { 2875 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && 2876 "Unexpected extend"); 2877 bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), 2878 Arg, ArgVT, Arg); 2879 if (!Emitted) 2880 Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), 2881 Arg, ArgVT, Arg); 2882 if (!Emitted) 2883 Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), 2884 Arg, ArgVT, Arg); 2885 2886 assert(Emitted && "Failed to emit a aext!"); (void)Emitted; 2887 ArgVT = VA.getLocVT(); 2888 break; 2889 } 2890 case CCValAssign::BCvt: { 2891 unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(), 2892 ISD::BITCAST, Arg, /*TODO: Kill=*/false); 2893 assert(BC != 0 && "Failed to emit a bitcast!"); 2894 Arg = BC; 2895 ArgVT = VA.getLocVT(); 2896 break; 2897 } 2898 case CCValAssign::VExt: 2899 // VExt has not been implemented, so this should be impossible to reach 2900 // for now. However, fallback to Selection DAG isel once implemented. 2901 return false; 2902 case CCValAssign::Indirect: 2903 // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully 2904 // support this. 2905 return false; 2906 case CCValAssign::FPExt: 2907 llvm_unreachable("Unexpected loc info!"); 2908 } 2909 2910 if (VA.isRegLoc()) { 2911 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2912 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg); 2913 RegArgs.push_back(VA.getLocReg()); 2914 } else { 2915 unsigned LocMemOffset = VA.getLocMemOffset(); 2916 X86AddressMode AM; 2917 const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo*>( 2918 getTargetMachine()->getRegisterInfo()); 2919 AM.Base.Reg = RegInfo->getStackRegister(); 2920 AM.Disp = LocMemOffset; 2921 const Value *ArgVal = ArgVals[VA.getValNo()]; 2922 ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()]; 2923 2924 if (Flags.isByVal()) { 2925 X86AddressMode SrcAM; 2926 SrcAM.Base.Reg = Arg; 2927 bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()); 2928 assert(Res && "memcpy length already checked!"); (void)Res; 2929 } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) { 2930 // If this is a really simple value, emit this with the Value* version 2931 // of X86FastEmitStore. If it isn't simple, we don't want to do this, 2932 // as it can cause us to reevaluate the argument. 2933 if (!X86FastEmitStore(ArgVT, ArgVal, AM)) 2934 return false; 2935 } else { 2936 if (!X86FastEmitStore(ArgVT, Arg, /*ValIsKill=*/false, AM)) 2937 return false; 2938 } 2939 } 2940 } 2941 2942 // ELF / PIC requires GOT in the EBX register before function calls via PLT 2943 // GOT pointer. 
2944 if (Subtarget->isPICStyleGOT()) { 2945 unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); 2946 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2947 TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base); 2948 } 2949 2950 if (Subtarget->is64Bit() && isVarArg && !isWin64) { 2951 // Count the number of XMM registers allocated. 2952 static const MCPhysReg XMMArgRegs[] = { 2953 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 2954 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 2955 }; 2956 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); 2957 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), 2958 X86::AL).addImm(NumXMMRegs); 2959 } 2960 2961 // Issue the call. 2962 MachineInstrBuilder MIB; 2963 if (CalleeOp) { 2964 // Register-indirect call. 2965 unsigned CallOpc; 2966 if (Subtarget->is64Bit()) 2967 CallOpc = X86::CALL64r; 2968 else 2969 CallOpc = X86::CALL32r; 2970 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)) 2971 .addReg(CalleeOp); 2972 2973 } else { 2974 // Direct call. 2975 assert(GV && "Not a direct call"); 2976 unsigned CallOpc; 2977 if (Subtarget->is64Bit()) 2978 CallOpc = X86::CALL64pcrel32; 2979 else 2980 CallOpc = X86::CALLpcrel32; 2981 2982 // See if we need any target-specific flags on the GV operand. 2983 unsigned char OpFlags = 0; 2984 2985 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to 2986 // external symbols most go through the PLT in PIC mode. If the symbol 2987 // has hidden or protected visibility, or if it is static or local, then 2988 // we don't need to use the PLT - we can directly call it. 2989 if (Subtarget->isTargetELF() && 2990 TM.getRelocationModel() == Reloc::PIC_ && 2991 GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { 2992 OpFlags = X86II::MO_PLT; 2993 } else if (Subtarget->isPICStyleStubAny() && 2994 (GV->isDeclaration() || GV->isWeakForLinker()) && 2995 (!Subtarget->getTargetTriple().isMacOSX() || 2996 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { 2997 // PC-relative references to external symbols should go through $stub, 2998 // unless we're building with the leopard linker or later, which 2999 // automatically synthesizes these stubs. 3000 OpFlags = X86II::MO_DARWIN_STUB; 3001 } 3002 3003 3004 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); 3005 if (MemIntName) 3006 MIB.addExternalSymbol(MemIntName, OpFlags); 3007 else 3008 MIB.addGlobalAddress(GV, 0, OpFlags); 3009 } 3010 3011 // Add a register mask with the call-preserved registers. 3012 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3013 MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv())); 3014 3015 // Add an implicit use GOT pointer in EBX. 3016 if (Subtarget->isPICStyleGOT()) 3017 MIB.addReg(X86::EBX, RegState::Implicit); 3018 3019 if (Subtarget->is64Bit() && isVarArg && !isWin64) 3020 MIB.addReg(X86::AL, RegState::Implicit); 3021 3022 // Add implicit physical register uses to the call. 3023 for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) 3024 MIB.addReg(RegArgs[i], RegState::Implicit); 3025 3026 // Issue CALLSEQ_END 3027 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3028 const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS); 3029 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 3030 .addImm(NumBytes).addImm(NumBytesCallee); 3031 3032 // Build info for return calling conv lowering code. 3033 // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo. 
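  // Roughly: each return-value location is copied out of its physical
  // register into consecutive result vregs; an FP value returned in ST0/ST1
  // that we prefer in an XMM register is popped as f80 (FpPOP_RETVAL),
  // spilled with ST_Fp80m32/ST_Fp80m64 and reloaded with MOVSSrm/MOVSDrm.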
3034 SmallVector<ISD::InputArg, 32> Ins; 3035 SmallVector<EVT, 4> RetTys; 3036 ComputeValueVTs(TLI, I->getType(), RetTys); 3037 for (unsigned i = 0, e = RetTys.size(); i != e; ++i) { 3038 EVT VT = RetTys[i]; 3039 MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT); 3040 unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT); 3041 for (unsigned j = 0; j != NumRegs; ++j) { 3042 ISD::InputArg MyFlags; 3043 MyFlags.VT = RegisterVT; 3044 MyFlags.Used = !CS.getInstruction()->use_empty(); 3045 if (CS.paramHasAttr(0, Attribute::SExt)) 3046 MyFlags.Flags.setSExt(); 3047 if (CS.paramHasAttr(0, Attribute::ZExt)) 3048 MyFlags.Flags.setZExt(); 3049 if (CS.paramHasAttr(0, Attribute::InReg)) 3050 MyFlags.Flags.setInReg(); 3051 Ins.push_back(MyFlags); 3052 } 3053 } 3054 3055 // Now handle call return values. 3056 SmallVector<unsigned, 4> UsedRegs; 3057 SmallVector<CCValAssign, 16> RVLocs; 3058 CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs, 3059 I->getParent()->getContext()); 3060 unsigned ResultReg = FuncInfo.CreateRegs(I->getType()); 3061 CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86); 3062 for (unsigned i = 0; i != RVLocs.size(); ++i) { 3063 EVT CopyVT = RVLocs[i].getValVT(); 3064 unsigned CopyReg = ResultReg + i; 3065 3066 // If this is a call to a function that returns an fp value on the x87 fp 3067 // stack, but where we prefer to use the value in xmm registers, copy it 3068 // out as F80 and use a truncate to move it from fp stack reg to xmm reg. 3069 if ((RVLocs[i].getLocReg() == X86::ST0 || 3070 RVLocs[i].getLocReg() == X86::ST1)) { 3071 if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) { 3072 CopyVT = MVT::f80; 3073 CopyReg = createResultReg(&X86::RFP80RegClass); 3074 } 3075 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3076 TII.get(X86::FpPOP_RETVAL), CopyReg); 3077 } else { 3078 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3079 TII.get(TargetOpcode::COPY), 3080 CopyReg).addReg(RVLocs[i].getLocReg()); 3081 UsedRegs.push_back(RVLocs[i].getLocReg()); 3082 } 3083 3084 if (CopyVT != RVLocs[i].getValVT()) { 3085 // Round the F80 the right size, which also moves to the appropriate xmm 3086 // register. This is accomplished by storing the F80 value in memory and 3087 // then loading it back. Ewww... 3088 EVT ResVT = RVLocs[i].getValVT(); 3089 unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64; 3090 unsigned MemSize = ResVT.getSizeInBits()/8; 3091 int FI = MFI.CreateStackObject(MemSize, MemSize, false); 3092 addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3093 TII.get(Opc)), FI) 3094 .addReg(CopyReg); 3095 Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm; 3096 addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3097 TII.get(Opc), ResultReg + i), FI); 3098 } 3099 } 3100 3101 if (RVLocs.size()) 3102 UpdateValueMap(I, ResultReg, RVLocs.size()); 3103 3104 // Set all unused physreg defs as dead. 
3105 static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); 3106 3107 return true; 3108 } 3109 3110 3111 bool 3112 X86FastISel::TargetSelectInstruction(const Instruction *I) { 3113 switch (I->getOpcode()) { 3114 default: break; 3115 case Instruction::Load: 3116 return X86SelectLoad(I); 3117 case Instruction::Store: 3118 return X86SelectStore(I); 3119 case Instruction::Ret: 3120 return X86SelectRet(I); 3121 case Instruction::ICmp: 3122 case Instruction::FCmp: 3123 return X86SelectCmp(I); 3124 case Instruction::ZExt: 3125 return X86SelectZExt(I); 3126 case Instruction::Br: 3127 return X86SelectBranch(I); 3128 case Instruction::Call: 3129 return X86SelectCall(I); 3130 case Instruction::LShr: 3131 case Instruction::AShr: 3132 case Instruction::Shl: 3133 return X86SelectShift(I); 3134 case Instruction::SDiv: 3135 case Instruction::UDiv: 3136 case Instruction::SRem: 3137 case Instruction::URem: 3138 return X86SelectDivRem(I); 3139 case Instruction::Select: 3140 return X86SelectSelect(I); 3141 case Instruction::Trunc: 3142 return X86SelectTrunc(I); 3143 case Instruction::FPExt: 3144 return X86SelectFPExt(I); 3145 case Instruction::FPTrunc: 3146 return X86SelectFPTrunc(I); 3147 case Instruction::IntToPtr: // Deliberate fall-through. 3148 case Instruction::PtrToInt: { 3149 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); 3150 EVT DstVT = TLI.getValueType(I->getType()); 3151 if (DstVT.bitsGT(SrcVT)) 3152 return X86SelectZExt(I); 3153 if (DstVT.bitsLT(SrcVT)) 3154 return X86SelectTrunc(I); 3155 unsigned Reg = getRegForValue(I->getOperand(0)); 3156 if (Reg == 0) return false; 3157 UpdateValueMap(I, Reg); 3158 return true; 3159 } 3160 } 3161 3162 return false; 3163 } 3164 3165 unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { 3166 MVT VT; 3167 if (!isTypeLegal(C->getType(), VT)) 3168 return 0; 3169 3170 // Can't handle alternate code models yet. 3171 if (TM.getCodeModel() != CodeModel::Small) 3172 return 0; 3173 3174 // Get opcode and regclass of the output for the given load instruction. 3175 unsigned Opc = 0; 3176 const TargetRegisterClass *RC = nullptr; 3177 switch (VT.SimpleTy) { 3178 default: return 0; 3179 case MVT::i8: 3180 Opc = X86::MOV8rm; 3181 RC = &X86::GR8RegClass; 3182 break; 3183 case MVT::i16: 3184 Opc = X86::MOV16rm; 3185 RC = &X86::GR16RegClass; 3186 break; 3187 case MVT::i32: 3188 Opc = X86::MOV32rm; 3189 RC = &X86::GR32RegClass; 3190 break; 3191 case MVT::i64: 3192 // Must be in x86-64 mode. 3193 Opc = X86::MOV64rm; 3194 RC = &X86::GR64RegClass; 3195 break; 3196 case MVT::f32: 3197 if (X86ScalarSSEf32) { 3198 Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm; 3199 RC = &X86::FR32RegClass; 3200 } else { 3201 Opc = X86::LD_Fp32m; 3202 RC = &X86::RFP32RegClass; 3203 } 3204 break; 3205 case MVT::f64: 3206 if (X86ScalarSSEf64) { 3207 Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; 3208 RC = &X86::FR64RegClass; 3209 } else { 3210 Opc = X86::LD_Fp64m; 3211 RC = &X86::RFP64RegClass; 3212 } 3213 break; 3214 case MVT::f80: 3215 // No f80 support yet. 3216 return 0; 3217 } 3218 3219 // Materialize addresses with LEA/MOV instructions. 3220 if (isa<GlobalValue>(C)) { 3221 X86AddressMode AM; 3222 if (X86SelectAddress(C, AM)) { 3223 // If the expression is just a basereg, then we're done, otherwise we need 3224 // to emit an LEA. 
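  // For example, if the address folds to a plain base register it is reused
  // directly; under the static relocation model with 64-bit pointers the
  // global is loaded with MOV64ri, and otherwise an LEA32r/LEA64r of the
  // computed address mode is emitted.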
      if (AM.BaseType == X86AddressMode::RegBase &&
          AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
        return AM.Base.Reg;

      unsigned ResultReg = createResultReg(RC);
      if (TM.getRelocationModel() == Reloc::Static &&
          TLI.getPointerTy() == MVT::i64) {
        // The displacement could be more than 32 bits away, so we need to use
        // an instruction with a 64-bit immediate.
        Opc = X86::MOV64ri;
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(Opc), ResultReg).addGlobalAddress(cast<GlobalValue>(C));
      } else {
        Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
        addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                               TII.get(Opc), ResultReg), AM);
      }
      return ResultReg;
    }
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = DL.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // Alignment of vector types. FIXME!
    Align = DL.getTypeAllocSize(C->getType());
  }

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  unsigned char OpFlag = 0;
  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic.
    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleGOT()) {
    OpFlag = X86II::MO_GOTOFF;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleRIPRel() &&
             TM.getCodeModel() == CodeModel::Small) {
    PICBase = X86::RIP;
  }

  // Create the load from the constant pool.
  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
  unsigned ResultReg = createResultReg(RC);
  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                   TII.get(Opc), ResultReg),
                           MCPOffset, PICBase, OpFlag);

  return ResultReg;
}

unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but TargetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and TargetMaterializeAlloca.
  if (!FuncInfo.StaticAllocaMap.count(C))
    return 0;
  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM))
    return 0;
  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy());
  unsigned ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                         TII.get(Opc), ResultReg), AM);
  return ResultReg;
}

unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
  MVT VT;
  if (!isTypeLegal(CF->getType(), VT))
    return 0;

  // Get opcode and regclass for the given zero.
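  // With SSE available, +0.0 is materialized with the FsFLD0SS/FsFLD0SD
  // pseudos (later expanded to a self-XOR zeroing idiom on an xmm register);
  // without SSE we fall back to the x87 LD_Fp032/LD_Fp064 pseudos, which
  // ultimately become an fldz. f80 is rejected below, as elsewhere in this
  // file.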
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.SimpleTy) {
  default: return 0;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = X86::FsFLD0SS;
      RC  = &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp032;
      RC  = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = X86::FsFLD0SD;
      RC  = &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp064;
      RC  = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  return ResultReg;
}

bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  const Value *Ptr = LI->getPointerOperand();
  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  const X86InstrInfo &XII = (const X86InstrInfo &)TII;

  unsigned Size = DL.getTypeAllocSize(LI->getType());
  unsigned Alignment = LI->getAlignment();

  if (Alignment == 0) // Ensure that codegen never sees alignment 0.
    Alignment = DL.getABITypeAlignment(LI->getType());

  SmallVector<MachineOperand, 8> AddrOps;
  AM.getFullAddress(AddrOps);

  MachineInstr *Result = XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo,
                                                   AddrOps, Size, Alignment);
  if (!Result)
    return false;

  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
  FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
  MI->eraseFromParent();
  return true;
}

namespace llvm {
  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    return new X86FastISel(funcInfo, libInfo);
  }
}
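// A minimal sketch of how this factory is reached, assuming the usual wiring
// in X86ISelLowering.cpp (not part of this file):
//
//   FastISel *
//   X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
//                                     const TargetLibraryInfo *libInfo) const {
//     return X86::createFastISel(funcInfo, libInfo);
//   }
//
// SelectionDAGISel invokes that hook when fast instruction selection is
// enabled (e.g. at -O0), and falls back to the full DAG selector whenever
// TargetSelectInstruction above returns false for an instruction.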