1 //===-- X86CodeEmitter.cpp - Convert X86 code to machine code -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the pass that transforms the X86 machine instructions into 11 // relocatable machine code. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #define DEBUG_TYPE "x86-emitter" 16 #include "X86InstrInfo.h" 17 #include "X86JITInfo.h" 18 #include "X86Subtarget.h" 19 #include "X86TargetMachine.h" 20 #include "X86Relocations.h" 21 #include "X86.h" 22 #include "llvm/LLVMContext.h" 23 #include "llvm/PassManager.h" 24 #include "llvm/CodeGen/JITCodeEmitter.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineInstr.h" 27 #include "llvm/CodeGen/MachineModuleInfo.h" 28 #include "llvm/CodeGen/Passes.h" 29 #include "llvm/ADT/Statistic.h" 30 #include "llvm/MC/MCCodeEmitter.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include "llvm/Target/TargetOptions.h" 37 using namespace llvm; 38 39 STATISTIC(NumEmitted, "Number of machine instructions emitted"); 40 41 namespace { 42 template<class CodeEmitter> 43 class Emitter : public MachineFunctionPass { 44 const X86InstrInfo *II; 45 const TargetData *TD; 46 X86TargetMachine &TM; 47 CodeEmitter &MCE; 48 MachineModuleInfo *MMI; 49 intptr_t PICBaseOffset; 50 bool Is64BitMode; 51 bool IsPIC; 52 public: 53 static char ID; 54 explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce) 55 : MachineFunctionPass(ID), II(0), TD(0), TM(tm), 56 MCE(mce), PICBaseOffset(0), Is64BitMode(false), 57 IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} 58 Emitter(X86TargetMachine &tm, CodeEmitter &mce, 59 const X86InstrInfo &ii, const TargetData &td, bool is64) 60 : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm), 61 MCE(mce), PICBaseOffset(0), Is64BitMode(is64), 62 IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} 63 64 bool runOnMachineFunction(MachineFunction &MF); 65 66 virtual const char *getPassName() const { 67 return "X86 Machine Code Emitter"; 68 } 69 70 void emitOpcodePrefix(uint64_t TSFlags, int MemOperand, 71 const MachineInstr &MI, 72 const MCInstrDesc *Desc) const; 73 74 void emitVEXOpcodePrefix(uint64_t TSFlags, int MemOperand, 75 const MachineInstr &MI, 76 const MCInstrDesc *Desc) const; 77 78 void emitSegmentOverridePrefix(uint64_t TSFlags, 79 int MemOperand, 80 const MachineInstr &MI) const; 81 82 void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc); 83 84 void getAnalysisUsage(AnalysisUsage &AU) const { 85 AU.setPreservesAll(); 86 AU.addRequired<MachineModuleInfo>(); 87 MachineFunctionPass::getAnalysisUsage(AU); 88 } 89 90 private: 91 void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); 92 void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, 93 intptr_t Disp = 0, intptr_t PCAdj = 0, 94 bool Indirect = false); 95 void emitExternalSymbolAddress(const char *ES, unsigned Reloc); 96 void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0, 97 intptr_t PCAdj = 0); 98 void emitJumpTableAddress(unsigned JTI, unsigned Reloc, 99 intptr_t PCAdj = 0); 100 101 void emitDisplacementField(const MachineOperand *RelocOp, int DispVal, 102 intptr_t Adj = 0, bool IsPCRel = true); 103 104 void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField); 105 void emitRegModRMByte(unsigned RegOpcodeField); 106 void emitSIBByte(unsigned SS, unsigned Index, unsigned Base); 107 void emitConstant(uint64_t Val, unsigned Size); 108 109 void emitMemModRMByte(const MachineInstr &MI, 110 unsigned Op, unsigned RegOpcodeField, 111 intptr_t PCAdj = 0); 112 }; 113 114 template<class CodeEmitter> 115 char Emitter<CodeEmitter>::ID = 0; 116 } // end anonymous namespace. 117 118 /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code 119 /// to the specified templated MachineCodeEmitter object. 120 FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, 121 JITCodeEmitter &JCE) { 122 return new Emitter<JITCodeEmitter>(TM, JCE); 123 } 124 125 template<class CodeEmitter> 126 bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { 127 MMI = &getAnalysis<MachineModuleInfo>(); 128 MCE.setModuleInfo(MMI); 129 130 II = TM.getInstrInfo(); 131 TD = TM.getTargetData(); 132 Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit(); 133 IsPIC = TM.getRelocationModel() == Reloc::PIC_; 134 135 do { 136 DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n"); 137 MCE.startFunction(MF); 138 for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); 139 MBB != E; ++MBB) { 140 MCE.StartMachineBasicBlock(MBB); 141 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); 142 I != E; ++I) { 143 const MCInstrDesc &Desc = I->getDesc(); 144 emitInstruction(*I, &Desc); 145 // MOVPC32r is basically a call plus a pop instruction. 146 if (Desc.getOpcode() == X86::MOVPC32r) 147 emitInstruction(*I, &II->get(X86::POP32r)); 148 ++NumEmitted; // Keep track of the # of mi's emitted 149 } 150 } 151 } while (MCE.finishFunction(MF)); 152 153 return false; 154 } 155 156 /// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 157 /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand 158 /// size, and 3) use of X86-64 extended registers. 159 static unsigned determineREX(const MachineInstr &MI) { 160 unsigned REX = 0; 161 const MCInstrDesc &Desc = MI.getDesc(); 162 163 // Pseudo instructions do not need REX prefix byte. 164 if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) 165 return 0; 166 if (Desc.TSFlags & X86II::REX_W) 167 REX |= 1 << 3; 168 169 unsigned NumOps = Desc.getNumOperands(); 170 if (NumOps) { 171 bool isTwoAddr = NumOps > 1 && 172 Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; 173 174 // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. 175 unsigned i = isTwoAddr ? 1 : 0; 176 for (unsigned e = NumOps; i != e; ++i) { 177 const MachineOperand& MO = MI.getOperand(i); 178 if (MO.isReg()) { 179 unsigned Reg = MO.getReg(); 180 if (X86II::isX86_64NonExtLowByteReg(Reg)) 181 REX |= 0x40; 182 } 183 } 184 185 switch (Desc.TSFlags & X86II::FormMask) { 186 case X86II::MRMInitReg: 187 if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) 188 REX |= (1 << 0) | (1 << 2); 189 break; 190 case X86II::MRMSrcReg: { 191 if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) 192 REX |= 1 << 2; 193 i = isTwoAddr ? 2 : 1; 194 for (unsigned e = NumOps; i != e; ++i) { 195 const MachineOperand& MO = MI.getOperand(i); 196 if (X86InstrInfo::isX86_64ExtendedReg(MO)) 197 REX |= 1 << 0; 198 } 199 break; 200 } 201 case X86II::MRMSrcMem: { 202 if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) 203 REX |= 1 << 2; 204 unsigned Bit = 0; 205 i = isTwoAddr ? 2 : 1; 206 for (; i != NumOps; ++i) { 207 const MachineOperand& MO = MI.getOperand(i); 208 if (MO.isReg()) { 209 if (X86InstrInfo::isX86_64ExtendedReg(MO)) 210 REX |= 1 << Bit; 211 Bit++; 212 } 213 } 214 break; 215 } 216 case X86II::MRM0m: case X86II::MRM1m: 217 case X86II::MRM2m: case X86II::MRM3m: 218 case X86II::MRM4m: case X86II::MRM5m: 219 case X86II::MRM6m: case X86II::MRM7m: 220 case X86II::MRMDestMem: { 221 unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); 222 i = isTwoAddr ? 1 : 0; 223 if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e))) 224 REX |= 1 << 2; 225 unsigned Bit = 0; 226 for (; i != e; ++i) { 227 const MachineOperand& MO = MI.getOperand(i); 228 if (MO.isReg()) { 229 if (X86InstrInfo::isX86_64ExtendedReg(MO)) 230 REX |= 1 << Bit; 231 Bit++; 232 } 233 } 234 break; 235 } 236 default: { 237 if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) 238 REX |= 1 << 0; 239 i = isTwoAddr ? 2 : 1; 240 for (unsigned e = NumOps; i != e; ++i) { 241 const MachineOperand& MO = MI.getOperand(i); 242 if (X86InstrInfo::isX86_64ExtendedReg(MO)) 243 REX |= 1 << 2; 244 } 245 break; 246 } 247 } 248 } 249 return REX; 250 } 251 252 253 /// emitPCRelativeBlockAddress - This method keeps track of the information 254 /// necessary to resolve the address of this block later and emits a dummy 255 /// value. 256 /// 257 template<class CodeEmitter> 258 void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) { 259 // Remember where this reference was and where it is to so we can 260 // deal with it later. 261 MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), 262 X86::reloc_pcrel_word, MBB)); 263 MCE.emitWordLE(0); 264 } 265 266 /// emitGlobalAddress - Emit the specified address to the code stream assuming 267 /// this is part of a "take the address of a global" instruction. 268 /// 269 template<class CodeEmitter> 270 void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV, 271 unsigned Reloc, 272 intptr_t Disp /* = 0 */, 273 intptr_t PCAdj /* = 0 */, 274 bool Indirect /* = false */) { 275 intptr_t RelocCST = Disp; 276 if (Reloc == X86::reloc_picrel_word) 277 RelocCST = PICBaseOffset; 278 else if (Reloc == X86::reloc_pcrel_word) 279 RelocCST = PCAdj; 280 MachineRelocation MR = Indirect 281 ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, 282 const_cast<GlobalValue *>(GV), 283 RelocCST, false) 284 : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, 285 const_cast<GlobalValue *>(GV), RelocCST, false); 286 MCE.addRelocation(MR); 287 // The relocated value will be added to the displacement 288 if (Reloc == X86::reloc_absolute_dword) 289 MCE.emitDWordLE(Disp); 290 else 291 MCE.emitWordLE((int32_t)Disp); 292 } 293 294 /// emitExternalSymbolAddress - Arrange for the address of an external symbol to 295 /// be emitted to the current location in the function, and allow it to be PC 296 /// relative. 297 template<class CodeEmitter> 298 void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES, 299 unsigned Reloc) { 300 intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0; 301 302 // X86 never needs stubs because instruction selection will always pick 303 // an instruction sequence that is large enough to hold any address 304 // to a symbol. 305 // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall) 306 bool NeedStub = false; 307 MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), 308 Reloc, ES, RelocCST, 309 0, NeedStub)); 310 if (Reloc == X86::reloc_absolute_dword) 311 MCE.emitDWordLE(0); 312 else 313 MCE.emitWordLE(0); 314 } 315 316 /// emitConstPoolAddress - Arrange for the address of an constant pool 317 /// to be emitted to the current location in the function, and allow it to be PC 318 /// relative. 319 template<class CodeEmitter> 320 void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, unsigned Reloc, 321 intptr_t Disp /* = 0 */, 322 intptr_t PCAdj /* = 0 */) { 323 intptr_t RelocCST = 0; 324 if (Reloc == X86::reloc_picrel_word) 325 RelocCST = PICBaseOffset; 326 else if (Reloc == X86::reloc_pcrel_word) 327 RelocCST = PCAdj; 328 MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), 329 Reloc, CPI, RelocCST)); 330 // The relocated value will be added to the displacement 331 if (Reloc == X86::reloc_absolute_dword) 332 MCE.emitDWordLE(Disp); 333 else 334 MCE.emitWordLE((int32_t)Disp); 335 } 336 337 /// emitJumpTableAddress - Arrange for the address of a jump table to 338 /// be emitted to the current location in the function, and allow it to be PC 339 /// relative. 340 template<class CodeEmitter> 341 void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc, 342 intptr_t PCAdj /* = 0 */) { 343 intptr_t RelocCST = 0; 344 if (Reloc == X86::reloc_picrel_word) 345 RelocCST = PICBaseOffset; 346 else if (Reloc == X86::reloc_pcrel_word) 347 RelocCST = PCAdj; 348 MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), 349 Reloc, JTI, RelocCST)); 350 // The relocated value will be added to the displacement 351 if (Reloc == X86::reloc_absolute_dword) 352 MCE.emitDWordLE(0); 353 else 354 MCE.emitWordLE(0); 355 } 356 357 inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, 358 unsigned RM) { 359 assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); 360 return RM | (RegOpcode << 3) | (Mod << 6); 361 } 362 363 template<class CodeEmitter> 364 void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg, 365 unsigned RegOpcodeFld){ 366 MCE.emitByte(ModRMByte(3, RegOpcodeFld, X86_MC::getX86RegNum(ModRMReg))); 367 } 368 369 template<class CodeEmitter> 370 void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) { 371 MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0)); 372 } 373 374 template<class CodeEmitter> 375 void Emitter<CodeEmitter>::emitSIBByte(unsigned SS, 376 unsigned Index, 377 unsigned Base) { 378 // SIB byte is in the same format as the ModRMByte... 379 MCE.emitByte(ModRMByte(SS, Index, Base)); 380 } 381 382 template<class CodeEmitter> 383 void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) { 384 // Output the constant in little endian byte order... 385 for (unsigned i = 0; i != Size; ++i) { 386 MCE.emitByte(Val & 255); 387 Val >>= 8; 388 } 389 } 390 391 /// isDisp8 - Return true if this signed displacement fits in a 8-bit 392 /// sign-extended field. 393 static bool isDisp8(int Value) { 394 return Value == (signed char)Value; 395 } 396 397 static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp, 398 const TargetMachine &TM) { 399 // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer 400 // mechanism as 32-bit mode. 401 if (TM.getSubtarget<X86Subtarget>().is64Bit() && 402 !TM.getSubtarget<X86Subtarget>().isTargetDarwin()) 403 return false; 404 405 // Return true if this is a reference to a stub containing the address of the 406 // global, not the global itself. 407 return isGlobalStubReference(GVOp.getTargetFlags()); 408 } 409 410 template<class CodeEmitter> 411 void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp, 412 int DispVal, 413 intptr_t Adj /* = 0 */, 414 bool IsPCRel /* = true */) { 415 // If this is a simple integer displacement that doesn't require a relocation, 416 // emit it now. 417 if (!RelocOp) { 418 emitConstant(DispVal, 4); 419 return; 420 } 421 422 // Otherwise, this is something that requires a relocation. Emit it as such 423 // now. 424 unsigned RelocType = Is64BitMode ? 425 (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext) 426 : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); 427 if (RelocOp->isGlobal()) { 428 // In 64-bit static small code model, we could potentially emit absolute. 429 // But it's probably not beneficial. If the MCE supports using RIP directly 430 // do it, otherwise fallback to absolute (this is determined by IsPCRel). 431 // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative 432 // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute 433 bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM); 434 emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(), 435 Adj, Indirect); 436 } else if (RelocOp->isSymbol()) { 437 emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType); 438 } else if (RelocOp->isCPI()) { 439 emitConstPoolAddress(RelocOp->getIndex(), RelocType, 440 RelocOp->getOffset(), Adj); 441 } else { 442 assert(RelocOp->isJTI() && "Unexpected machine operand!"); 443 emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj); 444 } 445 } 446 447 template<class CodeEmitter> 448 void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, 449 unsigned Op,unsigned RegOpcodeField, 450 intptr_t PCAdj) { 451 const MachineOperand &Op3 = MI.getOperand(Op+3); 452 int DispVal = 0; 453 const MachineOperand *DispForReloc = 0; 454 455 // Figure out what sort of displacement we have to handle here. 456 if (Op3.isGlobal()) { 457 DispForReloc = &Op3; 458 } else if (Op3.isSymbol()) { 459 DispForReloc = &Op3; 460 } else if (Op3.isCPI()) { 461 if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) { 462 DispForReloc = &Op3; 463 } else { 464 DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex()); 465 DispVal += Op3.getOffset(); 466 } 467 } else if (Op3.isJTI()) { 468 if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) { 469 DispForReloc = &Op3; 470 } else { 471 DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex()); 472 } 473 } else { 474 DispVal = Op3.getImm(); 475 } 476 477 const MachineOperand &Base = MI.getOperand(Op); 478 const MachineOperand &Scale = MI.getOperand(Op+1); 479 const MachineOperand &IndexReg = MI.getOperand(Op+2); 480 481 unsigned BaseReg = Base.getReg(); 482 483 // Handle %rip relative addressing. 484 if (BaseReg == X86::RIP || 485 (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode 486 assert(IndexReg.getReg() == 0 && Is64BitMode && 487 "Invalid rip-relative address"); 488 MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); 489 emitDisplacementField(DispForReloc, DispVal, PCAdj, true); 490 return; 491 } 492 493 // Indicate that the displacement will use an pcrel or absolute reference 494 // by default. MCEs able to resolve addresses on-the-fly use pcrel by default 495 // while others, unless explicit asked to use RIP, use absolute references. 496 bool IsPCRel = MCE.earlyResolveAddresses() ? true : false; 497 498 // Is a SIB byte needed? 499 // If no BaseReg, issue a RIP relative instruction only if the MCE can 500 // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table 501 // 2-7) and absolute references. 502 unsigned BaseRegNo = -1U; 503 if (BaseReg != 0 && BaseReg != X86::RIP) 504 BaseRegNo = X86_MC::getX86RegNum(BaseReg); 505 506 if (// The SIB byte must be used if there is an index register. 507 IndexReg.getReg() == 0 && 508 // The SIB byte must be used if the base is ESP/RSP/R12, all of which 509 // encode to an R/M value of 4, which indicates that a SIB byte is 510 // present. 511 BaseRegNo != N86::ESP && 512 // If there is no base register and we're in 64-bit mode, we need a SIB 513 // byte to emit an addr that is just 'disp32' (the non-RIP relative form). 514 (!Is64BitMode || BaseReg != 0)) { 515 if (BaseReg == 0 || // [disp32] in X86-32 mode 516 BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode 517 MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); 518 emitDisplacementField(DispForReloc, DispVal, PCAdj, true); 519 return; 520 } 521 522 // If the base is not EBP/ESP and there is no displacement, use simple 523 // indirect register encoding, this handles addresses like [EAX]. The 524 // encoding for [EBP] with no displacement means [disp32] so we handle it 525 // by emitting a displacement of 0 below. 526 if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { 527 MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo)); 528 return; 529 } 530 531 // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. 532 if (!DispForReloc && isDisp8(DispVal)) { 533 MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); 534 emitConstant(DispVal, 1); 535 return; 536 } 537 538 // Otherwise, emit the most general non-SIB encoding: [REG+disp32] 539 MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); 540 emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); 541 return; 542 } 543 544 // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first. 545 assert(IndexReg.getReg() != X86::ESP && 546 IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); 547 548 bool ForceDisp32 = false; 549 bool ForceDisp8 = false; 550 if (BaseReg == 0) { 551 // If there is no base register, we emit the special case SIB byte with 552 // MOD=0, BASE=4, to JUST get the index, scale, and displacement. 553 MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); 554 ForceDisp32 = true; 555 } else if (DispForReloc) { 556 // Emit the normal disp32 encoding. 557 MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); 558 ForceDisp32 = true; 559 } else if (DispVal == 0 && BaseRegNo != N86::EBP) { 560 // Emit no displacement ModR/M byte 561 MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); 562 } else if (isDisp8(DispVal)) { 563 // Emit the disp8 encoding... 564 MCE.emitByte(ModRMByte(1, RegOpcodeField, 4)); 565 ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP 566 } else { 567 // Emit the normal disp32 encoding... 568 MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); 569 } 570 571 // Calculate what the SS field value should be... 572 static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 }; 573 unsigned SS = SSTable[Scale.getImm()]; 574 575 if (BaseReg == 0) { 576 // Handle the SIB byte for the case where there is no base, see Intel 577 // Manual 2A, table 2-7. The displacement has already been output. 578 unsigned IndexRegNo; 579 if (IndexReg.getReg()) 580 IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg()); 581 else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) 582 IndexRegNo = 4; 583 emitSIBByte(SS, IndexRegNo, 5); 584 } else { 585 unsigned BaseRegNo = X86_MC::getX86RegNum(BaseReg); 586 unsigned IndexRegNo; 587 if (IndexReg.getReg()) 588 IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg()); 589 else 590 IndexRegNo = 4; // For example [ESP+1*<noreg>+4] 591 emitSIBByte(SS, IndexRegNo, BaseRegNo); 592 } 593 594 // Do we need to output a displacement? 595 if (ForceDisp8) { 596 emitConstant(DispVal, 1); 597 } else if (DispVal != 0 || ForceDisp32) { 598 emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); 599 } 600 } 601 602 static const MCInstrDesc *UpdateOp(MachineInstr &MI, const X86InstrInfo *II, 603 unsigned Opcode) { 604 const MCInstrDesc *Desc = &II->get(Opcode); 605 MI.setDesc(*Desc); 606 return Desc; 607 } 608 609 /// Is16BitMemOperand - Return true if the specified instruction has 610 /// a 16-bit memory operand. Op specifies the operand # of the memoperand. 611 static bool Is16BitMemOperand(const MachineInstr &MI, unsigned Op) { 612 const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); 613 const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); 614 615 if ((BaseReg.getReg() != 0 && 616 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) || 617 (IndexReg.getReg() != 0 && 618 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg()))) 619 return true; 620 return false; 621 } 622 623 /// Is32BitMemOperand - Return true if the specified instruction has 624 /// a 32-bit memory operand. Op specifies the operand # of the memoperand. 625 static bool Is32BitMemOperand(const MachineInstr &MI, unsigned Op) { 626 const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); 627 const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); 628 629 if ((BaseReg.getReg() != 0 && 630 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) || 631 (IndexReg.getReg() != 0 && 632 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg()))) 633 return true; 634 return false; 635 } 636 637 /// Is64BitMemOperand - Return true if the specified instruction has 638 /// a 64-bit memory operand. Op specifies the operand # of the memoperand. 639 #ifndef NDEBUG 640 static bool Is64BitMemOperand(const MachineInstr &MI, unsigned Op) { 641 const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); 642 const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); 643 644 if ((BaseReg.getReg() != 0 && 645 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) || 646 (IndexReg.getReg() != 0 && 647 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg()))) 648 return true; 649 return false; 650 } 651 #endif 652 653 template<class CodeEmitter> 654 void Emitter<CodeEmitter>::emitOpcodePrefix(uint64_t TSFlags, 655 int MemOperand, 656 const MachineInstr &MI, 657 const MCInstrDesc *Desc) const { 658 // Emit the lock opcode prefix as needed. 659 if (Desc->TSFlags & X86II::LOCK) 660 MCE.emitByte(0xF0); 661 662 // Emit segment override opcode prefix as needed. 663 emitSegmentOverridePrefix(TSFlags, MemOperand, MI); 664 665 // Emit the repeat opcode prefix as needed. 666 if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) 667 MCE.emitByte(0xF3); 668 669 // Emit the address size opcode prefix as needed. 670 bool need_address_override; 671 if (TSFlags & X86II::AdSize) { 672 need_address_override = true; 673 } else if (MemOperand == -1) { 674 need_address_override = false; 675 } else if (Is64BitMode) { 676 assert(!Is16BitMemOperand(MI, MemOperand)); 677 need_address_override = Is32BitMemOperand(MI, MemOperand); 678 } else { 679 assert(!Is64BitMemOperand(MI, MemOperand)); 680 need_address_override = Is16BitMemOperand(MI, MemOperand); 681 } 682 683 if (need_address_override) 684 MCE.emitByte(0x67); 685 686 // Emit the operand size opcode prefix as needed. 687 if (TSFlags & X86II::OpSize) 688 MCE.emitByte(0x66); 689 690 bool Need0FPrefix = false; 691 switch (Desc->TSFlags & X86II::Op0Mask) { 692 case X86II::TB: // Two-byte opcode prefix 693 case X86II::T8: // 0F 38 694 case X86II::TA: // 0F 3A 695 case X86II::A6: // 0F A6 696 case X86II::A7: // 0F A7 697 Need0FPrefix = true; 698 break; 699 case X86II::REP: break; // already handled. 700 case X86II::T8XS: // F3 0F 38 701 case X86II::XS: // F3 0F 702 MCE.emitByte(0xF3); 703 Need0FPrefix = true; 704 break; 705 case X86II::T8XD: // F2 0F 38 706 case X86II::TAXD: // F2 0F 3A 707 case X86II::XD: // F2 0F 708 MCE.emitByte(0xF2); 709 Need0FPrefix = true; 710 break; 711 case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: 712 case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: 713 MCE.emitByte(0xD8+ 714 (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8) 715 >> X86II::Op0Shift)); 716 break; // Two-byte opcode prefix 717 default: llvm_unreachable("Invalid prefix!"); 718 case 0: break; // No prefix! 719 } 720 721 // Handle REX prefix. 722 if (Is64BitMode) { 723 if (unsigned REX = determineREX(MI)) 724 MCE.emitByte(0x40 | REX); 725 } 726 727 // 0x0F escape code must be emitted just before the opcode. 728 if (Need0FPrefix) 729 MCE.emitByte(0x0F); 730 731 switch (Desc->TSFlags & X86II::Op0Mask) { 732 case X86II::T8XD: // F2 0F 38 733 case X86II::T8XS: // F3 0F 38 734 case X86II::T8: // 0F 38 735 MCE.emitByte(0x38); 736 break; 737 case X86II::TAXD: // F2 0F 38 738 case X86II::TA: // 0F 3A 739 MCE.emitByte(0x3A); 740 break; 741 case X86II::A6: // 0F A6 742 MCE.emitByte(0xA6); 743 break; 744 case X86II::A7: // 0F A7 745 MCE.emitByte(0xA7); 746 break; 747 } 748 } 749 750 // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range 751 // 0-7 and the difference between the 2 groups is given by the REX prefix. 752 // In the VEX prefix, registers are seen sequencially from 0-15 and encoded 753 // in 1's complement form, example: 754 // 755 // ModRM field => XMM9 => 1 756 // VEX.VVVV => XMM9 => ~9 757 // 758 // See table 4-35 of Intel AVX Programming Reference for details. 759 static unsigned char getVEXRegisterEncoding(const MachineInstr &MI, 760 unsigned OpNum) { 761 unsigned SrcReg = MI.getOperand(OpNum).getReg(); 762 unsigned SrcRegNum = X86_MC::getX86RegNum(MI.getOperand(OpNum).getReg()); 763 if (X86II::isX86_64ExtendedReg(SrcReg)) 764 SrcRegNum |= 8; 765 766 // The registers represented through VEX_VVVV should 767 // be encoded in 1's complement form. 768 return (~SrcRegNum) & 0xf; 769 } 770 771 /// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed 772 template<class CodeEmitter> 773 void Emitter<CodeEmitter>::emitSegmentOverridePrefix(uint64_t TSFlags, 774 int MemOperand, 775 const MachineInstr &MI) const { 776 switch (TSFlags & X86II::SegOvrMask) { 777 default: llvm_unreachable("Invalid segment!"); 778 case 0: 779 // No segment override, check for explicit one on memory operand. 780 if (MemOperand != -1) { // If the instruction has a memory operand. 781 switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) { 782 default: llvm_unreachable("Unknown segment register!"); 783 case 0: break; 784 case X86::CS: MCE.emitByte(0x2E); break; 785 case X86::SS: MCE.emitByte(0x36); break; 786 case X86::DS: MCE.emitByte(0x3E); break; 787 case X86::ES: MCE.emitByte(0x26); break; 788 case X86::FS: MCE.emitByte(0x64); break; 789 case X86::GS: MCE.emitByte(0x65); break; 790 } 791 } 792 break; 793 case X86II::FS: 794 MCE.emitByte(0x64); 795 break; 796 case X86II::GS: 797 MCE.emitByte(0x65); 798 break; 799 } 800 } 801 802 template<class CodeEmitter> 803 void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, 804 int MemOperand, 805 const MachineInstr &MI, 806 const MCInstrDesc *Desc) const { 807 bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; 808 bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; 809 810 // VEX_R: opcode externsion equivalent to REX.R in 811 // 1's complement (inverted) form 812 // 813 // 1: Same as REX_R=0 (must be 1 in 32-bit mode) 814 // 0: Same as REX_R=1 (64 bit mode only) 815 // 816 unsigned char VEX_R = 0x1; 817 818 // VEX_X: equivalent to REX.X, only used when a 819 // register is used for index in SIB Byte. 820 // 821 // 1: Same as REX.X=0 (must be 1 in 32-bit mode) 822 // 0: Same as REX.X=1 (64-bit mode only) 823 unsigned char VEX_X = 0x1; 824 825 // VEX_B: 826 // 827 // 1: Same as REX_B=0 (ignored in 32-bit mode) 828 // 0: Same as REX_B=1 (64 bit mode only) 829 // 830 unsigned char VEX_B = 0x1; 831 832 // VEX_W: opcode specific (use like REX.W, or used for 833 // opcode extension, or ignored, depending on the opcode byte) 834 unsigned char VEX_W = 0; 835 836 // XOP: Use XOP prefix byte 0x8f instead of VEX. 837 unsigned char XOP = 0; 838 839 // VEX_5M (VEX m-mmmmm field): 840 // 841 // 0b00000: Reserved for future use 842 // 0b00001: implied 0F leading opcode 843 // 0b00010: implied 0F 38 leading opcode bytes 844 // 0b00011: implied 0F 3A leading opcode bytes 845 // 0b00100-0b11111: Reserved for future use 846 // 0b01000: XOP map select - 08h instructions with imm byte 847 // 0b10001: XOP map select - 09h instructions with no imm byte 848 unsigned char VEX_5M = 0x1; 849 850 // VEX_4V (VEX vvvv field): a register specifier 851 // (in 1's complement form) or 1111 if unused. 852 unsigned char VEX_4V = 0xf; 853 854 // VEX_L (Vector Length): 855 // 856 // 0: scalar or 128-bit vector 857 // 1: 256-bit vector 858 // 859 unsigned char VEX_L = 0; 860 861 // VEX_PP: opcode extension providing equivalent 862 // functionality of a SIMD prefix 863 // 864 // 0b00: None 865 // 0b01: 66 866 // 0b10: F3 867 // 0b11: F2 868 // 869 unsigned char VEX_PP = 0; 870 871 // Encode the operand size opcode prefix as needed. 872 if (TSFlags & X86II::OpSize) 873 VEX_PP = 0x01; 874 875 if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W) 876 VEX_W = 1; 877 878 if ((TSFlags >> X86II::VEXShift) & X86II::XOP) 879 XOP = 1; 880 881 if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) 882 VEX_L = 1; 883 884 switch (TSFlags & X86II::Op0Mask) { 885 default: llvm_unreachable("Invalid prefix!"); 886 case X86II::T8: // 0F 38 887 VEX_5M = 0x2; 888 break; 889 case X86II::TA: // 0F 3A 890 VEX_5M = 0x3; 891 break; 892 case X86II::T8XS: // F3 0F 38 893 VEX_PP = 0x2; 894 VEX_5M = 0x2; 895 break; 896 case X86II::T8XD: // F2 0F 38 897 VEX_PP = 0x3; 898 VEX_5M = 0x2; 899 break; 900 case X86II::TAXD: // F2 0F 3A 901 VEX_PP = 0x3; 902 VEX_5M = 0x3; 903 break; 904 case X86II::XS: // F3 0F 905 VEX_PP = 0x2; 906 break; 907 case X86II::XD: // F2 0F 908 VEX_PP = 0x3; 909 break; 910 case X86II::XOP8: 911 VEX_5M = 0x8; 912 break; 913 case X86II::XOP9: 914 VEX_5M = 0x9; 915 break; 916 case X86II::A6: // Bypass: Not used by VEX 917 case X86II::A7: // Bypass: Not used by VEX 918 case X86II::TB: // Bypass: Not used by VEX 919 case 0: 920 break; // No prefix! 921 } 922 923 924 // Set the vector length to 256-bit if YMM0-YMM15 is used 925 for (unsigned i = 0; i != MI.getNumOperands(); ++i) { 926 if (!MI.getOperand(i).isReg()) 927 continue; 928 if (MI.getOperand(i).isImplicit()) 929 continue; 930 unsigned SrcReg = MI.getOperand(i).getReg(); 931 if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) 932 VEX_L = 1; 933 } 934 935 // Classify VEX_B, VEX_4V, VEX_R, VEX_X 936 unsigned NumOps = Desc->getNumOperands(); 937 unsigned CurOp = 0; 938 if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) 939 ++CurOp; 940 else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { 941 assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); 942 // Special case for GATHER with 2 TIED_TO operands 943 // Skip the first 2 operands: dst, mask_wb 944 CurOp += 2; 945 } 946 947 switch (TSFlags & X86II::FormMask) { 948 case X86II::MRMInitReg: 949 // Duplicate register. 950 if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) 951 VEX_R = 0x0; 952 953 if (HasVEX_4V) 954 VEX_4V = getVEXRegisterEncoding(MI, CurOp); 955 if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) 956 VEX_B = 0x0; 957 if (HasVEX_4VOp3) 958 VEX_4V = getVEXRegisterEncoding(MI, CurOp); 959 break; 960 case X86II::MRMDestMem: { 961 // MRMDestMem instructions forms: 962 // MemAddr, src1(ModR/M) 963 // MemAddr, src1(VEX_4V), src2(ModR/M) 964 // MemAddr, src1(ModR/M), imm8 965 // 966 if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg())) 967 VEX_B = 0x0; 968 if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg())) 969 VEX_X = 0x0; 970 971 CurOp = X86::AddrNumOperands; 972 if (HasVEX_4V) 973 VEX_4V = getVEXRegisterEncoding(MI, CurOp++); 974 975 const MachineOperand &MO = MI.getOperand(CurOp); 976 if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) 977 VEX_R = 0x0; 978 break; 979 } 980 case X86II::MRMSrcMem: 981 // MRMSrcMem instructions forms: 982 // src1(ModR/M), MemAddr 983 // src1(ModR/M), src2(VEX_4V), MemAddr 984 // src1(ModR/M), MemAddr, imm8 985 // src1(ModR/M), MemAddr, src2(VEX_I8IMM) 986 // 987 // FMA4: 988 // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) 989 // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), 990 if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) 991 VEX_R = 0x0; 992 993 if (HasVEX_4V) 994 VEX_4V = getVEXRegisterEncoding(MI, 1); 995 996 if (X86II::isX86_64ExtendedReg( 997 MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) 998 VEX_B = 0x0; 999 if (X86II::isX86_64ExtendedReg( 1000 MI.getOperand(MemOperand+X86::AddrIndexReg).getReg())) 1001 VEX_X = 0x0; 1002 1003 if (HasVEX_4VOp3) 1004 VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1); 1005 break; 1006 case X86II::MRM0m: case X86II::MRM1m: 1007 case X86II::MRM2m: case X86II::MRM3m: 1008 case X86II::MRM4m: case X86II::MRM5m: 1009 case X86II::MRM6m: case X86II::MRM7m: { 1010 // MRM[0-9]m instructions forms: 1011 // MemAddr 1012 // src1(VEX_4V), MemAddr 1013 if (HasVEX_4V) 1014 VEX_4V = getVEXRegisterEncoding(MI, 0); 1015 1016 if (X86II::isX86_64ExtendedReg( 1017 MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) 1018 VEX_B = 0x0; 1019 if (X86II::isX86_64ExtendedReg( 1020 MI.getOperand(MemOperand+X86::AddrIndexReg).getReg())) 1021 VEX_X = 0x0; 1022 break; 1023 } 1024 case X86II::MRMSrcReg: 1025 // MRMSrcReg instructions forms: 1026 // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) 1027 // dst(ModR/M), src1(ModR/M) 1028 // dst(ModR/M), src1(ModR/M), imm8 1029 // 1030 if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) 1031 VEX_R = 0x0; 1032 CurOp++; 1033 1034 if (HasVEX_4V) 1035 VEX_4V = getVEXRegisterEncoding(MI, CurOp++); 1036 if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) 1037 VEX_B = 0x0; 1038 CurOp++; 1039 if (HasVEX_4VOp3) 1040 VEX_4V = getVEXRegisterEncoding(MI, CurOp); 1041 break; 1042 case X86II::MRMDestReg: 1043 // MRMDestReg instructions forms: 1044 // dst(ModR/M), src(ModR/M) 1045 // dst(ModR/M), src(ModR/M), imm8 1046 if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) 1047 VEX_B = 0x0; 1048 if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) 1049 VEX_R = 0x0; 1050 break; 1051 case X86II::MRM0r: case X86II::MRM1r: 1052 case X86II::MRM2r: case X86II::MRM3r: 1053 case X86II::MRM4r: case X86II::MRM5r: 1054 case X86II::MRM6r: case X86II::MRM7r: 1055 // MRM0r-MRM7r instructions forms: 1056 // dst(VEX_4V), src(ModR/M), imm8 1057 VEX_4V = getVEXRegisterEncoding(MI, 0); 1058 if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) 1059 VEX_B = 0x0; 1060 break; 1061 default: // RawFrm 1062 break; 1063 } 1064 1065 // Emit segment override opcode prefix as needed. 1066 emitSegmentOverridePrefix(TSFlags, MemOperand, MI); 1067 1068 // VEX opcode prefix can have 2 or 3 bytes 1069 // 1070 // 3 bytes: 1071 // +-----+ +--------------+ +-------------------+ 1072 // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | 1073 // +-----+ +--------------+ +-------------------+ 1074 // 2 bytes: 1075 // +-----+ +-------------------+ 1076 // | C5h | | R | vvvv | L | pp | 1077 // +-----+ +-------------------+ 1078 // 1079 unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); 1080 1081 if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix 1082 MCE.emitByte(0xC5); 1083 MCE.emitByte(LastByte | (VEX_R << 7)); 1084 return; 1085 } 1086 1087 // 3 byte VEX prefix 1088 MCE.emitByte(XOP ? 0x8F : 0xC4); 1089 MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M); 1090 MCE.emitByte(LastByte | (VEX_W << 7)); 1091 } 1092 1093 template<class CodeEmitter> 1094 void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, 1095 const MCInstrDesc *Desc) { 1096 DEBUG(dbgs() << MI); 1097 1098 // If this is a pseudo instruction, lower it. 1099 switch (Desc->getOpcode()) { 1100 case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break; 1101 case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break; 1102 case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break; 1103 case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break; 1104 case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break; 1105 case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break; 1106 case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break; 1107 case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break; 1108 case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break; 1109 case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break; 1110 case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break; 1111 case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break; 1112 case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break; 1113 case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break; 1114 case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break; 1115 case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break; 1116 case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break; 1117 } 1118 1119 1120 MCE.processDebugLoc(MI.getDebugLoc(), true); 1121 1122 unsigned Opcode = Desc->Opcode; 1123 1124 // If this is a two-address instruction, skip one of the register operands. 1125 unsigned NumOps = Desc->getNumOperands(); 1126 unsigned CurOp = 0; 1127 if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) 1128 ++CurOp; 1129 else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { 1130 assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); 1131 // Special case for GATHER with 2 TIED_TO operands 1132 // Skip the first 2 operands: dst, mask_wb 1133 CurOp += 2; 1134 } 1135 1136 uint64_t TSFlags = Desc->TSFlags; 1137 1138 // Is this instruction encoded using the AVX VEX prefix? 1139 bool HasVEXPrefix = (TSFlags >> X86II::VEXShift) & X86II::VEX; 1140 // It uses the VEX.VVVV field? 1141 bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; 1142 bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; 1143 bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; 1144 const unsigned MemOp4_I8IMMOperand = 2; 1145 1146 // Determine where the memory operand starts, if present. 1147 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode); 1148 if (MemoryOperand != -1) MemoryOperand += CurOp; 1149 1150 if (!HasVEXPrefix) 1151 emitOpcodePrefix(TSFlags, MemoryOperand, MI, Desc); 1152 else 1153 emitVEXOpcodePrefix(TSFlags, MemoryOperand, MI, Desc); 1154 1155 unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags); 1156 switch (TSFlags & X86II::FormMask) { 1157 default: 1158 llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); 1159 case X86II::Pseudo: 1160 // Remember the current PC offset, this is the PIC relocation 1161 // base address. 1162 switch (Opcode) { 1163 default: 1164 llvm_unreachable("pseudo instructions should be removed before code" 1165 " emission"); 1166 // Do nothing for Int_MemBarrier - it's just a comment. Add a debug 1167 // to make it slightly easier to see. 1168 case X86::Int_MemBarrier: 1169 DEBUG(dbgs() << "#MEMBARRIER\n"); 1170 break; 1171 1172 case TargetOpcode::INLINEASM: 1173 // We allow inline assembler nodes with empty bodies - they can 1174 // implicitly define registers, which is ok for JIT. 1175 if (MI.getOperand(0).getSymbolName()[0]) 1176 report_fatal_error("JIT does not support inline asm!"); 1177 break; 1178 case TargetOpcode::PROLOG_LABEL: 1179 case TargetOpcode::GC_LABEL: 1180 case TargetOpcode::EH_LABEL: 1181 MCE.emitLabel(MI.getOperand(0).getMCSymbol()); 1182 break; 1183 1184 case TargetOpcode::IMPLICIT_DEF: 1185 case TargetOpcode::KILL: 1186 break; 1187 case X86::MOVPC32r: { 1188 // This emits the "call" portion of this pseudo instruction. 1189 MCE.emitByte(BaseOpcode); 1190 emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags)); 1191 // Remember PIC base. 1192 PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset(); 1193 X86JITInfo *JTI = TM.getJITInfo(); 1194 JTI->setPICBase(MCE.getCurrentPCValue()); 1195 break; 1196 } 1197 } 1198 CurOp = NumOps; 1199 break; 1200 case X86II::RawFrm: { 1201 MCE.emitByte(BaseOpcode); 1202 1203 if (CurOp == NumOps) 1204 break; 1205 1206 const MachineOperand &MO = MI.getOperand(CurOp++); 1207 1208 DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n"); 1209 DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n"); 1210 DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n"); 1211 DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n"); 1212 DEBUG(dbgs() << "isImm " << MO.isImm() << "\n"); 1213 1214 if (MO.isMBB()) { 1215 emitPCRelativeBlockAddress(MO.getMBB()); 1216 break; 1217 } 1218 1219 if (MO.isGlobal()) { 1220 emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, 1221 MO.getOffset(), 0); 1222 break; 1223 } 1224 1225 if (MO.isSymbol()) { 1226 emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); 1227 break; 1228 } 1229 1230 // FIXME: Only used by hackish MCCodeEmitter, remove when dead. 1231 if (MO.isJTI()) { 1232 emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word); 1233 break; 1234 } 1235 1236 assert(MO.isImm() && "Unknown RawFrm operand!"); 1237 if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { 1238 // Fix up immediate operand for pc relative calls. 1239 intptr_t Imm = (intptr_t)MO.getImm(); 1240 Imm = Imm - MCE.getCurrentPCValue() - 4; 1241 emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags)); 1242 } else 1243 emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags)); 1244 break; 1245 } 1246 1247 case X86II::AddRegFrm: { 1248 MCE.emitByte(BaseOpcode + 1249 X86_MC::getX86RegNum(MI.getOperand(CurOp++).getReg())); 1250 1251 if (CurOp == NumOps) 1252 break; 1253 1254 const MachineOperand &MO1 = MI.getOperand(CurOp++); 1255 unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); 1256 if (MO1.isImm()) { 1257 emitConstant(MO1.getImm(), Size); 1258 break; 1259 } 1260 1261 unsigned rt = Is64BitMode ? X86::reloc_pcrel_word 1262 : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); 1263 if (Opcode == X86::MOV64ri64i32) 1264 rt = X86::reloc_absolute_word; // FIXME: add X86II flag? 1265 // This should not occur on Darwin for relocatable objects. 1266 if (Opcode == X86::MOV64ri) 1267 rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? 1268 if (MO1.isGlobal()) { 1269 bool Indirect = gvNeedsNonLazyPtr(MO1, TM); 1270 emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, 1271 Indirect); 1272 } else if (MO1.isSymbol()) 1273 emitExternalSymbolAddress(MO1.getSymbolName(), rt); 1274 else if (MO1.isCPI()) 1275 emitConstPoolAddress(MO1.getIndex(), rt); 1276 else if (MO1.isJTI()) 1277 emitJumpTableAddress(MO1.getIndex(), rt); 1278 break; 1279 } 1280 1281 case X86II::MRMDestReg: { 1282 MCE.emitByte(BaseOpcode); 1283 emitRegModRMByte(MI.getOperand(CurOp).getReg(), 1284 X86_MC::getX86RegNum(MI.getOperand(CurOp+1).getReg())); 1285 CurOp += 2; 1286 break; 1287 } 1288 case X86II::MRMDestMem: { 1289 MCE.emitByte(BaseOpcode); 1290 1291 unsigned SrcRegNum = CurOp + X86::AddrNumOperands; 1292 if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) 1293 SrcRegNum++; 1294 emitMemModRMByte(MI, CurOp, 1295 X86_MC::getX86RegNum(MI.getOperand(SrcRegNum).getReg())); 1296 CurOp = SrcRegNum + 1; 1297 break; 1298 } 1299 1300 case X86II::MRMSrcReg: { 1301 MCE.emitByte(BaseOpcode); 1302 1303 unsigned SrcRegNum = CurOp+1; 1304 if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) 1305 ++SrcRegNum; 1306 1307 if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM) 1308 ++SrcRegNum; 1309 1310 emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(), 1311 X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); 1312 // 2 operands skipped with HasMemOp4, compensate accordingly 1313 CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1; 1314 if (HasVEX_4VOp3) 1315 ++CurOp; 1316 break; 1317 } 1318 case X86II::MRMSrcMem: { 1319 int AddrOperands = X86::AddrNumOperands; 1320 unsigned FirstMemOp = CurOp+1; 1321 if (HasVEX_4V) { 1322 ++AddrOperands; 1323 ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). 1324 } 1325 if (HasMemOp4) // Skip second register source (encoded in I8IMM) 1326 ++FirstMemOp; 1327 1328 MCE.emitByte(BaseOpcode); 1329 1330 intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? 1331 X86II::getSizeOfImm(Desc->TSFlags) : 0; 1332 emitMemModRMByte(MI, FirstMemOp, 1333 X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj); 1334 CurOp += AddrOperands + 1; 1335 if (HasVEX_4VOp3) 1336 ++CurOp; 1337 break; 1338 } 1339 1340 case X86II::MRM0r: case X86II::MRM1r: 1341 case X86II::MRM2r: case X86II::MRM3r: 1342 case X86II::MRM4r: case X86II::MRM5r: 1343 case X86II::MRM6r: case X86II::MRM7r: { 1344 if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). 1345 ++CurOp; 1346 MCE.emitByte(BaseOpcode); 1347 emitRegModRMByte(MI.getOperand(CurOp++).getReg(), 1348 (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); 1349 1350 if (CurOp == NumOps) 1351 break; 1352 1353 const MachineOperand &MO1 = MI.getOperand(CurOp++); 1354 unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); 1355 if (MO1.isImm()) { 1356 emitConstant(MO1.getImm(), Size); 1357 break; 1358 } 1359 1360 unsigned rt = Is64BitMode ? X86::reloc_pcrel_word 1361 : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); 1362 if (Opcode == X86::MOV64ri32) 1363 rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? 1364 if (MO1.isGlobal()) { 1365 bool Indirect = gvNeedsNonLazyPtr(MO1, TM); 1366 emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, 1367 Indirect); 1368 } else if (MO1.isSymbol()) 1369 emitExternalSymbolAddress(MO1.getSymbolName(), rt); 1370 else if (MO1.isCPI()) 1371 emitConstPoolAddress(MO1.getIndex(), rt); 1372 else if (MO1.isJTI()) 1373 emitJumpTableAddress(MO1.getIndex(), rt); 1374 break; 1375 } 1376 1377 case X86II::MRM0m: case X86II::MRM1m: 1378 case X86II::MRM2m: case X86II::MRM3m: 1379 case X86II::MRM4m: case X86II::MRM5m: 1380 case X86II::MRM6m: case X86II::MRM7m: { 1381 if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). 1382 ++CurOp; 1383 intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ? 1384 (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ? 1385 X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0; 1386 1387 MCE.emitByte(BaseOpcode); 1388 emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m, 1389 PCAdj); 1390 CurOp += X86::AddrNumOperands; 1391 1392 if (CurOp == NumOps) 1393 break; 1394 1395 const MachineOperand &MO = MI.getOperand(CurOp++); 1396 unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); 1397 if (MO.isImm()) { 1398 emitConstant(MO.getImm(), Size); 1399 break; 1400 } 1401 1402 unsigned rt = Is64BitMode ? X86::reloc_pcrel_word 1403 : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); 1404 if (Opcode == X86::MOV64mi32) 1405 rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? 1406 if (MO.isGlobal()) { 1407 bool Indirect = gvNeedsNonLazyPtr(MO, TM); 1408 emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, 1409 Indirect); 1410 } else if (MO.isSymbol()) 1411 emitExternalSymbolAddress(MO.getSymbolName(), rt); 1412 else if (MO.isCPI()) 1413 emitConstPoolAddress(MO.getIndex(), rt); 1414 else if (MO.isJTI()) 1415 emitJumpTableAddress(MO.getIndex(), rt); 1416 break; 1417 } 1418 1419 case X86II::MRMInitReg: 1420 MCE.emitByte(BaseOpcode); 1421 // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). 1422 emitRegModRMByte(MI.getOperand(CurOp).getReg(), 1423 X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); 1424 ++CurOp; 1425 break; 1426 1427 case X86II::MRM_C1: 1428 MCE.emitByte(BaseOpcode); 1429 MCE.emitByte(0xC1); 1430 break; 1431 case X86II::MRM_C8: 1432 MCE.emitByte(BaseOpcode); 1433 MCE.emitByte(0xC8); 1434 break; 1435 case X86II::MRM_C9: 1436 MCE.emitByte(BaseOpcode); 1437 MCE.emitByte(0xC9); 1438 break; 1439 case X86II::MRM_E8: 1440 MCE.emitByte(BaseOpcode); 1441 MCE.emitByte(0xE8); 1442 break; 1443 case X86II::MRM_F0: 1444 MCE.emitByte(BaseOpcode); 1445 MCE.emitByte(0xF0); 1446 break; 1447 } 1448 1449 while (CurOp != NumOps && NumOps - CurOp <= 2) { 1450 // The last source register of a 4 operand instruction in AVX is encoded 1451 // in bits[7:4] of a immediate byte. 1452 if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { 1453 const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand 1454 : CurOp); 1455 ++CurOp; 1456 unsigned RegNum = X86_MC::getX86RegNum(MO.getReg()) << 4; 1457 if (X86II::isX86_64ExtendedReg(MO.getReg())) 1458 RegNum |= 1 << 7; 1459 // If there is an additional 5th operand it must be an immediate, which 1460 // is encoded in bits[3:0] 1461 if (CurOp != NumOps) { 1462 const MachineOperand &MIMM = MI.getOperand(CurOp++); 1463 if (MIMM.isImm()) { 1464 unsigned Val = MIMM.getImm(); 1465 assert(Val < 16 && "Immediate operand value out of range"); 1466 RegNum |= Val; 1467 } 1468 } 1469 emitConstant(RegNum, 1); 1470 } else { 1471 emitConstant(MI.getOperand(CurOp++).getImm(), 1472 X86II::getSizeOfImm(Desc->TSFlags)); 1473 } 1474 } 1475 1476 if (!MI.isVariadic() && CurOp != NumOps) { 1477 #ifndef NDEBUG 1478 dbgs() << "Cannot encode all operands of: " << MI << "\n"; 1479 #endif 1480 llvm_unreachable(0); 1481 } 1482 1483 MCE.processDebugLoc(MI.getDebugLoc(), false); 1484 } 1485