//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower X86 MachineInstrs to their corresponding
// MCInst records.
//
//===----------------------------------------------------------------------===//

#include "X86AsmPrinter.h"
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "Utils/X86ShuffleDecode.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetLoweringObjectFile.h"

using namespace llvm;

namespace {

/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class X86MCInstLower {
  MCContext &Ctx;
  const MachineFunction &MF;
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;
public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
                                          const MachineOperand &MO) const;
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  MachineModuleInfoMachO &getMachOMMI() const;
  Mangler *getMang() const {
    return AsmPrinter.Mang;
  }
};

} // end anonymous namespace

// Emit a minimal sequence of nops spanning NumBytes bytes.
static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
                     const MCSubtargetInfo &STI);

void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
                                                 const MCSubtargetInfo &STI,
                                                 MCCodeEmitter *CodeEmitter) {
  if (InShadow) {
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    raw_svector_ostream VecOS(Code);
    CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
    CurrentShadowSize += Code.size();
    if (CurrentShadowSize >= RequiredShadowSize)
      InShadow = false; // The shadow is big enough. Stop counting.
  }
}

void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
    MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
  if (InShadow && CurrentShadowSize < RequiredShadowSize) {
    InShadow = false;
    EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
             MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
  }
}

void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
  OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}

X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
      AsmPrinter(asmprinter) {}

MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
}


/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::
GetSymbolFromOperand(const MachineOperand &MO) const {
  const DataLayout &DL = MF.getDataLayout();
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
         "Isn't a symbol reference");

  MCSymbol *Sym = nullptr;
  SmallString<128> Name;
  StringRef Suffix;

  switch (MO.getTargetFlags()) {
  case X86II::MO_DLLIMPORT:
    // Handle dllimport linkage.
    Name += "__imp_";
    break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Suffix = "$non_lazy_ptr";
    break;
  }

  if (!Suffix.empty())
    Name += DL.getPrivateGlobalPrefix();

  if (MO.isGlobal()) {
    const GlobalValue *GV = MO.getGlobal();
    AsmPrinter.getNameWithPrefix(Name, GV);
  } else if (MO.isSymbol()) {
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
  } else if (MO.isMBB()) {
    assert(Suffix.empty());
    Sym = MO.getMBB()->getSymbol();
  }

  Name += Suffix;
  if (!Sym)
    Sym = Ctx.getOrCreateSymbol(Name);

  // If the target flags on the operand change the name of the symbol, do that
  // before we return the symbol.
  switch (MO.getTargetFlags()) {
  default: break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
    MachineModuleInfoImpl::StubValueTy &StubSym =
      getMachOMMI().getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym =
        MachineModuleInfoImpl::
        StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
                    !MO.getGlobal()->hasInternalLinkage());
    }
    break;
  }
  }

  return Sym;
}

MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
                                             MCSymbol *Sym) const {
  // FIXME: We would like an efficient form for this, so we don't have to do a
  // lot of extra uniquing.
  const MCExpr *Expr = nullptr;
  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;

  switch (MO.getTargetFlags()) {
  default: llvm_unreachable("Unknown target flag on GV operand");
  case X86II::MO_NO_FLAG:    // No flag.
  // These affect the name of the symbol, not any suffix.
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DLLIMPORT:
    break;

  case X86II::MO_TLVP:      RefKind = MCSymbolRefExpr::VK_TLVP; break;
  case X86II::MO_TLVP_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
    // Subtract the pic base.
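    // The resulting operand expression is Sym@TLVP minus the PIC base label.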
    Expr = MCBinaryExpr::createSub(Expr,
                                   MCSymbolRefExpr::create(MF.getPICBaseSymbol(),
                                                           Ctx),
                                   Ctx);
    break;
  case X86II::MO_SECREL:    RefKind = MCSymbolRefExpr::VK_SECREL; break;
  case X86II::MO_TLSGD:     RefKind = MCSymbolRefExpr::VK_TLSGD; break;
  case X86II::MO_TLSLD:     RefKind = MCSymbolRefExpr::VK_TLSLD; break;
  case X86II::MO_TLSLDM:    RefKind = MCSymbolRefExpr::VK_TLSLDM; break;
  case X86II::MO_GOTTPOFF:  RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
  case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
  case X86II::MO_TPOFF:     RefKind = MCSymbolRefExpr::VK_TPOFF; break;
  case X86II::MO_DTPOFF:    RefKind = MCSymbolRefExpr::VK_DTPOFF; break;
  case X86II::MO_NTPOFF:    RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
  case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break;
  case X86II::MO_GOTPCREL:  RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
  case X86II::MO_GOT:       RefKind = MCSymbolRefExpr::VK_GOT; break;
  case X86II::MO_GOTOFF:    RefKind = MCSymbolRefExpr::VK_GOTOFF; break;
  case X86II::MO_PLT:       RefKind = MCSymbolRefExpr::VK_PLT; break;
  case X86II::MO_PIC_BASE_OFFSET:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(Expr,
                            MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx),
                                   Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section, so we are restricting it to jump table references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->EmitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
    }
    break;
  }

  if (!Expr)
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);

  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(Expr,
                                   MCConstantExpr::create(MO.getOffset(), Ctx),
                                   Ctx);
  return MCOperand::createExpr(Expr);
}


/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instructions
/// with a short fixed-register form.
static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
  unsigned ImmOp = Inst.getNumOperands() - 1;
  assert(Inst.getOperand(0).isReg() &&
         (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
         ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
           Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
          Inst.getNumOperands() == 2) && "Unexpected instruction!");

  // Check whether the destination register can be fixed.
  unsigned Reg = Inst.getOperand(0).getReg();
  if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
    return;

  // If so, rewrite the instruction.
  MCOperand Saved = Inst.getOperand(ImmOp);
  Inst = MCInst();
  Inst.setOpcode(Opcode);
  Inst.addOperand(Saved);
}

/// \brief If a movsx instruction has a shorter encoding for the used register,
/// simplify the instruction to use it instead.
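// For example (AT&T syntax):
//   movsbw %al, %ax   --> cbtw
//   movswl %ax, %eax  --> cwtl
//   movslq %eax, %rax --> cltq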
static void SimplifyMOVSX(MCInst &Inst) {
  unsigned NewOpcode = 0;
  unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
  switch (Inst.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction!");
  case X86::MOVSX16rr8:  // movsbw %al, %ax   --> cbtw
    if (Op0 == X86::AX && Op1 == X86::AL)
      NewOpcode = X86::CBW;
    break;
  case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
    if (Op0 == X86::EAX && Op1 == X86::AX)
      NewOpcode = X86::CWDE;
    break;
  case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
    if (Op0 == X86::RAX && Op1 == X86::EAX)
      NewOpcode = X86::CDQE;
    break;
  }

  if (NewOpcode != 0) {
    Inst = MCInst();
    Inst.setOpcode(NewOpcode);
  }
}

/// \brief Simplify things like MOV32rm to MOV32o32a.
static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
                                  unsigned Opcode) {
  // Don't make these simplifications in 64-bit mode; other assemblers don't
  // perform them because they make the code larger.
  if (Printer.getSubtarget().is64Bit())
    return;

  bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
  unsigned AddrBase = IsStore;
  unsigned RegOp = IsStore ? 0 : 5;
  unsigned AddrOp = AddrBase + 3;
  assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
         Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
         Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
         Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
         Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
         (Inst.getOperand(AddrOp).isExpr() ||
          Inst.getOperand(AddrOp).isImm()) &&
         "Unexpected instruction!");

  // Check whether the destination register can be fixed.
  unsigned Reg = Inst.getOperand(RegOp).getReg();
  if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
    return;

  // Check whether this is an absolute address.
  // FIXME: We know TLVP symbol refs aren't, but there should be a better way
  // to do this here.
  bool Absolute = true;
  if (Inst.getOperand(AddrOp).isExpr()) {
    const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
    if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
      if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
        Absolute = false;
  }

  if (Absolute &&
      (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
       Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
       Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
    return;

  // If so, rewrite the instruction.
  MCOperand Saved = Inst.getOperand(AddrOp);
  MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
  Inst = MCInst();
  Inst.setOpcode(Opcode);
  Inst.addOperand(Saved);
  Inst.addOperand(Seg);
}

static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
  return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
}

Optional<MCOperand>
X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                    const MachineOperand &MO) const {
  switch (MO.getType()) {
  default:
    MI->dump();
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return None;
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers.
    return None;
  }
}

void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  for (const MachineOperand &MO : MI->operands())
    if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
      OutMI.addOperand(MaybeMCOp.getValue());

  // Handle a few special cases to eliminate operand modifiers.
ReSimplify:
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:
  case X86::LEA64r:
  case X86::LEA16r:
  case X86::LEA32r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;

  // Commute operands to get a smaller encoding by using VEX.R instead of
  // VEX.B if one of the registers is extended, but the other isn't.
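  // (The 2-byte VEX prefix can encode VEX.R but not VEX.B, so putting the
  // extended register in the ModRM reg field lets the shorter prefix be used.)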
  case X86::VMOVZPQILo2PQIrr:
  case X86::VMOVAPDrr:
  case X86::VMOVAPDYrr:
  case X86::VMOVAPSrr:
  case X86::VMOVAPSYrr:
  case X86::VMOVDQArr:
  case X86::VMOVDQAYrr:
  case X86::VMOVDQUrr:
  case X86::VMOVDQUYrr:
  case X86::VMOVUPDrr:
  case X86::VMOVUPDYrr:
  case X86::VMOVUPSrr:
  case X86::VMOVUPSYrr: {
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
      case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
      case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
      case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
      case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
      case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
      case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
      case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
      case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
      case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
      case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
      case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
      case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
      }
      OutMI.setOpcode(NewOpc);
    }
    break;
  }
  case X86::VMOVSDrr:
  case X86::VMOVSSrr: {
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
      case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
      }
      OutMI.setOpcode(NewOpc);
    }
    break;
  }

  // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
  // inputs modeled as normal uses instead of implicit uses. As such, truncate
  // off all but the first operand (the callee). FIXME: Change isel.
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::CALL64r:
  case X86::CALL64pcrel32: {
    unsigned Opcode = OutMI.getOpcode();
    MCOperand Saved = OutMI.getOperand(0);
    OutMI = MCInst();
    OutMI.setOpcode(Opcode);
    OutMI.addOperand(Saved);
    break;
  }

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }

  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }

  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }

  // TAILJMPr, TAILJMPd, TAILJMPd64 - Lower to the correct jump instruction.
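  // (e.g. TAILJMPr becomes "jmpl *%reg" and TAILJMPd/TAILJMPd64 become a
  // direct "jmp", matching the switch below.)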
  case X86::TAILJMPr:
  case X86::TAILJMPd:
  case X86::TAILJMPd64: {
    unsigned Opcode;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::TAILJMPr:   Opcode = X86::JMP32r; break;
    case X86::TAILJMPd:
    case X86::TAILJMPd64: Opcode = X86::JMP_1;  break;
    }

    MCOperand Saved = OutMI.getOperand(0);
    OutMI = MCInst();
    OutMI.setOpcode(Opcode);
    OutMI.addOperand(Saved);
    break;
  }

  case X86::DEC16r:
  case X86::DEC32r:
  case X86::INC16r:
  case X86::INC32r:
    // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
    if (!AsmPrinter.getSubtarget().is64Bit()) {
      unsigned Opcode;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
      case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
      case X86::INC16r: Opcode = X86::INC16r_alt; break;
      case X86::INC32r: Opcode = X86::INC32r_alt; break;
      }
      OutMI.setOpcode(Opcode);
    }
    break;

  // These are pseudo-ops for OR to help with the OR->ADD transformation.  We
  // do this with an ugly goto in case the resultant OR uses EAX and needs the
  // short form.
  case X86::ADD16rr_DB:   OutMI.setOpcode(X86::OR16rr);   goto ReSimplify;
  case X86::ADD32rr_DB:   OutMI.setOpcode(X86::OR32rr);   goto ReSimplify;
  case X86::ADD64rr_DB:   OutMI.setOpcode(X86::OR64rr);   goto ReSimplify;
  case X86::ADD16ri_DB:   OutMI.setOpcode(X86::OR16ri);   goto ReSimplify;
  case X86::ADD32ri_DB:   OutMI.setOpcode(X86::OR32ri);   goto ReSimplify;
  case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
  case X86::ADD16ri8_DB:  OutMI.setOpcode(X86::OR16ri8);  goto ReSimplify;
  case X86::ADD32ri8_DB:  OutMI.setOpcode(X86::OR32ri8);  goto ReSimplify;
  case X86::ADD64ri8_DB:  OutMI.setOpcode(X86::OR64ri8);  goto ReSimplify;

  // Atomic load and store require a separate pseudo-inst because Acquire
  // implies mayStore and Release implies mayLoad; fix these to regular MOV
  // instructions here.
  case X86::ACQUIRE_MOV8rm:    OutMI.setOpcode(X86::MOV8rm);    goto ReSimplify;
  case X86::ACQUIRE_MOV16rm:   OutMI.setOpcode(X86::MOV16rm);   goto ReSimplify;
  case X86::ACQUIRE_MOV32rm:   OutMI.setOpcode(X86::MOV32rm);   goto ReSimplify;
  case X86::ACQUIRE_MOV64rm:   OutMI.setOpcode(X86::MOV64rm);   goto ReSimplify;
  case X86::RELEASE_MOV8mr:    OutMI.setOpcode(X86::MOV8mr);    goto ReSimplify;
  case X86::RELEASE_MOV16mr:   OutMI.setOpcode(X86::MOV16mr);   goto ReSimplify;
  case X86::RELEASE_MOV32mr:   OutMI.setOpcode(X86::MOV32mr);   goto ReSimplify;
  case X86::RELEASE_MOV64mr:   OutMI.setOpcode(X86::MOV64mr);   goto ReSimplify;
  case X86::RELEASE_MOV8mi:    OutMI.setOpcode(X86::MOV8mi);    goto ReSimplify;
  case X86::RELEASE_MOV16mi:   OutMI.setOpcode(X86::MOV16mi);   goto ReSimplify;
  case X86::RELEASE_MOV32mi:   OutMI.setOpcode(X86::MOV32mi);   goto ReSimplify;
  case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
  case X86::RELEASE_ADD8mi:    OutMI.setOpcode(X86::ADD8mi);    goto ReSimplify;
  case X86::RELEASE_ADD8mr:    OutMI.setOpcode(X86::ADD8mr);    goto ReSimplify;
  case X86::RELEASE_ADD32mi:   OutMI.setOpcode(X86::ADD32mi);   goto ReSimplify;
  case X86::RELEASE_ADD32mr:   OutMI.setOpcode(X86::ADD32mr);   goto ReSimplify;
  case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
  case X86::RELEASE_ADD64mr:   OutMI.setOpcode(X86::ADD64mr);   goto ReSimplify;
  case X86::RELEASE_AND8mi:    OutMI.setOpcode(X86::AND8mi);    goto ReSimplify;
  case X86::RELEASE_AND8mr:    OutMI.setOpcode(X86::AND8mr);    goto ReSimplify;
  case X86::RELEASE_AND32mi:   OutMI.setOpcode(X86::AND32mi);   goto ReSimplify;
  case X86::RELEASE_AND32mr:   OutMI.setOpcode(X86::AND32mr);   goto ReSimplify;
  case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
  case X86::RELEASE_AND64mr:   OutMI.setOpcode(X86::AND64mr);   goto ReSimplify;
  case X86::RELEASE_OR8mi:     OutMI.setOpcode(X86::OR8mi);     goto ReSimplify;
  case X86::RELEASE_OR8mr:     OutMI.setOpcode(X86::OR8mr);     goto ReSimplify;
  case X86::RELEASE_OR32mi:    OutMI.setOpcode(X86::OR32mi);    goto ReSimplify;
  case X86::RELEASE_OR32mr:    OutMI.setOpcode(X86::OR32mr);    goto ReSimplify;
  case X86::RELEASE_OR64mi32:  OutMI.setOpcode(X86::OR64mi32);  goto ReSimplify;
  case X86::RELEASE_OR64mr:    OutMI.setOpcode(X86::OR64mr);    goto ReSimplify;
  case X86::RELEASE_XOR8mi:    OutMI.setOpcode(X86::XOR8mi);    goto ReSimplify;
  case X86::RELEASE_XOR8mr:    OutMI.setOpcode(X86::XOR8mr);    goto ReSimplify;
  case X86::RELEASE_XOR32mi:   OutMI.setOpcode(X86::XOR32mi);   goto ReSimplify;
  case X86::RELEASE_XOR32mr:   OutMI.setOpcode(X86::XOR32mr);   goto ReSimplify;
  case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
  case X86::RELEASE_XOR64mr:   OutMI.setOpcode(X86::XOR64mr);   goto ReSimplify;
  case X86::RELEASE_INC8m:     OutMI.setOpcode(X86::INC8m);     goto ReSimplify;
  case X86::RELEASE_INC16m:    OutMI.setOpcode(X86::INC16m);    goto ReSimplify;
  case X86::RELEASE_INC32m:    OutMI.setOpcode(X86::INC32m);    goto ReSimplify;
  case X86::RELEASE_INC64m:    OutMI.setOpcode(X86::INC64m);    goto ReSimplify;
  case X86::RELEASE_DEC8m:     OutMI.setOpcode(X86::DEC8m);     goto ReSimplify;
  case X86::RELEASE_DEC16m:    OutMI.setOpcode(X86::DEC16m);    goto ReSimplify;
  case X86::RELEASE_DEC32m:    OutMI.setOpcode(X86::DEC32m);    goto ReSimplify;
  case X86::RELEASE_DEC64m:    OutMI.setOpcode(X86::DEC64m);    goto ReSimplify;

  // We don't currently select the correct instruction form for instructions
  // which have a short %eax, etc. form. Handle this by custom lowering, for
  // now.
  //
  // Note, we are currently not handling the following instructions:
  // MOV64ao8, MOV64o8a
  // XCHG16ar, XCHG32ar, XCHG64ar
  case X86::MOV8mr_NOREX:
  case X86::MOV8mr:
  case X86::MOV8rm_NOREX:
  case X86::MOV8rm:
  case X86::MOV16mr:
  case X86::MOV16rm:
  case X86::MOV32mr:
  case X86::MOV32rm: {
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MOV8mr_NOREX:
    case X86::MOV8mr:  NewOpc = X86::MOV8o32a;  break;
    case X86::MOV8rm_NOREX:
    case X86::MOV8rm:  NewOpc = X86::MOV8ao32;  break;
    case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
    case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
    case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
    case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
    }
    SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
    break;
  }

  case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
  case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
  case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
  case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
  case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
  case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
  case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
  case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
  case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
    case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
    case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
    case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
    case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
    case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
    case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
    case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
    case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
    case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
    case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
    case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
    case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
    case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
    case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
    case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
    case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
    case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
    case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
    case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
    case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
    case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
    case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
    case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
    case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
    case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
    case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
    case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
    case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
    case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
    case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
    case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
    case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
    case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
    case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
    case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
    }
    SimplifyShortImmForm(OutMI, NewOpc);
    break;
  }

  // Try to shrink some forms of movsx.
  case X86::MOVSX16rr8:
  case X86::MOVSX32rr16:
  case X86::MOVSX64rr32:
    SimplifyMOVSX(OutMI);
    break;
  }
}

void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {

  bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
                  MI.getOpcode() == X86::TLS_base_addr64;

  bool needsPadding = MI.getOpcode() == X86::TLS_addr64;

  MCContext &context = OutStreamer->getContext();

  if (needsPadding)
    EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));

  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
    SRVK = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86::TLS_base_addr32:
    SRVK = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86::TLS_base_addr64:
    SRVK = MCSymbolRefExpr::VK_TLSLD;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
  const MCSymbolRefExpr *symRef = MCSymbolRefExpr::create(sym, SRVK, context);

  MCInst LEA;
  if (is64Bits) {
    LEA.setOpcode(X86::LEA64r);
    LEA.addOperand(MCOperand::createReg(X86::RDI)); // dest
    LEA.addOperand(MCOperand::createReg(X86::RIP)); // base
    LEA.addOperand(MCOperand::createImm(1));        // scale
    LEA.addOperand(MCOperand::createReg(0));        // index
    LEA.addOperand(MCOperand::createExpr(symRef));  // disp
    LEA.addOperand(MCOperand::createReg(0));        // seg
  } else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) {
    LEA.setOpcode(X86::LEA32r);
    LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
    LEA.addOperand(MCOperand::createReg(X86::EBX)); // base
    LEA.addOperand(MCOperand::createImm(1));        // scale
    LEA.addOperand(MCOperand::createReg(0));        // index
    LEA.addOperand(MCOperand::createExpr(symRef));  // disp
    LEA.addOperand(MCOperand::createReg(0));        // seg
  } else {
    LEA.setOpcode(X86::LEA32r);
    LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
    LEA.addOperand(MCOperand::createReg(0));        // base
    LEA.addOperand(MCOperand::createImm(1));        // scale
    LEA.addOperand(MCOperand::createReg(X86::EBX)); // index
    LEA.addOperand(MCOperand::createExpr(symRef));  // disp
    LEA.addOperand(MCOperand::createReg(0));        // seg
  }
  EmitAndCountInstruction(LEA);

  if (needsPadding) {
    EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
  }

  StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
  MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name);
  const MCSymbolRefExpr *tlsRef =
    MCSymbolRefExpr::create(tlsGetAddr,
                            MCSymbolRefExpr::VK_PLT,
                            context);

  EmitAndCountInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32
                                                 : X86::CALLpcrel32)
                          .addExpr(tlsRef));
}

/// \brief Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes. Return the size of the nop emitted.
static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
                        const MCSubtargetInfo &STI) {
  // This works only for 64-bit. For 32-bit we have to do additional checking
  // if the CPU supports multi-byte nops.
  assert(Is64Bit && "EmitNops only supports X86-64");

  unsigned NopSize;
  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
  Opc = IndexReg = Displacement = SegmentReg = 0;
  BaseReg = X86::RAX;
  ScaleVal = 1;
  switch (NumBytes) {
  case 0: llvm_unreachable("Zero nops?"); break;
  case 1: NopSize = 1; Opc = X86::NOOP; break;
  case 2: NopSize = 2; Opc = X86::XCHG16ar; break;
  case 3: NopSize = 3; Opc = X86::NOOPL; break;
  case 4: NopSize = 4; Opc = X86::NOOPL; Displacement = 8; break;
  case 5: NopSize = 5; Opc = X86::NOOPL; Displacement = 8;
          IndexReg = X86::RAX; break;
  case 6: NopSize = 6; Opc = X86::NOOPW; Displacement = 8;
          IndexReg = X86::RAX; break;
  case 7: NopSize = 7; Opc = X86::NOOPL; Displacement = 512; break;
  case 8: NopSize = 8; Opc = X86::NOOPL; Displacement = 512;
          IndexReg = X86::RAX; break;
  case 9: NopSize = 9; Opc = X86::NOOPW; Displacement = 512;
          IndexReg = X86::RAX; break;
  default: NopSize = 10; Opc = X86::NOOPW; Displacement = 512;
           IndexReg = X86::RAX; SegmentReg = X86::CS; break;
  }

  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
  NopSize += NumPrefixes;
  for (unsigned i = 0; i != NumPrefixes; ++i)
    OS.EmitBytes("\x66");

  switch (Opc) {
  default:
    llvm_unreachable("Unexpected opcode");
    break;
  case X86::NOOP:
    OS.EmitInstruction(MCInstBuilder(Opc), STI);
    break;
  case X86::XCHG16ar:
    OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX), STI);
    break;
  case X86::NOOPL:
  case X86::NOOPW:
    OS.EmitInstruction(MCInstBuilder(Opc)
                           .addReg(BaseReg)
                           .addImm(ScaleVal)
                           .addReg(IndexReg)
                           .addImm(Displacement)
                           .addReg(SegmentReg),
                       STI);
    break;
  }
  assert(NopSize <= NumBytes && "We overemitted?");
  return NopSize;
}

/// \brief Emit the optimal amount of multi-byte nops on X86.
static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
                     const MCSubtargetInfo &STI) {
  unsigned NopsToEmit = NumBytes;
  (void)NopsToEmit;
  while (NumBytes) {
    NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI);
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
  }
}

void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
             getSubtargetInfo());
  } else {
    // Lower call target and choose correct opcode.
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address. You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate. You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call.
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->EmitInstruction(CallInst, getSubtargetInfo());
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT.
  SM.recordStatepoint(MI);
}

void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI,
                                          X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <MBB handler>, <load opcode>, <load operands>

  unsigned LoadDefRegister = MI.getOperand(0).getReg();
  MCSymbol *HandlerLabel = MI.getOperand(1).getMBB()->getSymbol();
  unsigned LoadOpcode = MI.getOperand(2).getImm();
  unsigned LoadOperandsBeginIdx = 3;

  FM.recordFaultingOp(FaultMaps::FaultingLoad, HandlerLabel);

  MCInst LoadMI;
  LoadMI.setOpcode(LoadOpcode);

  if (LoadDefRegister != X86::NoRegister)
    LoadMI.addOperand(MCOperand::createReg(LoadDefRegister));

  for (auto I = MI.operands_begin() + LoadOperandsBeginIdx,
            E = MI.operands_end();
       I != E; ++I)
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, *I))
      LoadMI.addOperand(MaybeOperand.getValue());

  OutStreamer->EmitInstruction(LoadMI, getSubtargetInfo());
}

void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                      X86MCInstLower &MCIL) {
  // PATCHABLE_OP minsize, opcode, operands

  unsigned MinSize = MI.getOperand(0).getImm();
  unsigned Opcode = MI.getOperand(1).getImm();

  MCInst MCI;
  MCI.setOpcode(Opcode);
  for (auto &MO : make_range(MI.operands_begin() + 2, MI.operands_end()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      MCI.addOperand(MaybeOperand.getValue());

  SmallString<256> Code;
  SmallVector<MCFixup, 4> Fixups;
  raw_svector_ostream VecOS(Code);
  CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());

  if (Code.size() < MinSize) {
    if (MinSize == 2 && Opcode == X86::PUSH64r) {
      // This is an optimization that lets us get away without emitting a nop
      // in many cases.
      //
      // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %R9) takes
      // two bytes too, so the check on MinSize is important.
      MCI.setOpcode(X86::PUSH64rmr);
    } else {
      unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(),
                                 getSubtargetInfo());
      assert(NopSize == MinSize && "Could not implement MinSize!");
      (void) NopSize;
    }
  }

  OutStreamer->EmitInstruction(MCI, getSubtargetInfo());
}

// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
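// A STACKMAP emits no instructions itself: it flushes any shadow padding
// still owed, records the stack map entry, and re-arms the shadow tracker
// with the requested number of shadow bytes.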
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
  SM.recordStackMap(MI);
  unsigned NumShadowBytes = MI.getOperand(1).getImm();
  SMShadowTracker.reset(NumShadowBytes);
}

// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");

  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  SM.recordPatchPoint(MI);

  PatchPointOpers opers(&MI);
  unsigned ScratchIdx = opers.getNextScratchIdx();
  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO =
    opers.getMetaOper(PatchPointOpers::TargetPos);

  // Check for null target. If target is non-null (i.e. is non-zero or is
  // symbolic) then emit a call.
  if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
    MCOperand CalleeMCOp;
    switch (CalleeMO.getType()) {
    default:
      /// FIXME: Add a verifier check for bad callee types.
      llvm_unreachable("Unrecognized callee operand type.");
    case MachineOperand::MO_Immediate:
      if (CalleeMO.getImm())
        CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
      break;
    case MachineOperand::MO_ExternalSymbol:
    case MachineOperand::MO_GlobalAddress:
      CalleeMCOp =
        MCIL.LowerSymbolOperand(CalleeMO,
                                MCIL.GetSymbolFromOperand(CalleeMO));
      break;
    }

    // Emit MOV to materialize the target address and the CALL to target.
    // This is encoded with 12-13 bytes, depending on which register is used.
    unsigned ScratchReg = MI.getOperand(ScratchIdx).getReg();
    if (X86II::isX86_64ExtendedReg(ScratchReg))
      EncodedBytes = 13;
    else
      EncodedBytes = 12;

    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
  }

  // Emit padding.
  unsigned NumBytes = opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
  assert(NumBytes >= EncodedBytes &&
         "Patchpoint can't request size less than the length of a call.");

  EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
           getSubtargetInfo());
}

void X86AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
                               SledKind Kind) {
  auto Fn = MI.getParent()->getParent()->getFunction();
  auto Attr = Fn->getFnAttribute("function-instrument");
  bool AlwaysInstrument =
    Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
  Sleds.emplace_back(
      XRayFunctionEntry{Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn});
}

void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                  X86MCInstLower &MCIL) {
  // We want to emit the following pattern:
  //
  //   .Lxray_sled_N:
  //     .palign 2, ...
  //     jmp .tmpN
  //     # 9 bytes worth of noops
  //   .tmpN
  //
  // We need the 9 bytes because at runtime, we'd be patching over the full 11
  // bytes with the following pattern:
  //
  //   mov %r10, <function id, 32-bit>   // 6 bytes
  //   call <relative offset, 32-bits>   // 5 bytes
  //
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->EmitLabel(CurSled);
  OutStreamer->EmitCodeAlignment(4);
  auto Target = OutContext.createTempSymbol();

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset
  // as an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->EmitBytes("\xeb\x09");
  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
  OutStreamer->EmitLabel(Target);
  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER);
}

void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
  // Since PATCHABLE_RET takes the opcode of the return statement as an
  // argument, we use that to emit the correct form of the RET that we want.
  // i.e. when we see this:
  //
  //   PATCHABLE_RET X86::RET ...
  //
  // We should emit the RET followed by sleds.
  //
  //   .Lxray_sled_N:
  //     ret  # or equivalent instruction
  //     # 10 bytes worth of noops
  //
  // This just makes sure that the alignment for the next instruction is 2.
  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
  OutStreamer->EmitLabel(CurSled);
  unsigned OpCode = MI.getOperand(0).getImm();
  MCInst Ret;
  Ret.setOpcode(OpCode);
  for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      Ret.addOperand(MaybeOperand.getValue());
  OutStreamer->EmitInstruction(Ret, getSubtargetInfo());
  EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo());
  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
}

void X86AsmPrinter::EmitXRayTable() {
  if (Sleds.empty())
    return;
  if (Subtarget->isTargetELF()) {
    auto *Section = OutContext.getELFSection(
        "xray_instr_map", ELF::SHT_PROGBITS,
        ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0,
        CurrentFnSym->getName());
    auto PrevSection = OutStreamer->getCurrentSectionOnly();
    OutStreamer->SwitchSection(Section);
    for (const auto &Sled : Sleds) {
      OutStreamer->EmitSymbolValue(Sled.Sled, 8);
      OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
      auto Kind = static_cast<uint8_t>(Sled.Kind);
      OutStreamer->EmitBytes(
          StringRef(reinterpret_cast<const char *>(&Kind), 1));
      OutStreamer->EmitBytes(
          StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
      OutStreamer->EmitZeros(14);
    }
    OutStreamer->SwitchSection(PrevSection);
  }
  Sleds.clear();
}

// Returns the instruction preceding MBBI in its MachineFunction.
// If MBBI is the first instruction of the first basic block, returns null.
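// The walk proceeds backwards in layout order, skipping over any empty basic
// blocks along the way.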
static MachineBasicBlock::const_iterator
PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
  const MachineBasicBlock *MBB = MBBI->getParent();
  while (MBBI == MBB->begin()) {
    if (MBB == &MBB->getParent()->front())
      return MachineBasicBlock::const_iterator();
    MBB = MBB->getPrevNode();
    MBBI = MBB->end();
  }
  return --MBBI;
}

static const Constant *getConstantFromPool(const MachineInstr &MI,
                                           const MachineOperand &Op) {
  if (!Op.isCPI())
    return nullptr;

  ArrayRef<MachineConstantPoolEntry> Constants =
      MI.getParent()->getParent()->getConstantPool()->getConstants();
  const MachineConstantPoolEntry &ConstantEntry =
      Constants[Op.getIndex()];

  // Bail if this is a machine constant pool entry; we won't be able to dig
  // out anything useful.
  if (ConstantEntry.isMachineConstantPoolEntry())
    return nullptr;

  auto *C = dyn_cast<Constant>(ConstantEntry.Val.ConstVal);
  assert((!C || ConstantEntry.getType() == C->getType()) &&
         "Expected a constant of the same type!");
  return C;
}

static std::string getShuffleComment(const MachineOperand &DstOp,
                                     const MachineOperand &SrcOp1,
                                     const MachineOperand &SrcOp2,
                                     ArrayRef<int> Mask) {
  std::string Comment;

  // Compute the name for a register. This is really goofy because we have
  // multiple instruction printers that could (in theory) use different
  // names. Fortunately most people use the ATT style (outside of Windows)
  // and they actually agree on register naming here. Ultimately, this is
  // a comment, and so it's OK if it isn't perfect.
  auto GetRegisterName = [](unsigned RegNum) -> StringRef {
    return X86ATTInstPrinter::getRegisterName(RegNum);
  };

  // TODO: Add support for specifying an AVX512 style mask register in the
  // comment.
  StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
  StringRef Src1Name =
      SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
  StringRef Src2Name =
      SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";

  // One source operand, fix the mask to print all elements in one span.
  SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
  if (Src1Name == Src2Name)
    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
      if (ShuffleMask[i] >= e)
        ShuffleMask[i] -= e;

  raw_string_ostream CS(Comment);
  CS << DstName << " = ";
  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
    if (i != 0)
      CS << ",";
    if (ShuffleMask[i] == SM_SentinelZero) {
      CS << "zero";
      continue;
    }

    // Otherwise, it must come from src1 or src2.  Print the span of elements
    // that comes from this src.
    bool isSrc1 = ShuffleMask[i] < (int)e;
    CS << (isSrc1 ? Src1Name : Src2Name) << '[';

    bool IsFirst = true;
    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
           (ShuffleMask[i] < (int)e) == isSrc1) {
      if (!IsFirst)
        CS << ',';
      else
        IsFirst = false;
      if (ShuffleMask[i] == SM_SentinelUndef)
        CS << "u";
      else
        CS << ShuffleMask[i] % (int)e;
      ++i;
    }
    CS << ']';
    --i; // For loop increments element #.
  }
  CS.flush();

  return Comment;
}

void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
  X86MCInstLower MCInstLowering(*MF, *this);
  const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();

  switch (MI->getOpcode()) {
  case TargetOpcode::DBG_VALUE:
    llvm_unreachable("Should be handled target independently");

  // Emit nothing here but a comment if we can.
  case X86::Int_MemBarrier:
    OutStreamer->emitRawComment("MEMBARRIER");
    return;

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Lower these as normal, but add some comments.
    unsigned Reg = MI->getOperand(0).getReg();
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
                            X86ATTInstPrinter::getRegisterName(Reg));
    break;
  }
  case X86::CLEANUPRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CLEANUPRET");
    break;
  }

  case X86::CATCHRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CATCHRET");
    break;
  }

  case X86::TAILJMPr:
  case X86::TAILJMPm:
  case X86::TAILJMPd:
  case X86::TAILJMPr64:
  case X86::TAILJMPm64:
  case X86::TAILJMPd64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPm64_REX:
  case X86::TAILJMPd64_REX:
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("TAILCALL");
    break;

  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
    return LowerTlsAddr(MCInstLowering, *MI);

  case X86::MOVPC32r: {
    // This is a pseudo op for a two-instruction sequence with a label, which
    // looks like:
    //     call "L1$pb"
    // "L1$pb":
    //     popl %esi

    // Emit the call.
    MCSymbol *PICBase = MF->getPICBaseSymbol();
    // FIXME: We would like an efficient form for this, so we don't have to do
    // a lot of extra uniquing.
    EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32)
                            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));

    const X86FrameLowering* FrameLowering =
        MF->getSubtarget<X86Subtarget>().getFrameLowering();
    bool hasFP = FrameLowering->hasFP(*MF);

    // TODO: This is needed only if we require precise CFA.
    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
                               !OutStreamer->getDwarfFrameInfos().back().End;

    int stackGrowth = -RI->getSlotSize();

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth);
    }

    // Emit the label.
    OutStreamer->EmitLabel(PICBase);

    // popl $reg
    EmitAndCountInstruction(MCInstBuilder(X86::POP32r)
                            .addReg(MI->getOperand(0).getReg()));

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth);
    }
    return;
  }

  case X86::ADD32ri: {
    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
      break;

    // Okay, we have something like:
    //   EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)

    // For this, we want to print something like:
    //   MYGLOBAL + (. - PICBASE)
    // However, we can't generate a ".", so just emit a new label here and
    // refer to it.
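    // The operand expression built below is OpSym + (DotSym - PICBase), with
    // DotSym standing in for ".".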
    MCSymbol *DotSym = OutContext.createTempSymbol();
    OutStreamer->EmitLabel(DotSym);

    // Now that we have emitted the label, lower the complex operand expression.
    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));

    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
    const MCExpr *PICBase =
      MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);

    DotExpr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(OpSym,OutContext),
                                      DotExpr, OutContext);

    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
                            .addReg(MI->getOperand(0).getReg())
                            .addReg(MI->getOperand(1).getReg())
                            .addExpr(DotExpr));
    return;
  }
  case TargetOpcode::STATEPOINT:
    return LowerSTATEPOINT(*MI, MCInstLowering);

  case TargetOpcode::FAULTING_LOAD_OP:
    return LowerFAULTING_LOAD_OP(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_OP:
    return LowerPATCHABLE_OP(*MI, MCInstLowering);

  case TargetOpcode::STACKMAP:
    return LowerSTACKMAP(*MI);

  case TargetOpcode::PATCHPOINT:
    return LowerPATCHPOINT(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_RET:
    return LowerPATCHABLE_RET(*MI, MCInstLowering);

  case X86::MORESTACK_RET:
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    return;

  case X86::MORESTACK_RET_RESTORE_R10:
    // Return, then restore R10.
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
                            .addReg(X86::R10)
                            .addReg(X86::RAX));
    return;

  case X86::SEH_PushReg:
    OutStreamer->EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm()));
    return;

  case X86::SEH_SaveReg:
    OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
                                   MI->getOperand(1).getImm());
    return;

  case X86::SEH_SaveXMM:
    OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
                                   MI->getOperand(1).getImm());
    return;

  case X86::SEH_StackAlloc:
    OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
    return;

  case X86::SEH_SetFrame:
    OutStreamer->EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()),
                                    MI->getOperand(1).getImm());
    return;

  case X86::SEH_PushFrame:
    OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
    return;

  case X86::SEH_EndPrologue:
    OutStreamer->EmitWinCFIEndProlog();
    return;

  case X86::SEH_Epilogue: {
    MachineBasicBlock::const_iterator MBBI(MI);
    // Check if preceded by a call and emit nop if so.
    for (MBBI = PrevCrossBBInst(MBBI);
         MBBI != MachineBasicBlock::const_iterator();
         MBBI = PrevCrossBBInst(MBBI)) {
      // Conservatively assume that pseudo instructions don't emit code and
      // keep looking for a call. We may emit an unnecessary nop in some cases.
      if (!MBBI->isPseudo()) {
        if (MBBI->isCall())
          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
        break;
      }
    }
    return;
  }

  // Lower PSHUFB and VPERMILP normally but add a comment if we can find
  // a constant shuffle mask. We won't be able to do this at the MC layer
  // because the mask isn't an immediate.
  case X86::PSHUFBrm:
  case X86::VPSHUFBrm:
  case X86::VPSHUFBYrm:
  case X86::VPSHUFBZ128rm:
  case X86::VPSHUFBZ128rmk:
  case X86::VPSHUFBZ128rmkz:
  case X86::VPSHUFBZ256rm:
  case X86::VPSHUFBZ256rmk:
  case X86::VPSHUFBZ256rmkz:
  case X86::VPSHUFBZrm:
  case X86::VPSHUFBZrmk:
  case X86::VPSHUFBZrmkz: {
    if (!OutStreamer->isVerboseAsm())
      break;
    unsigned SrcIdx, MaskIdx;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::PSHUFBrm:
    case X86::VPSHUFBrm:
    case X86::VPSHUFBYrm:
    case X86::VPSHUFBZ128rm:
    case X86::VPSHUFBZ256rm:
    case X86::VPSHUFBZrm:
      SrcIdx = 1; MaskIdx = 5; break;
    case X86::VPSHUFBZ128rmkz:
    case X86::VPSHUFBZ256rmkz:
    case X86::VPSHUFBZrmkz:
      SrcIdx = 2; MaskIdx = 6; break;
    case X86::VPSHUFBZ128rmk:
    case X86::VPSHUFBZ256rmk:
    case X86::VPSHUFBZrmk:
      SrcIdx = 3; MaskIdx = 7; break;
    }

    assert(MI->getNumOperands() >= 6 &&
           "We should always have at least 6 operands!");
    const MachineOperand &DstOp = MI->getOperand(0);
    const MachineOperand &SrcOp = MI->getOperand(SrcIdx);
    const MachineOperand &MaskOp = MI->getOperand(MaskIdx);

    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      SmallVector<int, 16> Mask;
      DecodePSHUFBMask(C, Mask);
      if (!Mask.empty())
        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
    }
    break;
  }

  case X86::VPERMILPDrm:
  case X86::VPERMILPDYrm:
  case X86::VPERMILPDZ128rm:
  case X86::VPERMILPDZ256rm:
  case X86::VPERMILPDZrm: {
    if (!OutStreamer->isVerboseAsm())
      break;
    assert(MI->getNumOperands() > 5 &&
           "We should always have at least 5 operands!");
    const MachineOperand &DstOp = MI->getOperand(0);
    const MachineOperand &SrcOp = MI->getOperand(1);
    const MachineOperand &MaskOp = MI->getOperand(5);

    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      SmallVector<int, 8> Mask;
      DecodeVPERMILPMask(C, 64, Mask);
      if (!Mask.empty())
        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
    }
    break;
  }

  case X86::VPERMILPSrm:
  case X86::VPERMILPSYrm:
  case X86::VPERMILPSZ128rm:
  case X86::VPERMILPSZ256rm:
  case X86::VPERMILPSZrm: {
    if (!OutStreamer->isVerboseAsm())
      break;
    assert(MI->getNumOperands() > 5 &&
           "We should always have at least 5 operands!");
    const MachineOperand &DstOp = MI->getOperand(0);
    const MachineOperand &SrcOp = MI->getOperand(1);
    const MachineOperand &MaskOp = MI->getOperand(5);

    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      SmallVector<int, 16> Mask;
      DecodeVPERMILPMask(C, 32, Mask);
      if (!Mask.empty())
        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
    }
    break;
  }

  case X86::VPERMIL2PDrm:
  case X86::VPERMIL2PSrm:
  case X86::VPERMIL2PDrmY:
  case X86::VPERMIL2PSrmY: {
    if (!OutStreamer->isVerboseAsm())
      break;
    assert(MI->getNumOperands() > 7 &&
           "We should always have at least 7 operands!");
    const MachineOperand &DstOp = MI->getOperand(0);
    const MachineOperand &SrcOp1 = MI->getOperand(1);
    const MachineOperand &SrcOp2 = MI->getOperand(2);
    const MachineOperand &MaskOp = MI->getOperand(6);
    const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);

    if (!CtrlOp.isImm())
      break;

    unsigned ElSize;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VPERMIL2PSrm: case X86::VPERMIL2PSrmY: ElSize = 32; break;
    case X86::VPERMIL2PDrm: case X86::VPERMIL2PDrmY: ElSize = 64; break;
    }

    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      SmallVector<int, 16> Mask;
      DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
      if (!Mask.empty())
        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
    }
    break;
  }

  case X86::VPPERMrrm: {
    if (!OutStreamer->isVerboseAsm())
      break;
    assert(MI->getNumOperands() > 6 &&
           "We should always have at least 6 operands!");
    const MachineOperand &DstOp = MI->getOperand(0);
    const MachineOperand &SrcOp1 = MI->getOperand(1);
    const MachineOperand &SrcOp2 = MI->getOperand(2);
    const MachineOperand &MaskOp = MI->getOperand(6);

    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
      SmallVector<int, 16> Mask;
      DecodeVPPERMMask(C, Mask);
      if (!Mask.empty())
        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
    }
    break;
  }

#define MOV_CASE(Prefix, Suffix)        \
  case X86::Prefix##MOVAPD##Suffix##rm: \
  case X86::Prefix##MOVAPS##Suffix##rm: \
  case X86::Prefix##MOVUPD##Suffix##rm: \
  case X86::Prefix##MOVUPS##Suffix##rm: \
  case X86::Prefix##MOVDQA##Suffix##rm: \
  case X86::Prefix##MOVDQU##Suffix##rm:

#define MOV_AVX512_CASE(Suffix)         \
  case X86::VMOVDQA64##Suffix##rm:      \
  case X86::VMOVDQA32##Suffix##rm:      \
  case X86::VMOVDQU64##Suffix##rm:      \
  case X86::VMOVDQU32##Suffix##rm:      \
  case X86::VMOVDQU16##Suffix##rm:      \
  case X86::VMOVDQU8##Suffix##rm:       \
  case X86::VMOVAPS##Suffix##rm:        \
  case X86::VMOVAPD##Suffix##rm:        \
  case X86::VMOVUPS##Suffix##rm:        \
  case X86::VMOVUPD##Suffix##rm:

#define CASE_ALL_MOV_RM()       \
  MOV_CASE(, )   /* SSE */      \
  MOV_CASE(V, )  /* AVX-128 */  \
  MOV_CASE(V, Y) /* AVX-256 */  \
  MOV_AVX512_CASE(Z)            \
  MOV_AVX512_CASE(Z256)         \
  MOV_AVX512_CASE(Z128)

  // For loads from a constant pool to a vector register, print the constant
  // loaded.
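  // For example, a load of <4 x i32> <i32 1, i32 2, i32 3, i32 4> into %xmm0
  // would get the comment "xmm0 = [1,2,3,4]" (illustrative values).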
  CASE_ALL_MOV_RM()
    if (!OutStreamer->isVerboseAsm())
      break;
    if (MI->getNumOperands() > 4)
      if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
        std::string Comment;
        raw_string_ostream CS(Comment);
        const MachineOperand &DstOp = MI->getOperand(0);
        CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
        if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
          CS << "[";
          for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
            if (i != 0)
              CS << ",";
            if (CDS->getElementType()->isIntegerTy())
              CS << CDS->getElementAsInteger(i);
            else if (CDS->getElementType()->isFloatTy())
              CS << CDS->getElementAsFloat(i);
            else if (CDS->getElementType()->isDoubleTy())
              CS << CDS->getElementAsDouble(i);
            else
              CS << "?";
          }
          CS << "]";
          OutStreamer->AddComment(CS.str());
        } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
          CS << "<";
          for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
            if (i != 0)
              CS << ",";
            Constant *COp = CV->getOperand(i);
            if (isa<UndefValue>(COp)) {
              CS << "u";
            } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
              if (CI->getBitWidth() <= 64) {
                CS << CI->getZExtValue();
              } else {
                // Print multi-word constant as (w0,w1).
                const auto &Val = CI->getValue();
                CS << "(";
                for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
                  if (i > 0)
                    CS << ",";
                  CS << Val.getRawData()[i];
                }
                CS << ")";
              }
            } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
              SmallString<32> Str;
              CF->getValueAPF().toString(Str);
              CS << Str;
            } else {
              CS << "?";
            }
          }
          CS << ">";
          OutStreamer->AddComment(CS.str());
        }
      }
    break;
  }

  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);

  // Stackmap shadows cannot include branch targets, so we can count the bytes
  // in a call towards the shadow, but must ensure that no thread returns into
  // the stackmap shadow. The only way to achieve this is if the call is at the
  // end of the shadow.
  if (MI->isCall()) {
    // Count the size of the call towards the shadow.
    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
    // Then flush the shadow so that we fill with nops before the call, not
    // after it.
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
    // Then emit the call.
    OutStreamer->EmitInstruction(TmpInst, getSubtargetInfo());
    return;
  }

  EmitAndCountInstruction(TmpInst);
}