1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "AMDGPU.h" 11 #include "AMDKernelCodeT.h" 12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 13 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 14 #include "SIDefines.h" 15 #include "SIInstrInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/ErrorHandling.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetRegistry.h" 53 #include 
"llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Coarse classification of a parsed register, used when resolving register
// names and when tracking per-kernel register usage.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

// A single parsed AMDGPU assembly operand: a token, an immediate, a register,
// or an unresolved expression. The many is*() predicates below are queried by
// the auto-generated matcher to decide which instruction encodings an operand
// can satisfy; the add*Operands() members append the operand (plus any
// source modifiers) to an MCInst once a match is chosen.
class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  // Back-pointer to the parser that created this operand (used by the
  // out-of-line is*Imm()/isRegClass() helpers declared below).
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  // Source-operand modifiers. FP modifiers (abs/neg) and the int modifier
  // (sext) are mutually exclusive on one operand; see getModifiersOperand().
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode abs/neg into the SISrcMods bit positions used by the encoding.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0;
      Operand |= Neg ? SISrcMods::NEG : 0;
      return Operand;
    }

    // Encode sext into its SISrcMods bit position.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  // Fine-grained classification of immediate operands; each named instruction
  // modifier (gds, offset, clamp, dmask, ...) gets its own tag so the
  // matcher can tell them apart.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyDFMT,
    ImmTyNFMT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyHigh
  };

  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    bool IsForcedVOP3;
    Modifiers Mods;
  };

  // Payload; which member is active is determined by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  // Defined out of line; they consult the parser's subtarget.
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(MVT type) const {
    return isRegKind() || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(MVT::f64);
  }

  // True for any VGPR register class, of any width.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One predicate per named instruction modifier; some also range-check the
  // immediate value against the encoding's field width.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128() const { return isImmTy(ImmTyR128); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
  bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: SGPR or inline constant, no modifiers.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: SCSrc or a literal constant (or expression for 32-bit).
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  // VCSrc*: VGPR/SGPR or inline constant, no modifiers.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: VCSrc or a literal constant (or expression for 32-bit).
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;

  // See isToken(): an expression standing in for a keyword-like token.
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers live in the register or the (plain) immediate payload.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the modifiers word first, then the value operand itself.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Debug helper for print(); keep in sync with the ImmTy enum above.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyDFMT: OS << "DFMT"; break;
    case ImmTyNFMT: OS << "NFMT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128: OS << "R128"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyHigh: OS << "High"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is not read in this body — confirm
  // whether any caller relies on it before removing.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E,
                                      bool ForceVOP3) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->Reg.IsForcedVOP3 = ForceVOP3;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
// Tracks the highest SGPR/VGPR dword index touched by the current kernel and
// publishes the running counts as the assembler symbols .kernel.sgpr_count
// and .kernel.vgpr_count (value = max used index + 1).
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  // Record that SGPR dword `i` is used; bumps the count symbol if it grows.
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  // Record that VGPR dword `i` is used; bumps the count symbol if it grows.
  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  // (Re)start a kernel scope: reset both minima and force the count symbols
  // to exist with value 0.
  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  // A register spanning RegWidth dwords starting at DwordRegIndex was parsed;
  // its last dword is the high-water mark.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
853 /// 854 /// \param Features [in] Target features, used for bug corrections. 855 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 856 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 857 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 858 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 859 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 860 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 861 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 862 /// \param VGPRBlocks [out] Result VGPR block count. 863 /// \param SGPRBlocks [out] Result SGPR block count. 864 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 865 bool FlatScrUsed, bool XNACKUsed, 866 unsigned NextFreeVGPR, SMRange VGPRRange, 867 unsigned NextFreeSGPR, SMRange SGPRRange, 868 unsigned &VGPRBlocks, unsigned &SGPRBlocks); 869 bool ParseDirectiveAMDGCNTarget(); 870 bool ParseDirectiveAMDHSAKernel(); 871 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 872 bool ParseDirectiveHSACodeObjectVersion(); 873 bool ParseDirectiveHSACodeObjectISA(); 874 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 875 bool ParseDirectiveAMDKernelCodeT(); 876 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 877 bool ParseDirectiveAMDGPUHsaKernel(); 878 879 bool ParseDirectiveISAVersion(); 880 bool ParseDirectiveHSAMetadata(); 881 bool ParseDirectivePALMetadata(); 882 883 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 884 RegisterKind RegKind, unsigned Reg1, 885 unsigned RegNum); 886 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, 887 unsigned& RegNum, unsigned& RegWidth, 888 unsigned *DwordRegIndex); 889 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 890 void initializeGprCountSymbol(RegisterKind RegKind); 891 bool updateGprCountSymbols(RegisterKind RegKind, 
unsigned DwordRegIndex, 892 unsigned RegWidth); 893 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 894 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 895 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 896 bool IsGdsHardcoded); 897 898 public: 899 enum AMDGPUMatchResultTy { 900 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 901 }; 902 903 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 904 905 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 906 const MCInstrInfo &MII, 907 const MCTargetOptions &Options) 908 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 909 MCAsmParserExtension::Initialize(Parser); 910 911 if (getFeatureBits().none()) { 912 // Set default features. 913 copySTI().ToggleFeature("SOUTHERN_ISLANDS"); 914 } 915 916 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 917 918 { 919 // TODO: make those pre-defined variables read-only. 920 // Currently there is none suitable machinery in the core llvm-mc for this. 921 // MCSymbol::isRedefinable is intended for another purpose, and 922 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
923 AMDGPU::IsaInfo::IsaVersion ISA = 924 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); 925 MCContext &Ctx = getContext(); 926 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 927 MCSymbol *Sym = 928 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 929 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 930 } else { 931 MCSymbol *Sym = 932 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 933 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 934 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 935 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 936 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 937 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 938 } 939 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 940 initializeGprCountSymbol(IS_VGPR); 941 initializeGprCountSymbol(IS_SGPR); 942 } else 943 KernelScope.initialize(getContext()); 944 } 945 } 946 947 bool hasXNACK() const { 948 return AMDGPU::hasXNACK(getSTI()); 949 } 950 951 bool hasMIMG_R128() const { 952 return AMDGPU::hasMIMG_R128(getSTI()); 953 } 954 955 bool hasPackedD16() const { 956 return AMDGPU::hasPackedD16(getSTI()); 957 } 958 959 bool isSI() const { 960 return AMDGPU::isSI(getSTI()); 961 } 962 963 bool isCI() const { 964 return AMDGPU::isCI(getSTI()); 965 } 966 967 bool isVI() const { 968 return AMDGPU::isVI(getSTI()); 969 } 970 971 bool isGFX9() const { 972 return AMDGPU::isGFX9(getSTI()); 973 } 974 975 bool hasInv2PiInlineImm() const { 976 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 977 } 978 979 bool hasFlatOffsets() const { 980 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 981 } 982 983 bool hasSGPR102_SGPR103() const { 984 return !isVI(); 985 } 986 987 bool hasIntClamp() const { 988 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 989 } 990 991 AMDGPUTargetStreamer &getTargetStreamer() { 992 
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 993 return static_cast<AMDGPUTargetStreamer &>(TS); 994 } 995 996 const MCRegisterInfo *getMRI() const { 997 // We need this const_cast because for some reason getContext() is not const 998 // in MCAsmParser. 999 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1000 } 1001 1002 const MCInstrInfo *getMII() const { 1003 return &MII; 1004 } 1005 1006 const FeatureBitset &getFeatureBits() const { 1007 return getSTI().getFeatureBits(); 1008 } 1009 1010 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1011 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1012 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1013 1014 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1015 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1016 bool isForcedDPP() const { return ForcedDPP; } 1017 bool isForcedSDWA() const { return ForcedSDWA; } 1018 ArrayRef<unsigned> getMatchedVariants() const; 1019 1020 std::unique_ptr<AMDGPUOperand> parseRegister(); 1021 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1022 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1023 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1024 unsigned Kind) override; 1025 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1026 OperandVector &Operands, MCStreamer &Out, 1027 uint64_t &ErrorInfo, 1028 bool MatchingInlineAsm) override; 1029 bool ParseDirective(AsmToken DirectiveID) override; 1030 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); 1031 StringRef parseMnemonicSuffix(StringRef Name); 1032 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1033 SMLoc NameLoc, OperandVector &Operands) override; 1034 //bool ProcessInstruction(MCInst &Inst); 1035 1036 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1037 
  // Generic parsers for keyword-introduced operands. ImmTy tags the created
  // AMDGPUOperand; ConvertResult, when given, may post-process/validate the
  // parsed value.
  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  // Register/immediate operand parsing. AbsMod enables the |x| absolute-value
  // syntax; AllowImm controls whether a literal is accepted where a register
  // is also legal.
  bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
  OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);

  // Custom MCInst converters for DS and EXP instructions. cvtDS/cvtDSGds are
  // thin wrappers selecting the GDS flavor via cvtDSImpl (defined elsewhere
  // in this file).
  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  // s_waitcnt / s_sendmsg / hwreg operand parsing.
  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // Scratch record for sendmsg/hwreg style sub-operands: the numeric Id and
  // whether it was spelled symbolically in the source (affects diagnostics).
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  // Post-match semantic validation of an assembled MCInst.
  // validateInstruction is the entry point; the rest are individual checks.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGR128(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Low-level lexer helpers: trySkip* consume a token only when it matches;
  // skipToken/parseString/parseExpr emit ErrMsg-style diagnostics on failure.
  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseExpr(int64_t &Imm);

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  // ds_swizzle operand parsing: parseSwizzleOp dispatches to the offset or
  // to one of the named macro forms below.
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  // MUBUF/MTBUF converters. The cvtMubuf* wrappers select the (IsAtomic,
  // IsAtomicReturn[, IsLds]) flavor handled by cvtMubufImpl.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Factories for default (absent) optional operands.
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  // VOP3/VOP3P converters. The two-argument cvtVOP3 overload delegates to the
  // three-argument form with a local OptionalImmIndexMap.
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  // DPP support.
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  // SDWA support. cvtSDWA is the shared implementation; BasicInstType selects
  // the encoding family and skipVcc drops the implicit VCC operand.
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);
};

// Table entry describing one optional (named) operand: its mnemonic, the
// ImmTy it produces, whether it is a bare bit flag, and an optional value
// converter.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
1186 static const fltSemantics *getFltSemantics(unsigned Size) { 1187 switch (Size) { 1188 case 4: 1189 return &APFloat::IEEEsingle(); 1190 case 8: 1191 return &APFloat::IEEEdouble(); 1192 case 2: 1193 return &APFloat::IEEEhalf(); 1194 default: 1195 llvm_unreachable("unsupported fp type"); 1196 } 1197 } 1198 1199 static const fltSemantics *getFltSemantics(MVT VT) { 1200 return getFltSemantics(VT.getSizeInBits() / 8); 1201 } 1202 1203 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1204 switch (OperandType) { 1205 case AMDGPU::OPERAND_REG_IMM_INT32: 1206 case AMDGPU::OPERAND_REG_IMM_FP32: 1207 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1208 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1209 return &APFloat::IEEEsingle(); 1210 case AMDGPU::OPERAND_REG_IMM_INT64: 1211 case AMDGPU::OPERAND_REG_IMM_FP64: 1212 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1213 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1214 return &APFloat::IEEEdouble(); 1215 case AMDGPU::OPERAND_REG_IMM_INT16: 1216 case AMDGPU::OPERAND_REG_IMM_FP16: 1217 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1218 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1219 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1220 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1221 return &APFloat::IEEEhalf(); 1222 default: 1223 llvm_unreachable("unsupported fp type"); 1224 } 1225 } 1226 1227 //===----------------------------------------------------------------------===// 1228 // Operand 1229 //===----------------------------------------------------------------------===// 1230 1231 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1232 bool Lost; 1233 1234 // Convert literal to single precision 1235 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1236 APFloat::rmNearestTiesToEven, 1237 &Lost); 1238 // We allow precision lost but not overflow or underflow 1239 if (Status != APFloat::opOK && 1240 Lost && 1241 ((Status & APFloat::opOverflow) != 0 || 1242 (Status & APFloat::opUnderflow) != 0)) { 1243 return 
false; 1244 } 1245 1246 return true; 1247 } 1248 1249 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1250 if (!isImmTy(ImmTyNone)) { 1251 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1252 return false; 1253 } 1254 // TODO: We should avoid using host float here. It would be better to 1255 // check the float bit values which is what a few other places do. 1256 // We've had bot failures before due to weird NaN support on mips hosts. 1257 1258 APInt Literal(64, Imm.Val); 1259 1260 if (Imm.IsFPImm) { // We got fp literal token 1261 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1262 return AMDGPU::isInlinableLiteral64(Imm.Val, 1263 AsmParser->hasInv2PiInlineImm()); 1264 } 1265 1266 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1267 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1268 return false; 1269 1270 if (type.getScalarSizeInBits() == 16) { 1271 return AMDGPU::isInlinableLiteral16( 1272 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1273 AsmParser->hasInv2PiInlineImm()); 1274 } 1275 1276 // Check if single precision literal is inlinable 1277 return AMDGPU::isInlinableLiteral32( 1278 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1279 AsmParser->hasInv2PiInlineImm()); 1280 } 1281 1282 // We got int literal token. 
1283 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1284 return AMDGPU::isInlinableLiteral64(Imm.Val, 1285 AsmParser->hasInv2PiInlineImm()); 1286 } 1287 1288 if (type.getScalarSizeInBits() == 16) { 1289 return AMDGPU::isInlinableLiteral16( 1290 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1291 AsmParser->hasInv2PiInlineImm()); 1292 } 1293 1294 return AMDGPU::isInlinableLiteral32( 1295 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1296 AsmParser->hasInv2PiInlineImm()); 1297 } 1298 1299 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1300 // Check that this immediate can be added as literal 1301 if (!isImmTy(ImmTyNone)) { 1302 return false; 1303 } 1304 1305 if (!Imm.IsFPImm) { 1306 // We got int literal token. 1307 1308 if (type == MVT::f64 && hasFPModifiers()) { 1309 // Cannot apply fp modifiers to int literals preserving the same semantics 1310 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1311 // disable these cases. 1312 return false; 1313 } 1314 1315 unsigned Size = type.getSizeInBits(); 1316 if (Size == 64) 1317 Size = 32; 1318 1319 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1320 // types. 1321 return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val); 1322 } 1323 1324 // We got fp literal token 1325 if (type == MVT::f64) { // Expected 64-bit fp operand 1326 // We would set low 64-bits of literal to zeroes but we accept this literals 1327 return true; 1328 } 1329 1330 if (type == MVT::i64) { // Expected 64-bit int operand 1331 // We don't allow fp literals in 64-bit integer instructions. It is 1332 // unclear how we should encode them. 
1333 return false; 1334 } 1335 1336 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1337 return canLosslesslyConvertToFPType(FPLiteral, type); 1338 } 1339 1340 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1341 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1342 } 1343 1344 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1345 if (AsmParser->isVI()) 1346 return isVReg(); 1347 else if (AsmParser->isGFX9()) 1348 return isRegKind() || isInlinableImm(type); 1349 else 1350 return false; 1351 } 1352 1353 bool AMDGPUOperand::isSDWAFP16Operand() const { 1354 return isSDWAOperand(MVT::f16); 1355 } 1356 1357 bool AMDGPUOperand::isSDWAFP32Operand() const { 1358 return isSDWAOperand(MVT::f32); 1359 } 1360 1361 bool AMDGPUOperand::isSDWAInt16Operand() const { 1362 return isSDWAOperand(MVT::i16); 1363 } 1364 1365 bool AMDGPUOperand::isSDWAInt32Operand() const { 1366 return isSDWAOperand(MVT::i32); 1367 } 1368 1369 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1370 { 1371 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1372 assert(Size == 2 || Size == 4 || Size == 8); 1373 1374 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1375 1376 if (Imm.Mods.Abs) { 1377 Val &= ~FpSignMask; 1378 } 1379 if (Imm.Mods.Neg) { 1380 Val ^= FpSignMask; 1381 } 1382 1383 return Val; 1384 } 1385 1386 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1387 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1388 Inst.getNumOperands())) { 1389 addLiteralImmOperand(Inst, Imm.Val, 1390 ApplyModifiers & 1391 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1392 } else { 1393 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1394 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1395 } 1396 } 1397 1398 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1399 const auto& InstDesc = 
AsmParser->getMII()->get(Inst.getOpcode()); 1400 auto OpNum = Inst.getNumOperands(); 1401 // Check that this operand accepts literals 1402 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1403 1404 if (ApplyModifiers) { 1405 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1406 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1407 Val = applyInputFPModifiers(Val, Size); 1408 } 1409 1410 APInt Literal(64, Val); 1411 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1412 1413 if (Imm.IsFPImm) { // We got fp literal token 1414 switch (OpTy) { 1415 case AMDGPU::OPERAND_REG_IMM_INT64: 1416 case AMDGPU::OPERAND_REG_IMM_FP64: 1417 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1418 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1419 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1420 AsmParser->hasInv2PiInlineImm())) { 1421 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1422 return; 1423 } 1424 1425 // Non-inlineable 1426 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1427 // For fp operands we check if low 32 bits are zeros 1428 if (Literal.getLoBits(32) != 0) { 1429 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1430 "Can't encode literal as exact 64-bit floating-point operand. " 1431 "Low 32-bits will be set to zero"); 1432 } 1433 1434 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1435 return; 1436 } 1437 1438 // We don't allow fp literals in 64-bit integer instructions. It is 1439 // unclear how we should encode them. 
This case should be checked earlier 1440 // in predicate methods (isLiteralImm()) 1441 llvm_unreachable("fp literal in 64-bit integer instruction."); 1442 1443 case AMDGPU::OPERAND_REG_IMM_INT32: 1444 case AMDGPU::OPERAND_REG_IMM_FP32: 1445 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1446 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1447 case AMDGPU::OPERAND_REG_IMM_INT16: 1448 case AMDGPU::OPERAND_REG_IMM_FP16: 1449 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1450 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1451 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1452 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1453 bool lost; 1454 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1455 // Convert literal to single precision 1456 FPLiteral.convert(*getOpFltSemantics(OpTy), 1457 APFloat::rmNearestTiesToEven, &lost); 1458 // We allow precision lost but not overflow or underflow. This should be 1459 // checked earlier in isLiteralImm() 1460 1461 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1462 if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 1463 OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 1464 ImmVal |= (ImmVal << 16); 1465 } 1466 1467 Inst.addOperand(MCOperand::createImm(ImmVal)); 1468 return; 1469 } 1470 default: 1471 llvm_unreachable("invalid operand size"); 1472 } 1473 1474 return; 1475 } 1476 1477 // We got int literal token. 1478 // Only sign extend inline immediates. 
1479 // FIXME: No errors on truncation 1480 switch (OpTy) { 1481 case AMDGPU::OPERAND_REG_IMM_INT32: 1482 case AMDGPU::OPERAND_REG_IMM_FP32: 1483 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1484 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1485 if (isInt<32>(Val) && 1486 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1487 AsmParser->hasInv2PiInlineImm())) { 1488 Inst.addOperand(MCOperand::createImm(Val)); 1489 return; 1490 } 1491 1492 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1493 return; 1494 1495 case AMDGPU::OPERAND_REG_IMM_INT64: 1496 case AMDGPU::OPERAND_REG_IMM_FP64: 1497 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1498 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1499 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1500 Inst.addOperand(MCOperand::createImm(Val)); 1501 return; 1502 } 1503 1504 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1505 return; 1506 1507 case AMDGPU::OPERAND_REG_IMM_INT16: 1508 case AMDGPU::OPERAND_REG_IMM_FP16: 1509 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1510 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1511 if (isInt<16>(Val) && 1512 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1513 AsmParser->hasInv2PiInlineImm())) { 1514 Inst.addOperand(MCOperand::createImm(Val)); 1515 return; 1516 } 1517 1518 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1519 return; 1520 1521 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1522 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1523 auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue()); 1524 assert(AMDGPU::isInlinableLiteral16(LiteralVal, 1525 AsmParser->hasInv2PiInlineImm())); 1526 1527 uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 | 1528 static_cast<uint32_t>(LiteralVal); 1529 Inst.addOperand(MCOperand::createImm(ImmVal)); 1530 return; 1531 } 1532 default: 1533 llvm_unreachable("invalid operand size"); 1534 } 1535 } 1536 1537 template <unsigned Bitwidth> 1538 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, 
unsigned N) const { 1539 APInt Literal(64, Imm.Val); 1540 1541 if (!Imm.IsFPImm) { 1542 // We got int literal token. 1543 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1544 return; 1545 } 1546 1547 bool Lost; 1548 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1549 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1550 APFloat::rmNearestTiesToEven, &Lost); 1551 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1552 } 1553 1554 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1555 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1556 } 1557 1558 //===----------------------------------------------------------------------===// 1559 // AsmParser 1560 //===----------------------------------------------------------------------===// 1561 1562 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1563 if (Is == IS_VGPR) { 1564 switch (RegWidth) { 1565 default: return -1; 1566 case 1: return AMDGPU::VGPR_32RegClassID; 1567 case 2: return AMDGPU::VReg_64RegClassID; 1568 case 3: return AMDGPU::VReg_96RegClassID; 1569 case 4: return AMDGPU::VReg_128RegClassID; 1570 case 8: return AMDGPU::VReg_256RegClassID; 1571 case 16: return AMDGPU::VReg_512RegClassID; 1572 } 1573 } else if (Is == IS_TTMP) { 1574 switch (RegWidth) { 1575 default: return -1; 1576 case 1: return AMDGPU::TTMP_32RegClassID; 1577 case 2: return AMDGPU::TTMP_64RegClassID; 1578 case 4: return AMDGPU::TTMP_128RegClassID; 1579 case 8: return AMDGPU::TTMP_256RegClassID; 1580 case 16: return AMDGPU::TTMP_512RegClassID; 1581 } 1582 } else if (Is == IS_SGPR) { 1583 switch (RegWidth) { 1584 default: return -1; 1585 case 1: return AMDGPU::SGPR_32RegClassID; 1586 case 2: return AMDGPU::SGPR_64RegClassID; 1587 case 4: return AMDGPU::SGPR_128RegClassID; 1588 case 8: return AMDGPU::SGPR_256RegClassID; 1589 case 16: return AMDGPU::SGPR_512RegClassID; 1590 } 1591 } 1592 return -1; 1593 } 
// Map a special-register mnemonic to its register number; returns 0 when the
// name is not a special register.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}

// MCTargetAsmParser hook: parse one register operand and report its source
// range. Returns true on failure (no register parsed).
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

// Try to fold register Reg1 into the list currently summarized by
// (Reg, RegWidth). Special lo/hi halves pair up into the combined register;
// VGPR/SGPR/TTMP registers must be exactly consecutive. Returns false when
// the combination is invalid. (RegNum is currently unused here.)
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

// Parse any register syntax: special registers ("vcc", "exec", ...), single
// registers (v0, s5, ttmp3), ranges (v[0:3]) and bracketed lists
// ([s0,s1,s2,s3]). On success fills RegKind/Reg/RegNum/RegWidth (and, if
// requested, the 0-based dword index) and returns true.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      // Classify by prefix: v..., s... or ttmp...
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          // v[XX] - single register spelled in range syntax.
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        // Each element must be a single register of the same kind,
        // consecutive with the previous ones.
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Translate the dword index into an index within the register class.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}

// Name of the assembler-visible symbol tracking the high-water mark of
// used VGPRs/SGPRs; None for kinds that are not tracked.
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

// Define the GPR-count symbol for RegKind with an initial value of 0.
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

// Raise the GPR-count symbol so it covers the register range just parsed.
// Returns false (after emitting a diagnostic) if the symbol exists but is
// not an absolute variable; otherwise true.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  // Error() returns true, so !Error(...) yields false (failure) here.
  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getParser().getTok().getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}

// Parse a register operand and wrap it in an AMDGPUOperand; also updates
// register-usage bookkeeping (GPR-count symbols or KernelScope). Returns
// nullptr on failure.
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
  const auto &Tok = Parser.getTok();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth, DwordRegIndex;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    return nullptr;
  }
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}

// Parse an absolute integer expression into Val; returns true on failure.
// With AbsMod set, a bare number followed by '|' is parsed as a primary
// expression so the closing '|' of |x| is left for the caller.
bool
AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
  if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
      (getLexer().getKind() == AsmToken::Integer ||
       getLexer().getKind() == AsmToken::Real)) {
    // This is a workaround for handling operands like these:
    //     |1.0|
    //     |-1|
    // This syntax is not compatible with syntax of standard
    // MC expressions (due to the trailing '|').

    SMLoc EndLoc;
    const MCExpr *Expr;

    if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
      return true;
    }

    return !Expr->evaluateAsAbsolute(Val);
  }

  return getParser().parseAbsoluteExpression(Val);
}

// Parse an integer or floating-point literal (with optional leading '-')
// into an immediate operand. FP tokens are stored as the bit pattern of the
// host double.
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
  // TODO: add syntactic sugar for 1/(2*PI)
  bool Minus = false;
  if (getLexer().getKind() == AsmToken::Minus) {
    const AsmToken NextToken = getLexer().peekTok();
    // Only treat '-' as a sign when a literal follows; otherwise leave it
    // for other parsers (e.g. expressions, modifiers).
    if (!NextToken.is(AsmToken::Integer) &&
        !NextToken.is(AsmToken::Real)) {
      return MatchOperand_NoMatch;
    }
    Minus = true;
    Parser.Lex();
  }

  SMLoc S = Parser.getTok().getLoc();
  switch(getLexer().getKind()) {
  case AsmToken::Integer: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;
    if (Minus)
      IntVal *= -1;
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }
  case AsmToken::Real: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;

    APFloat F(BitsToDouble(IntVal));
    if (Minus)
      F.changeSign();
    Operands.push_back(
        AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    return MatchOperand_Success;
  }
  default:
    return MatchOperand_NoMatch;
  }
}

// Parse a register operand, tagging it with the current forced-VOP3 state.
OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (auto R = parseRegister()) {
    assert(R->isReg());
    R->Reg.IsForcedVOP3 = isForcedVOP3();
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_NoMatch;
}

// Parse an immediate if possible, falling back to a register.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
  auto res = parseImm(Operands, AbsMod);
  if (res != MatchOperand_NoMatch) {
    return res;
  }

  return parseReg(Operands);
}

// Parse a register-or-immediate with optional floating-point input
// modifiers in any of the accepted spellings: leading '-', neg(...),
// abs(...) and |...|. The parsed modifiers are attached to the operand.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;

  if (getLexer().getKind()== AsmToken::Minus) {
    const AsmToken NextToken = getLexer().peekTok();

    // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
    if (NextToken.is(AsmToken::Minus)) {
      Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
      return MatchOperand_ParseFail;
    }

    // '-' followed by an integer literal N should be interpreted as integer
    // negation rather than a floating-point NEG modifier applied to N.
    // Besides being counter-intuitive, such use of floating-point NEG modifier
    // results in different meaning of integer literals used with VOP1/2/C
    // and VOP3, for example:
    //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
    //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
    // Negative fp literals should be handled likewise for uniformity
    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
      Parser.Lex();
      Negate = true;
    }
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "neg") {
    if (Negate) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Negate2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after neg");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "abs") {
    Parser.Lex();
    Abs2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after abs");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Pipe) {
    if (Abs2) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Abs = true;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  // Consume the closing delimiters of whichever modifier syntax was opened
  // and record the modifiers on the just-parsed operand.
  AMDGPUOperand::Modifiers Mods;
  if (Abs) {
    if (getLexer().getKind() != AsmToken::Pipe) {
      Error(Parser.getTok().getLoc(), "expected vertical bar");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }
  if (Abs2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }

  if (Negate) {
    Mods.Neg = true;
  } else if (Negate2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Neg = true;
  }

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

// Parse a register-or-immediate with the optional sext(...) integer input
// modifier.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = false;

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "sext") {
    Parser.Lex();
    Sext = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after sext");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  AMDGPUOperand::Modifiers Mods;
  if (Sext) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Sext = true;
  }

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}

// Register-only variants (immediates rejected).
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

// Parse either a 32-bit VGPR or the literal keyword "off" (encoded as an
// ImmTyOff immediate of value 0).
OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  const AsmToken &Tok = Parser.getTok();
  if (Tok.getString() == "off") {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
                                                AMDGPUOperand::ImmTyOff, false));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

// Reject matches that contradict the encoding suffix forced by the mnemonic
// (_e32/_e64, DPP, SDWA). NOTE: continues past the end of this chunk.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA))
) 2151 return Match_InvalidOperand; 2152 2153 if ((TSFlags & SIInstrFlags::VOP3) && 2154 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2155 getForcedEncodingSize() != 64) 2156 return Match_PreferE32; 2157 2158 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2159 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2160 // v_mac_f32/16 allow only dst_sel == DWORD; 2161 auto OpNum = 2162 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2163 const auto &Op = Inst.getOperand(OpNum); 2164 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2165 return Match_InvalidOperand; 2166 } 2167 } 2168 2169 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2170 // FIXME: Produces error without correct column reported. 2171 auto OpNum = 2172 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); 2173 const auto &Op = Inst.getOperand(OpNum); 2174 if (Op.getImm() != 0) 2175 return Match_InvalidOperand; 2176 } 2177 2178 return Match_Success; 2179 } 2180 2181 // What asm variants we should check 2182 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2183 if (getForcedEncodingSize() == 32) { 2184 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2185 return makeArrayRef(Variants); 2186 } 2187 2188 if (isForcedVOP3()) { 2189 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2190 return makeArrayRef(Variants); 2191 } 2192 2193 if (isForcedSDWA()) { 2194 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2195 AMDGPUAsmVariants::SDWA9}; 2196 return makeArrayRef(Variants); 2197 } 2198 2199 if (isForcedDPP()) { 2200 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2201 return makeArrayRef(Variants); 2202 } 2203 2204 static const unsigned Variants[] = { 2205 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2206 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2207 }; 2208 2209 return makeArrayRef(Variants); 2210 } 2211 2212 unsigned 
AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2213 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2214 const unsigned Num = Desc.getNumImplicitUses(); 2215 for (unsigned i = 0; i < Num; ++i) { 2216 unsigned Reg = Desc.ImplicitUses[i]; 2217 switch (Reg) { 2218 case AMDGPU::FLAT_SCR: 2219 case AMDGPU::VCC: 2220 case AMDGPU::M0: 2221 return Reg; 2222 default: 2223 break; 2224 } 2225 } 2226 return AMDGPU::NoRegister; 2227 } 2228 2229 // NB: This code is correct only when used to check constant 2230 // bus limitations because GFX7 support no f16 inline constants. 2231 // Note that there are no cases when a GFX7 opcode violates 2232 // constant bus limitations due to the use of an f16 constant. 2233 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2234 unsigned OpIdx) const { 2235 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2236 2237 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2238 return false; 2239 } 2240 2241 const MCOperand &MO = Inst.getOperand(OpIdx); 2242 2243 int64_t Val = MO.getImm(); 2244 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2245 2246 switch (OpSize) { // expected operand size 2247 case 8: 2248 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2249 case 4: 2250 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2251 case 2: { 2252 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2253 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2254 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 2255 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2256 } else { 2257 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2258 } 2259 } 2260 default: 2261 llvm_unreachable("invalid operand size"); 2262 } 2263 } 2264 2265 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2266 const MCOperand &MO = Inst.getOperand(OpIdx); 2267 if (MO.isImm()) { 2268 return !isInlineConstant(Inst, OpIdx); 2269 } 2270 return 
!MO.isReg() || 2271 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2272 } 2273 2274 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2275 const unsigned Opcode = Inst.getOpcode(); 2276 const MCInstrDesc &Desc = MII.get(Opcode); 2277 unsigned ConstantBusUseCount = 0; 2278 2279 if (Desc.TSFlags & 2280 (SIInstrFlags::VOPC | 2281 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2282 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2283 SIInstrFlags::SDWA)) { 2284 // Check special imm operands (used by madmk, etc) 2285 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2286 ++ConstantBusUseCount; 2287 } 2288 2289 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2290 if (SGPRUsed != AMDGPU::NoRegister) { 2291 ++ConstantBusUseCount; 2292 } 2293 2294 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2295 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2296 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2297 2298 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2299 2300 for (int OpIdx : OpIndices) { 2301 if (OpIdx == -1) break; 2302 2303 const MCOperand &MO = Inst.getOperand(OpIdx); 2304 if (usesConstantBus(Inst, OpIdx)) { 2305 if (MO.isReg()) { 2306 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2307 // Pairs of registers with a partial intersections like these 2308 // s0, s[0:1] 2309 // flat_scratch_lo, flat_scratch 2310 // flat_scratch_lo, flat_scratch_hi 2311 // are theoretically valid but they are disabled anyway. 
2312 // Note that this code mimics SIInstrInfo::verifyInstruction 2313 if (Reg != SGPRUsed) { 2314 ++ConstantBusUseCount; 2315 } 2316 SGPRUsed = Reg; 2317 } else { // Expression or a literal 2318 ++ConstantBusUseCount; 2319 } 2320 } 2321 } 2322 } 2323 2324 return ConstantBusUseCount <= 1; 2325 } 2326 2327 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2328 const unsigned Opcode = Inst.getOpcode(); 2329 const MCInstrDesc &Desc = MII.get(Opcode); 2330 2331 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2332 if (DstIdx == -1 || 2333 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2334 return true; 2335 } 2336 2337 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2338 2339 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2340 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2341 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2342 2343 assert(DstIdx != -1); 2344 const MCOperand &Dst = Inst.getOperand(DstIdx); 2345 assert(Dst.isReg()); 2346 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2347 2348 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2349 2350 for (int SrcIdx : SrcIndices) { 2351 if (SrcIdx == -1) break; 2352 const MCOperand &Src = Inst.getOperand(SrcIdx); 2353 if (Src.isReg()) { 2354 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2355 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2356 return false; 2357 } 2358 } 2359 } 2360 2361 return true; 2362 } 2363 2364 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2365 2366 const unsigned Opc = Inst.getOpcode(); 2367 const MCInstrDesc &Desc = MII.get(Opc); 2368 2369 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2370 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2371 assert(ClampIdx != -1); 2372 return Inst.getOperand(ClampIdx).getImm() == 0; 2373 } 2374 2375 
return true; 2376 } 2377 2378 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2379 2380 const unsigned Opc = Inst.getOpcode(); 2381 const MCInstrDesc &Desc = MII.get(Opc); 2382 2383 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2384 return true; 2385 2386 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2387 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2388 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2389 2390 assert(VDataIdx != -1); 2391 assert(DMaskIdx != -1); 2392 assert(TFEIdx != -1); 2393 2394 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2395 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2396 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2397 if (DMask == 0) 2398 DMask = 1; 2399 2400 unsigned DataSize = 2401 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2402 if (hasPackedD16()) { 2403 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2404 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2405 DataSize = (DataSize + 1) / 2; 2406 } 2407 2408 return (VDataSize / 4) == DataSize + TFESize; 2409 } 2410 2411 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2412 2413 const unsigned Opc = Inst.getOpcode(); 2414 const MCInstrDesc &Desc = MII.get(Opc); 2415 2416 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2417 return true; 2418 if (!Desc.mayLoad() || !Desc.mayStore()) 2419 return true; // Not atomic 2420 2421 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2422 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2423 2424 // This is an incomplete check because image_atomic_cmpswap 2425 // may only use 0x3 and 0xf while other atomic operations 2426 // may use 0x1 and 0x3. However these limitations are 2427 // verified when we check that dmask matches dst size. 
2428 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2429 } 2430 2431 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2432 2433 const unsigned Opc = Inst.getOpcode(); 2434 const MCInstrDesc &Desc = MII.get(Opc); 2435 2436 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2437 return true; 2438 2439 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2440 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2441 2442 // GATHER4 instructions use dmask in a different fashion compared to 2443 // other MIMG instructions. The only useful DMASK values are 2444 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2445 // (red,red,red,red) etc.) The ISA document doesn't mention 2446 // this. 2447 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2448 } 2449 2450 bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) { 2451 2452 const unsigned Opc = Inst.getOpcode(); 2453 const MCInstrDesc &Desc = MII.get(Opc); 2454 2455 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2456 return true; 2457 2458 int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128); 2459 assert(Idx != -1); 2460 2461 bool R128 = (Inst.getOperand(Idx).getImm() != 0); 2462 2463 return !R128 || hasMIMG_R128(); 2464 } 2465 2466 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2467 2468 const unsigned Opc = Inst.getOpcode(); 2469 const MCInstrDesc &Desc = MII.get(Opc); 2470 2471 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2472 return true; 2473 2474 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2475 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2476 if (isCI() || isSI()) 2477 return false; 2478 } 2479 2480 return true; 2481 } 2482 2483 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2484 const SMLoc &IDLoc) { 2485 if (!validateConstantBusLimitations(Inst)) { 2486 Error(IDLoc, 2487 "invalid operand (violates constant bus restrictions)"); 2488 return false; 2489 } 2490 if 
(!validateEarlyClobberLimitations(Inst)) { 2491 Error(IDLoc, 2492 "destination must be different than all sources"); 2493 return false; 2494 } 2495 if (!validateIntClampSupported(Inst)) { 2496 Error(IDLoc, 2497 "integer clamping is not supported on this GPU"); 2498 return false; 2499 } 2500 if (!validateMIMGR128(Inst)) { 2501 Error(IDLoc, 2502 "r128 modifier is not supported on this GPU"); 2503 return false; 2504 } 2505 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 2506 if (!validateMIMGD16(Inst)) { 2507 Error(IDLoc, 2508 "d16 modifier is not supported on this GPU"); 2509 return false; 2510 } 2511 if (!validateMIMGDataSize(Inst)) { 2512 Error(IDLoc, 2513 "image data size does not match dmask and tfe"); 2514 return false; 2515 } 2516 if (!validateMIMGAtomicDMask(Inst)) { 2517 Error(IDLoc, 2518 "invalid atomic image dmask"); 2519 return false; 2520 } 2521 if (!validateMIMGGatherDMask(Inst)) { 2522 Error(IDLoc, 2523 "invalid image_gather dmask: only one bit must be set"); 2524 return false; 2525 } 2526 2527 return true; 2528 } 2529 2530 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS, 2531 unsigned VariantID = 0); 2532 2533 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2534 OperandVector &Operands, 2535 MCStreamer &Out, 2536 uint64_t &ErrorInfo, 2537 bool MatchingInlineAsm) { 2538 MCInst Inst; 2539 unsigned Result = Match_Success; 2540 for (auto Variant : getMatchedVariants()) { 2541 uint64_t EI; 2542 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 2543 Variant); 2544 // We order match statuses from least to most specific. 
We use most specific 2545 // status as resulting 2546 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 2547 if ((R == Match_Success) || 2548 (R == Match_PreferE32) || 2549 (R == Match_MissingFeature && Result != Match_PreferE32) || 2550 (R == Match_InvalidOperand && Result != Match_MissingFeature 2551 && Result != Match_PreferE32) || 2552 (R == Match_MnemonicFail && Result != Match_InvalidOperand 2553 && Result != Match_MissingFeature 2554 && Result != Match_PreferE32)) { 2555 Result = R; 2556 ErrorInfo = EI; 2557 } 2558 if (R == Match_Success) 2559 break; 2560 } 2561 2562 switch (Result) { 2563 default: break; 2564 case Match_Success: 2565 if (!validateInstruction(Inst, IDLoc)) { 2566 return true; 2567 } 2568 Inst.setLoc(IDLoc); 2569 Out.EmitInstruction(Inst, getSTI()); 2570 return false; 2571 2572 case Match_MissingFeature: 2573 return Error(IDLoc, "instruction not supported on this GPU"); 2574 2575 case Match_MnemonicFail: { 2576 uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 2577 std::string Suggestion = AMDGPUMnemonicSpellCheck( 2578 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 2579 return Error(IDLoc, "invalid instruction" + Suggestion, 2580 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 2581 } 2582 2583 case Match_InvalidOperand: { 2584 SMLoc ErrorLoc = IDLoc; 2585 if (ErrorInfo != ~0ULL) { 2586 if (ErrorInfo >= Operands.size()) { 2587 return Error(IDLoc, "too few operands for instruction"); 2588 } 2589 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 2590 if (ErrorLoc == SMLoc()) 2591 ErrorLoc = IDLoc; 2592 } 2593 return Error(ErrorLoc, "invalid operand for instruction"); 2594 } 2595 2596 case Match_PreferE32: 2597 return Error(IDLoc, "internal error: instruction without _e64 suffix " 2598 "should be encoded as e32"); 2599 } 2600 llvm_unreachable("Implement any new match types added!"); 2601 } 2602 2603 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 2604 
int64_t Tmp = -1; 2605 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2606 return true; 2607 } 2608 if (getParser().parseAbsoluteExpression(Tmp)) { 2609 return true; 2610 } 2611 Ret = static_cast<uint32_t>(Tmp); 2612 return false; 2613 } 2614 2615 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2616 uint32_t &Minor) { 2617 if (ParseAsAbsoluteExpression(Major)) 2618 return TokError("invalid major version"); 2619 2620 if (getLexer().isNot(AsmToken::Comma)) 2621 return TokError("minor version number required, comma expected"); 2622 Lex(); 2623 2624 if (ParseAsAbsoluteExpression(Minor)) 2625 return TokError("invalid minor version"); 2626 2627 return false; 2628 } 2629 2630 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2631 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2632 return TokError("directive only supported for amdgcn architecture"); 2633 2634 std::string Target; 2635 2636 SMLoc TargetStart = getTok().getLoc(); 2637 if (getParser().parseEscapedString(Target)) 2638 return true; 2639 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2640 2641 std::string ExpectedTarget; 2642 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2643 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2644 2645 if (Target != ExpectedTargetOS.str()) 2646 return getParser().Error(TargetRange.Start, "target must match options", 2647 TargetRange); 2648 2649 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2650 return false; 2651 } 2652 2653 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2654 return getParser().Error(Range.Start, "value out of range", Range); 2655 } 2656 2657 bool AMDGPUAsmParser::calculateGPRBlocks( 2658 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2659 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2660 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2661 unsigned &SGPRBlocks) { 2662 // TODO(scott.linder): These 
calculations are duplicated from 2663 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 2664 IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features); 2665 2666 unsigned NumVGPRs = NextFreeVGPR; 2667 unsigned NumSGPRs = NextFreeSGPR; 2668 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features); 2669 2670 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 2671 NumSGPRs > MaxAddressableNumSGPRs) 2672 return OutOfRangeError(SGPRRange); 2673 2674 NumSGPRs += 2675 IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed); 2676 2677 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 2678 NumSGPRs > MaxAddressableNumSGPRs) 2679 return OutOfRangeError(SGPRRange); 2680 2681 if (Features.test(FeatureSGPRInitBug)) 2682 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 2683 2684 VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs); 2685 SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs); 2686 2687 return false; 2688 } 2689 2690 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 2691 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2692 return TokError("directive only supported for amdgcn architecture"); 2693 2694 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 2695 return TokError("directive only supported for amdhsa OS"); 2696 2697 StringRef KernelName; 2698 if (getParser().parseIdentifier(KernelName)) 2699 return true; 2700 2701 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(); 2702 2703 StringSet<> Seen; 2704 2705 IsaInfo::IsaVersion IVersion = 2706 IsaInfo::getIsaVersion(getSTI().getFeatureBits()); 2707 2708 SMRange VGPRRange; 2709 uint64_t NextFreeVGPR = 0; 2710 SMRange SGPRRange; 2711 uint64_t NextFreeSGPR = 0; 2712 unsigned UserSGPRCount = 0; 2713 bool ReserveVCC = true; 2714 bool ReserveFlatScr = true; 2715 bool ReserveXNACK = hasXNACK(); 2716 2717 while (true) { 2718 while (getLexer().is(AsmToken::EndOfStatement)) 2719 Lex(); 2720 2721 if 
(getLexer().isNot(AsmToken::Identifier)) 2722 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 2723 2724 StringRef ID = getTok().getIdentifier(); 2725 SMRange IDRange = getTok().getLocRange(); 2726 Lex(); 2727 2728 if (ID == ".end_amdhsa_kernel") 2729 break; 2730 2731 if (Seen.find(ID) != Seen.end()) 2732 return TokError(".amdhsa_ directives cannot be repeated"); 2733 Seen.insert(ID); 2734 2735 SMLoc ValStart = getTok().getLoc(); 2736 int64_t IVal; 2737 if (getParser().parseAbsoluteExpression(IVal)) 2738 return true; 2739 SMLoc ValEnd = getTok().getLoc(); 2740 SMRange ValRange = SMRange(ValStart, ValEnd); 2741 2742 if (IVal < 0) 2743 return OutOfRangeError(ValRange); 2744 2745 uint64_t Val = IVal; 2746 2747 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 2748 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 2749 return OutOfRangeError(RANGE); \ 2750 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 2751 2752 if (ID == ".amdhsa_group_segment_fixed_size") { 2753 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 2754 return OutOfRangeError(ValRange); 2755 KD.group_segment_fixed_size = Val; 2756 } else if (ID == ".amdhsa_private_segment_fixed_size") { 2757 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 2758 return OutOfRangeError(ValRange); 2759 KD.private_segment_fixed_size = Val; 2760 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 2761 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2762 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 2763 Val, ValRange); 2764 UserSGPRCount++; 2765 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 2766 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2767 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 2768 ValRange); 2769 UserSGPRCount++; 2770 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 2771 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2772 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 2773 ValRange); 2774 UserSGPRCount++; 2775 } else if (ID == 
".amdhsa_user_sgpr_kernarg_segment_ptr") { 2776 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2777 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 2778 Val, ValRange); 2779 UserSGPRCount++; 2780 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 2781 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2782 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 2783 ValRange); 2784 UserSGPRCount++; 2785 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 2786 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2787 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 2788 ValRange); 2789 UserSGPRCount++; 2790 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 2791 PARSE_BITS_ENTRY(KD.kernel_code_properties, 2792 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 2793 Val, ValRange); 2794 UserSGPRCount++; 2795 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 2796 PARSE_BITS_ENTRY( 2797 KD.compute_pgm_rsrc2, 2798 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 2799 ValRange); 2800 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 2801 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2802 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 2803 ValRange); 2804 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 2805 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2806 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 2807 ValRange); 2808 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 2809 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2810 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 2811 ValRange); 2812 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 2813 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2814 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 2815 ValRange); 2816 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 2817 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2818 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 2819 ValRange); 2820 } else if (ID == ".amdhsa_next_free_vgpr") { 2821 VGPRRange = 
ValRange; 2822 NextFreeVGPR = Val; 2823 } else if (ID == ".amdhsa_next_free_sgpr") { 2824 SGPRRange = ValRange; 2825 NextFreeSGPR = Val; 2826 } else if (ID == ".amdhsa_reserve_vcc") { 2827 if (!isUInt<1>(Val)) 2828 return OutOfRangeError(ValRange); 2829 ReserveVCC = Val; 2830 } else if (ID == ".amdhsa_reserve_flat_scratch") { 2831 if (IVersion.Major < 7) 2832 return getParser().Error(IDRange.Start, "directive requires gfx7+", 2833 IDRange); 2834 if (!isUInt<1>(Val)) 2835 return OutOfRangeError(ValRange); 2836 ReserveFlatScr = Val; 2837 } else if (ID == ".amdhsa_reserve_xnack_mask") { 2838 if (IVersion.Major < 8) 2839 return getParser().Error(IDRange.Start, "directive requires gfx8+", 2840 IDRange); 2841 if (!isUInt<1>(Val)) 2842 return OutOfRangeError(ValRange); 2843 ReserveXNACK = Val; 2844 } else if (ID == ".amdhsa_float_round_mode_32") { 2845 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2846 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 2847 } else if (ID == ".amdhsa_float_round_mode_16_64") { 2848 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2849 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 2850 } else if (ID == ".amdhsa_float_denorm_mode_32") { 2851 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2852 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 2853 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 2854 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2855 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 2856 ValRange); 2857 } else if (ID == ".amdhsa_dx10_clamp") { 2858 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 2859 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 2860 } else if (ID == ".amdhsa_ieee_mode") { 2861 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 2862 Val, ValRange); 2863 } else if (ID == ".amdhsa_fp16_overflow") { 2864 if (IVersion.Major < 9) 2865 return getParser().Error(IDRange.Start, "directive requires gfx9+", 2866 IDRange); 2867 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 2868 
ValRange); 2869 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 2870 PARSE_BITS_ENTRY( 2871 KD.compute_pgm_rsrc2, 2872 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 2873 ValRange); 2874 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 2875 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2876 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 2877 Val, ValRange); 2878 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 2879 PARSE_BITS_ENTRY( 2880 KD.compute_pgm_rsrc2, 2881 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 2882 ValRange); 2883 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 2884 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2885 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 2886 Val, ValRange); 2887 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 2888 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2889 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 2890 Val, ValRange); 2891 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 2892 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2893 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 2894 Val, ValRange); 2895 } else if (ID == ".amdhsa_exception_int_div_zero") { 2896 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 2897 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 2898 Val, ValRange); 2899 } else { 2900 return getParser().Error(IDRange.Start, 2901 "unknown .amdhsa_kernel directive", IDRange); 2902 } 2903 2904 #undef PARSE_BITS_ENTRY 2905 } 2906 2907 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 2908 return TokError(".amdhsa_next_free_vgpr directive is required"); 2909 2910 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 2911 return TokError(".amdhsa_next_free_sgpr directive is required"); 2912 2913 unsigned VGPRBlocks; 2914 unsigned SGPRBlocks; 2915 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 2916 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 2917 SGPRRange, VGPRBlocks, 
SGPRBlocks)) 2918 return true; 2919 2920 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 2921 VGPRBlocks)) 2922 return OutOfRangeError(VGPRRange); 2923 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 2924 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 2925 2926 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 2927 SGPRBlocks)) 2928 return OutOfRangeError(SGPRRange); 2929 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 2930 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 2931 SGPRBlocks); 2932 2933 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 2934 return TokError("too many user SGPRs enabled"); 2935 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 2936 UserSGPRCount); 2937 2938 getTargetStreamer().EmitAmdhsaKernelDescriptor( 2939 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 2940 ReserveFlatScr, ReserveXNACK); 2941 return false; 2942 } 2943 2944 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 2945 uint32_t Major; 2946 uint32_t Minor; 2947 2948 if (ParseDirectiveMajorMinor(Major, Minor)) 2949 return true; 2950 2951 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 2952 return false; 2953 } 2954 2955 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 2956 uint32_t Major; 2957 uint32_t Minor; 2958 uint32_t Stepping; 2959 StringRef VendorName; 2960 StringRef ArchName; 2961 2962 // If this directive has no arguments, then use the ISA version for the 2963 // targeted GPU. 
2964 if (getLexer().is(AsmToken::EndOfStatement)) { 2965 AMDGPU::IsaInfo::IsaVersion ISA = 2966 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); 2967 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 2968 ISA.Stepping, 2969 "AMD", "AMDGPU"); 2970 return false; 2971 } 2972 2973 if (ParseDirectiveMajorMinor(Major, Minor)) 2974 return true; 2975 2976 if (getLexer().isNot(AsmToken::Comma)) 2977 return TokError("stepping version number required, comma expected"); 2978 Lex(); 2979 2980 if (ParseAsAbsoluteExpression(Stepping)) 2981 return TokError("invalid stepping version"); 2982 2983 if (getLexer().isNot(AsmToken::Comma)) 2984 return TokError("vendor name required, comma expected"); 2985 Lex(); 2986 2987 if (getLexer().isNot(AsmToken::String)) 2988 return TokError("invalid vendor name"); 2989 2990 VendorName = getLexer().getTok().getStringContents(); 2991 Lex(); 2992 2993 if (getLexer().isNot(AsmToken::Comma)) 2994 return TokError("arch name required, comma expected"); 2995 Lex(); 2996 2997 if (getLexer().isNot(AsmToken::String)) 2998 return TokError("invalid arch name"); 2999 3000 ArchName = getLexer().getTok().getStringContents(); 3001 Lex(); 3002 3003 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3004 VendorName, ArchName); 3005 return false; 3006 } 3007 3008 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3009 amd_kernel_code_t &Header) { 3010 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3011 // assembly for backwards compatibility. 
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();
  return false;
}

// Parses the body of ".amd_kernel_code_t ... .end_amd_kernel_code_t",
// accumulating field assignments into a default-initialized header, and
// emits the result through the target streamer.
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());

  while (true) {
    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected value identifier or .end_amd_kernel_code_t");

    StringRef ID = getLexer().getTok().getIdentifier();
    Lex();

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}

// Parses ".amdgpu_hsa_kernel <symbol>": marks the symbol as an HSA kernel
// and, for pre-code-object-v3 targets, re-initializes the per-kernel scope.
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  if (getLexer().isNot(AsmToken::Identifier))
    return TokError("expected symbol name");

  StringRef KernelName = Parser.getTok().getString();

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  Lex();
  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
    KernelScope.initialize(getContext());
  return false;
}

// Parses ".amd_amdgpu_isa "<isa string>"" and verifies it matches the ISA
// implied by the -triple/-mcpu command line before emitting it.
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  // NOTE(review): the token kind is not checked before getStringContents();
  // presumably the directive is always followed by a string literal here --
  // confirm a non-string token cannot reach this point.
  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();

  std::string
      ISAVersionStringFromSTI;
  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);

  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
                 "arguments specified through the command line");
  }

  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
  Lex();

  return false;
}

// Collects the raw text between the HSA metadata begin/end directives and
// hands it to the target streamer, which validates it as HSA metadata.
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  raw_string_ostream YamlStream(HSAMetadataString);

  // The metadata text is whitespace-sensitive, so keep space tokens while
  // scanning instead of letting the lexer drop them.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!getLexer().is(AsmToken::Eof)) {
    while (getLexer().is(AsmToken::Space)) {
      YamlStream << getLexer().getTok().getString();
      Lex();
    }

    if (getLexer().is(AsmToken::Identifier)) {
      StringRef ID = getLexer().getTok().getIdentifier();
      if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
        Lex();
        FoundEnd = true;
        break;
      }
    }

    // Copy the rest of the statement verbatim, re-inserting the statement
    // separator that the lexer consumed.
    YamlStream << Parser.parseStringToEndOfStatement()
               << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal lexing before reporting any error.
  getLexer().setSkipSpace(true);

  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
  }

  YamlStream.flush();

  if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
    return Error(getParser().getTok().getLoc(), "invalid HSA metadata");

  return false;
}
// Parses the PAL metadata directive: a comma-separated list of 32-bit
// absolute expressions, forwarded as a word vector to the target streamer.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  PALMD::Metadata PALMetadata;
  for (;;) {
    uint32_t Value;
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata.push_back(Value);
    if (getLexer().isNot(AsmToken::Comma))
      break;
    Lex();
  }
  getTargetStreamer().EmitPALMetadata(PALMetadata);
  return false;
}

// Top-level directive dispatcher. Code-object-v3 and pre-v3 directive sets
// are mutually exclusive; the metadata directives are accepted either way.
// Returns true if the directive is unknown here or failed to parse.
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (IDVal == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();
  }

  if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
    return ParseDirectiveHSAMetadata();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}

// Returns true if RegNo is a register that exists on the current subtarget.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  // ttmp[12:15] (and anything aliasing them) are only usable on GFX9.
  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return
          isGFX9();
  }

  switch (RegNo) {
  // TBA/TMA and their halves are not available on GFX9.
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return !isCI() && !isSI() && hasXNACK();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI()) {
    // No flat_scr
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return false;
  }

  return true;
}

// Parses one instruction operand. Tries the tablegen'd custom parsers first,
// then register/immediate, then a general expression, and finally falls back
// to treating a bare identifier as a token operand (e.g. the "gds" flag).
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  ResTy = parseRegOrImm(Operands);

  if (ResTy == MatchOperand_Success)
    return ResTy;

  const auto &Tok = Parser.getTok();
  SMLoc S = Tok.getLoc();

  const MCExpr *Expr = nullptr;
  if (!Parser.parseExpression(Expr)) {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    return MatchOperand_Success;
  }

  // Possibly this is an instruction flag like 'gds'.
  if (Tok.getKind() == AsmToken::Identifier) {
    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

// Strips a forced-encoding suffix (_e64/_e32/_dpp/_sdwa) from the mnemonic,
// recording the forced encoding for the matcher, and returns the bare name.
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}

// Parses a full instruction: the (suffix-stripped) mnemonic followed by a
// comma/space separated operand list, up to end of statement.
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  while (!getLexer().is(AsmToken::EndOfStatement)) {
    OperandMatchResultTy Res = parseOperand(Operands, Name);

    // Eat the comma or space if there is one.
    if (getLexer().is(AsmToken::Comma))
      Parser.Lex();

    switch (Res) {
    case MatchOperand_Success: break;
    case MatchOperand_ParseFail:
      Error(getLexer().getLoc(), "failed parsing operand.");
      // Skip the rest of the statement so the next line parses cleanly.
      while (!getLexer().is(AsmToken::EndOfStatement)) {
        Parser.Lex();
      }
      return true;
    case MatchOperand_NoMatch:
      Error(getLexer().getLoc(), "not a valid operand.");
      while (!getLexer().is(AsmToken::EndOfStatement)) {
        Parser.Lex();
      }
      return true;
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//

// Parses "<Prefix>:<integer>" (with an optional leading '-') into Int.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
  switch(getLexer().getKind()) {
  default: return MatchOperand_NoMatch;
  case AsmToken::Identifier: {
    StringRef Name = Parser.getTok().getString();
    if (!Name.equals(Prefix)) {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    Parser.Lex();

    // A leading '-' is consumed here; the expression parser below only sees
    // the integer token that follows it, so negate afterwards.
    bool IsMinus = false;
    if (getLexer().getKind() == AsmToken::Minus) {
      Parser.Lex();
      IsMinus = true;
    }

    if (getLexer().isNot(AsmToken::Integer))
      return MatchOperand_ParseFail;

    if (getParser().parseAbsoluteExpression(Int))
      return MatchOperand_ParseFail;

    if (IsMinus)
      Int = -Int;
    break;
  }
  }
  return MatchOperand_Success;
}

// Parses "<Prefix>:<value>" into an immediate operand of type ImmTy,
// optionally post-processing the parsed value with ConvertResult.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Value = 0;

  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
  if (Res !=
MatchOperand_Success) 3395 return Res; 3396 3397 if (ConvertResult && !ConvertResult(Value)) { 3398 return MatchOperand_ParseFail; 3399 } 3400 3401 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3402 return MatchOperand_Success; 3403 } 3404 3405 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3406 const char *Prefix, 3407 OperandVector &Operands, 3408 AMDGPUOperand::ImmTy ImmTy, 3409 bool (*ConvertResult)(int64_t&)) { 3410 StringRef Name = Parser.getTok().getString(); 3411 if (!Name.equals(Prefix)) 3412 return MatchOperand_NoMatch; 3413 3414 Parser.Lex(); 3415 if (getLexer().isNot(AsmToken::Colon)) 3416 return MatchOperand_ParseFail; 3417 3418 Parser.Lex(); 3419 if (getLexer().isNot(AsmToken::LBrac)) 3420 return MatchOperand_ParseFail; 3421 Parser.Lex(); 3422 3423 unsigned Val = 0; 3424 SMLoc S = Parser.getTok().getLoc(); 3425 3426 // FIXME: How to verify the number of elements matches the number of src 3427 // operands? 3428 for (int I = 0; I < 4; ++I) { 3429 if (I != 0) { 3430 if (getLexer().is(AsmToken::RBrac)) 3431 break; 3432 3433 if (getLexer().isNot(AsmToken::Comma)) 3434 return MatchOperand_ParseFail; 3435 Parser.Lex(); 3436 } 3437 3438 if (getLexer().isNot(AsmToken::Integer)) 3439 return MatchOperand_ParseFail; 3440 3441 int64_t Op; 3442 if (getParser().parseAbsoluteExpression(Op)) 3443 return MatchOperand_ParseFail; 3444 3445 if (Op != 0 && Op != 1) 3446 return MatchOperand_ParseFail; 3447 Val |= (Op << I); 3448 } 3449 3450 Parser.Lex(); 3451 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3452 return MatchOperand_Success; 3453 } 3454 3455 OperandMatchResultTy 3456 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3457 AMDGPUOperand::ImmTy ImmTy) { 3458 int64_t Bit = 0; 3459 SMLoc S = Parser.getTok().getLoc(); 3460 3461 // We are at the end of the statement, and this is a default argument, so 3462 // use a default value. 
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    switch(getLexer().getKind()) {
    case AsmToken::Identifier: {
      StringRef Tok = Parser.getTok().getString();
      if (Tok == Name) {
        Bit = 1;
        Parser.Lex();
      } else if (Tok.startswith("no") && Tok.endswith(Name)) {
        Bit = 0;
        Parser.Lex();
      } else {
        return MatchOperand_NoMatch;
      }
      break;
    }
    default:
      return MatchOperand_NoMatch;
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}

// Appends the optional immediate of type ImmT to Inst: the parsed operand if
// one was recorded in OptionalIdx, otherwise the supplied Default.
static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}

// Parses "<Prefix>:<identifier>" into Value. Note: the value token itself is
// left unconsumed on success (the caller lexes past it).
OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_NoMatch;
  }
  StringRef Tok = Parser.getTok().getString();
  if (Tok != Prefix) {
    return MatchOperand_NoMatch;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon)) {
    return MatchOperand_ParseFail;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_ParseFail;
  }

  Value = Parser.getTok().getString();
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

// Converts parsed operands of DS instructions that take split offset0/offset1
// immediates (plus optional gds) into an MCInst, appending the implicit m0.
void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap
                      OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

// Shared conversion for DS instructions with a single offset immediate.
// IsGdsHardcoded is true when the mnemonic itself implies GDS (or a "gds"
// token operand was parsed), in which case no separate gds operand is added.
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // ds_swizzle_b32 carries its swizzle pattern in the slot other DS
  // instructions use for the offset immediate.
  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ?
    AMDGPUOperand::ImmTySwizzle :
    AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

// Converts parsed EXP operands into an MCInst: "off" sources become
// NoRegister, compr mode repacks the source slots, and the enable mask is
// computed from which sources are actually present.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    // Compressed mode: src1 moves down into the second slot and the two
    // upper source slots become unused.
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Compressed sources enable two mask bits per slot.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr?
                (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}

//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

// Folds CntVal into the packed waitcnt value IntVal using the given
// encode/decode pair. If the value does not round-trip it was out of range:
// either clamp it to the maximum (Saturate) or report failure.
// Returns true on failure.
static bool
encodeCnt(
  const AMDGPU::IsaInfo::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      // -1 encodes as the field's all-ones (maximum) value.
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

// Parses a single "<name>(<count>)" waitcnt clause -- vmcnt/expcnt/lgkmcnt,
// each optionally with a _sat suffix -- and merges it into IntVal.
// Returns true on error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
  StringRef CntName = Parser.getTok().getString();
  int64_t CntVal;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LParen))
    return true;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Integer))
    return true;

  SMLoc ValLoc = Parser.getTok().getLoc();
  if (getParser().parseAbsoluteExpression(CntVal))
    return true;

  AMDGPU::IsaInfo::IsaVersion ISA =
      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  Parser.Lex();
  // Consume a '&' or ',' separator, but only if another counter clause
  // (an identifier) follows it.
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}

// Parses the s_waitcnt operand: either a raw integer or a sequence of named
// counter clauses merged into the initial getWaitcntBitMask(ISA) value.
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaInfo::IsaVersion ISA =
      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default: return MatchOperand_ParseFail;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Waitcnt))
      return MatchOperand_ParseFail;
    break;

  case AsmToken::Identifier:
    do {
      if (parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    } while(getLexer().isNot(AsmToken::EndOfStatement));
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

// Parses "hwreg(<id>[, <offset>, <width>])". The id may be a symbolic name
// or an integer. Returns true on a syntax error; range validation of the
// parsed values is left to the caller (parseHwreg).
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    int Last = ID_SYMBOLIC_LAST_;
    // Pre-GFX9 targets only accept the older symbolic register names.
    if (isSI() || isCI() || isVI())
      Last = ID_SYMBOLIC_FIRST_GFX9_;
    for (int i =
             ID_SYMBOLIC_FIRST_; i < Last; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  // A closing paren right after the id means offset/width stay at their
  // caller-provided defaults.
  if (getLexer().is(AsmToken::RParen)) {
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}

// Parses a hwreg operand: either a raw 16-bit immediate or a "hwreg(...)"
// construct whose id/offset/width are range-checked and packed into the
// 16-bit immediate encoding.
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default: return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unnecessary error messages.
    }
    break;

  case AsmToken::Identifier: {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    int64_t Offset = OFFSET_DEFAULT_;
    int64_t Width = WIDTH_M1_DEFAULT_ + 1;
    if (parseHwregConstruct(HwReg, Offset, Width))
      return MatchOperand_ParseFail;
    // Like the integer case above, report range errors but still build the
    // operand so parsing can continue.
    if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
      if (HwReg.IsSymbolic)
        Error(S, "invalid symbolic name of hardware register");
      else
        Error(S, "invalid code of hardware register: only 6-bit values are legal");
    }
    if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
      Error(S, "invalid bit offset: only 5-bit values are legal");
    if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
      Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
    // Pack id, offset and width-1 into the 16-bit immediate.
    Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
  }
  break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

// Any immediate can serve as an s_waitcnt operand.
bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

// Parses "sendmsg(<msg>[, <op>[, <stream>]])". Symbolic and integer forms
// are both accepted; validation of the parsed ids is done by the caller
// (parseSendMsgOp). Returns true on a syntax error.
bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
  using namespace llvm::AMDGPU::SendMsg;

  if (Parser.getTok().getString() != "sendmsg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    Msg.IsSymbolic = true;
    Msg.Id = ID_UNKNOWN_;
    const std::string tok = Parser.getTok().getString();
    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
      switch(i) {
      default: continue; // Omit gaps.
      case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break;
      }
      if (tok == IdSymbolic[i]) {
        Msg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Msg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Msg.Id))
      return true;
    // A second integer token here is malformed input; mark the id unknown so
    // the caller reports it.
    if (getLexer().is(AsmToken::Integer))
      if (getParser().parseAbsoluteExpression(Msg.Id))
        Msg.Id = ID_UNKNOWN_;
  }
  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
    return false;

  // Only GS, GS_DONE and SYSMSG messages take an operation argument.
  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
    if (getLexer().isNot(AsmToken::RParen))
      return true;
    Parser.Lex();
    return false;
  }

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
  Operation.Id = ID_UNKNOWN_;
  if (getLexer().is(AsmToken::Identifier)) {
    Operation.IsSymbolic = true;
    // The symbolic operation table depends on the message family.
    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
    const StringRef Tok = Parser.getTok().getString();
    for (int i = F; i < L; ++i) {
      if (Tok == S[i]) {
        Operation.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Operation.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Operation.Id))
      return true;
  }

  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
    // Stream id is optional.
    if (getLexer().is(AsmToken::RParen)) {
      Parser.Lex();
      return false;
    }

    if (getLexer().isNot(AsmToken::Comma))
      return true;
    Parser.Lex();

    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(StreamId))
      return true;
  }

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();
  return false;
}

// Parses the interpolation slot operand (p10/p20/p0) into an immediate.
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

// Parses "attr<N>.<chan>" (chan one of x/y/z/w, N at most 63) into two
// immediate operands: the attribute number and the channel.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the ".<chan>" suffix and the "attr" prefix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());
  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}

// Reports an out-of-range exp target at the current token's location.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}

// Decodes an exp target name (null, mrt<N>, mrtz, pos<N>, param<N>, or
// invalid_target_<N>) into its numeric target index in Val. Out-of-range
// indices are diagnosed via errorExpTgt() but still return Success.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 3)
      errorExpTgt();

    // pos0 is target index 12.
    Val += 12;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    // param0 is target index 32.
    Val += 32;
    return MatchOperand_Success;
  }

  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

// Parses an exp target operand via parseExpTgtImpl and wraps the decoded
// index in an ImmTyExpTgt immediate.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  uint8_t Val;
  StringRef Str = Parser.getTok().getString();

  auto Res = parseExpTgtImpl(Str, Val);
  if (Res != MatchOperand_Success)
    return Res;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

// Parses a sendmsg operand: either a raw 16-bit immediate, or a
// "sendmsg(...)" construct whose ids are validated and packed into the
// 16-bit immediate encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unnecessary error messages.
    }
    break;
  case AsmToken::Identifier: {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Operation(OP_UNKNOWN_);
    int64_t StreamId = STREAM_ID_DEFAULT_;
    if (parseSendMsgConstruct(Msg, Operation, StreamId))
      return MatchOperand_ParseFail;
    // do/while(false) lets each validation step bail out with "break" while
    // still falling through to operand creation below.
    do {
      // Validate and encode message ID.
      if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
          || Msg.Id == ID_SYSMSG)) {
        if (Msg.IsSymbolic)
          Error(S, "invalid/unsupported symbolic name of message");
        else
          Error(S, "invalid/unsupported code of message");
        break;
      }
      Imm16Val = (Msg.Id << ID_SHIFT_);
      // Validate and encode operation ID.
      if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
        if (!
(OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4143 if (Operation.IsSymbolic) 4144 Error(S, "invalid symbolic name of GS_OP"); 4145 else 4146 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4147 break; 4148 } 4149 if (Operation.Id == OP_GS_NOP 4150 && Msg.Id != ID_GS_DONE) { 4151 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4152 break; 4153 } 4154 Imm16Val |= (Operation.Id << OP_SHIFT_); 4155 } 4156 if (Msg.Id == ID_SYSMSG) { 4157 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4158 if (Operation.IsSymbolic) 4159 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4160 else 4161 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4162 break; 4163 } 4164 Imm16Val |= (Operation.Id << OP_SHIFT_); 4165 } 4166 // Validate and encode stream ID. 4167 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4168 if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4169 Error(S, "invalid stream id: only 2-bit values are legal"); 4170 break; 4171 } 4172 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4173 } 4174 } while (false); 4175 } 4176 break; 4177 } 4178 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4179 return MatchOperand_Success; 4180 } 4181 4182 bool AMDGPUOperand::isSendMsg() const { 4183 return isImmTy(ImmTySendMsg); 4184 } 4185 4186 //===----------------------------------------------------------------------===// 4187 // parser helpers 4188 //===----------------------------------------------------------------------===// 4189 4190 bool 4191 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4192 if (getLexer().getKind() == AsmToken::Identifier && 4193 Parser.getTok().getString() == Id) { 4194 Parser.Lex(); 4195 return true; 4196 } 4197 return false; 4198 } 4199 4200 bool 4201 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4202 if (getLexer().getKind() == Kind) { 4203 Parser.Lex(); 4204 return true; 
  }
  return false;
}

/// Like trySkipToken, but emits ErrMsg at the current location on mismatch.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(Parser.getTok().getLoc(), ErrMsg);
    return false;
  }
  return true;
}

/// Parse an absolute expression into Imm; returns true on success.
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}

/// Parse a quoted string literal into Val, or emit ErrMsg and fail.
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  SMLoc S = Parser.getTok().getLoc();
  if (getLexer().getKind() == AsmToken::String) {
    Val = Parser.getTok().getStringContents();
    Parser.Lex();
    return true;
  } else {
    Error(S, ErrMsg);
    return false;
  }
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

/// Pack and/or/xor lane masks into the BITMASK_PERM swizzle encoding.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}

/// Parse OpNum comma-prefixed integer operands into Op[0..OpNum-1],
/// range-checking each against [MinVal, MaxVal] and emitting ErrMsg on
/// violation. Returns false on the first failure.
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!skipToken(AsmToken::Comma, "expected a comma")){
      return false;
    }
    SMLoc ExprLoc = Parser.getTok().getLoc();
    if (!parseExpr(Op[i])) {
      return false;
    }
    if (Op[i] < MinVal || Op[i] > MaxVal) {
      Error(ExprLoc, ErrMsg);
      return false;
    }
  }

  return true;
}

/// swizzle(QUAD_PERM, a, b, c, d): pack four 2-bit lane selectors.
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (auto i = 0; i < LANE_NUM; ++i) {
      Imm |= Lane[i] << (LANE_SHIFT * i);
    }
    return true;
  }
  return false;
}

/// swizzle(BROADCAST, group_size, lane_id): lowered to a bitmask perm that
/// masks off the low lane bits and ORs in the broadcast source lane.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    // AND mask keeps the bits above the group; OR mask selects the lane.
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

/// swizzle(REVERSE, group_size): XOR lane id with (group_size - 1).
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32, "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

/// swizzle(SWAP, group_size): XOR lane id with the group size itself.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

/// swizzle(BITMASK_PERM, "mask") where the mask is 5 characters over
/// {0,1,p,i}: 0 -> force bit to 0, 1 -> force to 1, p -> pass lane bit
/// through, i -> invert lane bit.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  // The leftmost mask character controls the most significant lane bit.
  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

/// Raw 16-bit swizzle offset (offset:<imm> without the swizzle() macro).
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

/// Parse the parenthesized body of the swizzle() macro and dispatch on the
/// symbolic mode name (QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP, REVERSE).
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

/// Parse the optional "offset:" operand of ds_swizzle_b32, which is either
/// a raw 16-bit immediate or the symbolic swizzle(...) macro form.
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

/// Parse a SOPP branch target: either an absolute integer immediate or a
/// symbol reference (created on demand) wrapped in an MCExpr operand.
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();

  switch (getLexer().getKind()) {
  default: return MatchOperand_ParseFail;
  case AsmToken::Integer: {
    int64_t Imm;
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_ParseFail;
    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
    return MatchOperand_Success;
  }

  case AsmToken::Identifier:
    Operands.push_back(AMDGPUOperand::CreateExpr(this,
        MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
                Parser.getTok().getString()), getContext()), S));
    Parser.Lex();
    return MatchOperand_Success;
  }
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

// Default (absent) glc modifier: immediate 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}

// Default (absent) slc modifier: immediate 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}

/// Convert parsed MUBUF operands into MCInst operands, appending the
/// optional modifiers (offset/glc/slc/tfe) in encoding order. Also handles
/// the atomic-with-return vdata duplication and the lds-opcode workaround.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsAtomicReturn,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  assert(IsAtomicReturn ? IsAtomic : true); // Return implies atomic.

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // NOTE(review): plain assignment (not |=) means any later token/modifier
    // operand overwrites this flag; appears to rely on 'lds' being the last
    // such operand parsed -- confirm.
    HasLdsModifier = Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
  if (IsAtomicReturn) {
    MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
    Inst.insert(I, *I);
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
}

/// Convert parsed MTBUF operands into MCInst operands; optional modifiers
/// are appended in encoding order (offset, dfmt, nfmt, glc, slc, tfe).
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}

//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

/// Convert parsed MIMG operands into MCInst operands. For atomics the
/// destination register is duplicated as the data source. Optional
/// modifiers are appended in encoding order.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unexpected operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}

//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

// SMRD offset fits in 8 bits (SI/VI dword-offset form).
bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

// SMRD offset fits in 20 bits (VI byte-offset form).
bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//

// Map omod multiplier syntax (mul:1/2/4) to its 2-bit field encoding.
static bool ConvertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;

  Mul >>= 1; // 1 -> 0, 2 -> 1, 4 -> 2
  return true;
}

// Map omod divisor syntax (div:1/2) to its 2-bit field encoding.
static bool ConvertOmodDiv(int64_t &Div) {
  if (Div == 1) {
    Div = 0;
    return true;
  }

  if (Div == 2) {
    Div = 3;
    return true;
  }

  return false;
}

// Map bound_ctrl:0/-1 syntax to the DPP bound_ctrl bit.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0) {
    BoundCtrl = 1;
    return true;
  }

  if (BoundCtrl == -1) {
    BoundCtrl = 0;
    return true;
  }

  return false;
}

// Note: the order in this table matches the order of operands in AsmString.
// Table of all optional operands: name, immediate type, whether it is a
// bare bit modifier (no ':value'), and an optional value-conversion hook.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dfmt",    AMDGPUOperand::ImmTyDFMT, false, nullptr},
  {"nfmt",    AMDGPUOperand::ImmTyNFMT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  // NOTE(review): "d16" appears twice in this table; the second entry looks
  // redundant since lookup stops at the first match -- confirm.
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};

/// Entry point for parsing an optional operand, with lookahead to skip past
/// hardcoded mandatory operands that follow optional ones.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  unsigned size = Operands.size();
  assert(size > 0);

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {

    // We have parsed the first optional operand.
    // Parse as many operands as necessary to skip all mandatory operands.

    for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
      if (res != MatchOperand_Success ||
          getLexer().is(AsmToken::EndOfStatement)) break;
      if (getLexer().is(AsmToken::Comma)) Parser.Lex();
      res = parseOptionalOpr(Operands);
    }
  }

  return res;
}

/// Try each entry of AMDGPUOptionalOperandTable in order, dispatching to the
/// appropriate specialized parser; returns on the first non-NoMatch result.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}

/// Parse the output modifier operand: mul:<1|2|4> or div:<1|2>.
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = Parser.getTok().getString();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}

/// VOP3 with op_sel: the op_sel bit just past the last source operand
/// encodes the destination op_sel; it is stored in src0_modifiers.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many source operands this opcode actually has.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}

static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

/// Convert parsed VOP3 interpolation operands (v_interp_* in VOP3 form)
/// into MCInst operands, then append high/clamp/omod if the opcode has them.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

/// Core VOP3 conversion: place defs, sources (with or without source
/// modifiers), then clamp/omod; records optional-modifier indices in
/// OptionalIdx for callers that append more fields.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

/// VOP3P conversion: run the normal VOP3 conversion, then fold the packed
/// op_sel/op_sel_hi/neg_lo/neg_hi operands into per-source modifier bits.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed ops default op_sel_hi to all-ones (-1); others to 0.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Scatter bit J of each packed operand into source J's modifier field.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
5097 5098 bool AMDGPUOperand::isDPPCtrl() const { 5099 using namespace AMDGPU::DPP; 5100 5101 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 5102 if (result) { 5103 int64_t Imm = getImm(); 5104 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 5105 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 5106 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 5107 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 5108 (Imm == DppCtrl::WAVE_SHL1) || 5109 (Imm == DppCtrl::WAVE_ROL1) || 5110 (Imm == DppCtrl::WAVE_SHR1) || 5111 (Imm == DppCtrl::WAVE_ROR1) || 5112 (Imm == DppCtrl::ROW_MIRROR) || 5113 (Imm == DppCtrl::ROW_HALF_MIRROR) || 5114 (Imm == DppCtrl::BCAST15) || 5115 (Imm == DppCtrl::BCAST31); 5116 } 5117 return false; 5118 } 5119 5120 bool AMDGPUOperand::isGPRIdxMode() const { 5121 return isImm() && isUInt<4>(getImm()); 5122 } 5123 5124 bool AMDGPUOperand::isS16Imm() const { 5125 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 5126 } 5127 5128 bool AMDGPUOperand::isU16Imm() const { 5129 return isImm() && isUInt<16>(getImm()); 5130 } 5131 5132 OperandMatchResultTy 5133 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 5134 using namespace AMDGPU::DPP; 5135 5136 SMLoc S = Parser.getTok().getLoc(); 5137 StringRef Prefix; 5138 int64_t Int; 5139 5140 if (getLexer().getKind() == AsmToken::Identifier) { 5141 Prefix = Parser.getTok().getString(); 5142 } else { 5143 return MatchOperand_NoMatch; 5144 } 5145 5146 if (Prefix == "row_mirror") { 5147 Int = DppCtrl::ROW_MIRROR; 5148 Parser.Lex(); 5149 } else if (Prefix == "row_half_mirror") { 5150 Int = DppCtrl::ROW_HALF_MIRROR; 5151 Parser.Lex(); 5152 } else { 5153 // Check to prevent parseDPPCtrlOps from eating invalid tokens 5154 if (Prefix != "quad_perm" 5155 && Prefix != "row_shl" 5156 && Prefix != "row_shr" 5157 && Prefix != "row_ror" 5158 && Prefix != "wave_shl" 5159 && Prefix != "wave_rol" 5160 
&& Prefix != "wave_shr" 5161 && Prefix != "wave_ror" 5162 && Prefix != "row_bcast") { 5163 return MatchOperand_NoMatch; 5164 } 5165 5166 Parser.Lex(); 5167 if (getLexer().isNot(AsmToken::Colon)) 5168 return MatchOperand_ParseFail; 5169 5170 if (Prefix == "quad_perm") { 5171 // quad_perm:[%d,%d,%d,%d] 5172 Parser.Lex(); 5173 if (getLexer().isNot(AsmToken::LBrac)) 5174 return MatchOperand_ParseFail; 5175 Parser.Lex(); 5176 5177 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 5178 return MatchOperand_ParseFail; 5179 5180 for (int i = 0; i < 3; ++i) { 5181 if (getLexer().isNot(AsmToken::Comma)) 5182 return MatchOperand_ParseFail; 5183 Parser.Lex(); 5184 5185 int64_t Temp; 5186 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 5187 return MatchOperand_ParseFail; 5188 const int shift = i*2 + 2; 5189 Int += (Temp << shift); 5190 } 5191 5192 if (getLexer().isNot(AsmToken::RBrac)) 5193 return MatchOperand_ParseFail; 5194 Parser.Lex(); 5195 } else { 5196 // sel:%d 5197 Parser.Lex(); 5198 if (getParser().parseAbsoluteExpression(Int)) 5199 return MatchOperand_ParseFail; 5200 5201 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 5202 Int |= DppCtrl::ROW_SHL0; 5203 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 5204 Int |= DppCtrl::ROW_SHR0; 5205 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 5206 Int |= DppCtrl::ROW_ROR0; 5207 } else if (Prefix == "wave_shl" && 1 == Int) { 5208 Int = DppCtrl::WAVE_SHL1; 5209 } else if (Prefix == "wave_rol" && 1 == Int) { 5210 Int = DppCtrl::WAVE_ROL1; 5211 } else if (Prefix == "wave_shr" && 1 == Int) { 5212 Int = DppCtrl::WAVE_SHR1; 5213 } else if (Prefix == "wave_ror" && 1 == Int) { 5214 Int = DppCtrl::WAVE_ROR1; 5215 } else if (Prefix == "row_bcast") { 5216 if (Int == 15) { 5217 Int = DppCtrl::BCAST15; 5218 } else if (Int == 31) { 5219 Int = DppCtrl::BCAST31; 5220 } else { 5221 return MatchOperand_ParseFail; 5222 } 5223 } else { 5224 return MatchOperand_ParseFail; 
5225 } 5226 } 5227 } 5228 5229 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 5230 return MatchOperand_Success; 5231 } 5232 5233 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 5234 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 5235 } 5236 5237 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 5238 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 5239 } 5240 5241 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 5242 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 5243 } 5244 5245 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) { 5246 OptionalImmIndexMap OptionalIdx; 5247 5248 unsigned I = 1; 5249 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5250 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5251 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5252 } 5253 5254 // All DPP instructions with at least one source operand have a fake "old" 5255 // source at the beginning that's tied to the dst operand. Handle it here. 5256 if (Desc.getNumOperands() >= 2) 5257 Inst.addOperand(Inst.getOperand(0)); 5258 5259 for (unsigned E = Operands.size(); I != E; ++I) { 5260 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5261 // Add the register arguments 5262 if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { 5263 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 5264 // Skip it. 
5265 continue; 5266 } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5267 Op.addRegWithFPInputModsOperands(Inst, 2); 5268 } else if (Op.isDPPCtrl()) { 5269 Op.addImmOperands(Inst, 1); 5270 } else if (Op.isImm()) { 5271 // Handle optional arguments 5272 OptionalIdx[Op.getImmTy()] = I; 5273 } else { 5274 llvm_unreachable("Invalid operand type"); 5275 } 5276 } 5277 5278 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 5279 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 5280 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 5281 } 5282 5283 //===----------------------------------------------------------------------===// 5284 // sdwa 5285 //===----------------------------------------------------------------------===// 5286 5287 OperandMatchResultTy 5288 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 5289 AMDGPUOperand::ImmTy Type) { 5290 using namespace llvm::AMDGPU::SDWA; 5291 5292 SMLoc S = Parser.getTok().getLoc(); 5293 StringRef Value; 5294 OperandMatchResultTy res; 5295 5296 res = parseStringWithPrefix(Prefix, Value); 5297 if (res != MatchOperand_Success) { 5298 return res; 5299 } 5300 5301 int64_t Int; 5302 Int = StringSwitch<int64_t>(Value) 5303 .Case("BYTE_0", SdwaSel::BYTE_0) 5304 .Case("BYTE_1", SdwaSel::BYTE_1) 5305 .Case("BYTE_2", SdwaSel::BYTE_2) 5306 .Case("BYTE_3", SdwaSel::BYTE_3) 5307 .Case("WORD_0", SdwaSel::WORD_0) 5308 .Case("WORD_1", SdwaSel::WORD_1) 5309 .Case("DWORD", SdwaSel::DWORD) 5310 .Default(0xffffffff); 5311 Parser.Lex(); // eat last token 5312 5313 if (Int == 0xffffffff) { 5314 return MatchOperand_ParseFail; 5315 } 5316 5317 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 5318 return MatchOperand_Success; 5319 } 5320 5321 OperandMatchResultTy 5322 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 5323 using namespace llvm::AMDGPU::SDWA; 5324 5325 SMLoc S 
= Parser.getTok().getLoc(); 5326 StringRef Value; 5327 OperandMatchResultTy res; 5328 5329 res = parseStringWithPrefix("dst_unused", Value); 5330 if (res != MatchOperand_Success) { 5331 return res; 5332 } 5333 5334 int64_t Int; 5335 Int = StringSwitch<int64_t>(Value) 5336 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 5337 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 5338 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 5339 .Default(0xffffffff); 5340 Parser.Lex(); // eat last token 5341 5342 if (Int == 0xffffffff) { 5343 return MatchOperand_ParseFail; 5344 } 5345 5346 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 5347 return MatchOperand_Success; 5348 } 5349 5350 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 5351 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 5352 } 5353 5354 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 5355 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 5356 } 5357 5358 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 5359 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 5360 } 5361 5362 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 5363 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 5364 } 5365 5366 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 5367 uint64_t BasicInstType, bool skipVcc) { 5368 using namespace llvm::AMDGPU::SDWA; 5369 5370 OptionalImmIndexMap OptionalIdx; 5371 bool skippedVcc = false; 5372 5373 unsigned I = 1; 5374 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5375 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5376 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5377 } 5378 5379 for (unsigned E = Operands.size(); I != E; ++I) { 5380 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5381 if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { 5382 // VOP2b (v_add_u32, 
v_sub_u32 ...) sdwa use "vcc" token as dst. 5383 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 5384 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 5385 // Skip VCC only if we didn't skip it on previous iteration. 5386 if (BasicInstType == SIInstrFlags::VOP2 && 5387 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 5388 skippedVcc = true; 5389 continue; 5390 } else if (BasicInstType == SIInstrFlags::VOPC && 5391 Inst.getNumOperands() == 0) { 5392 skippedVcc = true; 5393 continue; 5394 } 5395 } 5396 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5397 Op.addRegOrImmWithInputModsOperands(Inst, 2); 5398 } else if (Op.isImm()) { 5399 // Handle optional arguments 5400 OptionalIdx[Op.getImmTy()] = I; 5401 } else { 5402 llvm_unreachable("Invalid operand type"); 5403 } 5404 skippedVcc = false; 5405 } 5406 5407 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 5408 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 5409 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 5410 switch (BasicInstType) { 5411 case SIInstrFlags::VOP1: 5412 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 5413 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 5414 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 5415 } 5416 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 5417 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 5418 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 5419 break; 5420 5421 case SIInstrFlags::VOP2: 5422 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 5423 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 5424 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 
5425 } 5426 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 5427 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 5428 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 5429 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 5430 break; 5431 5432 case SIInstrFlags::VOPC: 5433 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 5434 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 5435 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 5436 break; 5437 5438 default: 5439 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); 5440 } 5441 } 5442 5443 // special case v_mac_{f16, f32}: 5444 // it has src2 register operand that is tied to dst operand 5445 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 5446 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 5447 auto it = Inst.begin(); 5448 std::advance( 5449 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 5450 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 5451 } 5452 } 5453 5454 /// Force static initialization. 
// Register both asm-parser variants (r600 and GCN targets) with the target
// registry at load time.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}