1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "MCTargetDesc/X86BaseInfo.h" 11 #include "X86AsmInstrumentation.h" 12 #include "X86AsmParserCommon.h" 13 #include "X86Operand.h" 14 #include "llvm/ADT/APFloat.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/ADT/SmallString.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/MC/MCContext.h" 21 #include "llvm/MC/MCExpr.h" 22 #include "llvm/MC/MCInst.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCParser/MCAsmLexer.h" 25 #include "llvm/MC/MCParser/MCAsmParser.h" 26 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 27 #include "llvm/MC/MCRegisterInfo.h" 28 #include "llvm/MC/MCStreamer.h" 29 #include "llvm/MC/MCSubtargetInfo.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/MC/MCTargetAsmParser.h" 32 #include "llvm/Support/SourceMgr.h" 33 #include "llvm/Support/TargetRegistry.h" 34 #include "llvm/Support/raw_ostream.h" 35 #include <memory> 36 37 using namespace llvm; 38 39 namespace { 40 41 static const char OpPrecedence[] = { 42 0, // IC_OR 43 1, // IC_AND 44 2, // IC_LSHIFT 45 2, // IC_RSHIFT 46 3, // IC_PLUS 47 3, // IC_MINUS 48 4, // IC_MULTIPLY 49 4, // IC_DIVIDE 50 5, // IC_RPAREN 51 6, // IC_LPAREN 52 0, // IC_IMM 53 0 // IC_REGISTER 54 }; 55 56 class X86AsmParser : public MCTargetAsmParser { 57 MCSubtargetInfo &STI; 58 MCAsmParser &Parser; 59 const MCInstrInfo &MII; 60 ParseInstructionInfo *InstInfo; 61 std::unique_ptr<X86AsmInstrumentation> Instrumentation; 62 private: 63 SMLoc consumeToken() { 64 SMLoc Result = Parser.getTok().getLoc(); 65 Parser.Lex(); 66 return Result; 67 } 68 69 enum InfixCalculatorTok { 70 IC_OR = 0, 71 IC_AND, 72 IC_LSHIFT, 73 IC_RSHIFT, 74 IC_PLUS, 75 IC_MINUS, 76 IC_MULTIPLY, 77 IC_DIVIDE, 78 IC_RPAREN, 79 IC_LPAREN, 80 IC_IMM, 81 IC_REGISTER 82 }; 83 84 class InfixCalculator { 85 typedef std::pair< InfixCalculatorTok, int64_t > ICToken; 86 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; 87 SmallVector<ICToken, 4> PostfixStack; 88 89 public: 90 int64_t popOperand() { 91 assert (!PostfixStack.empty() && "Poped an empty stack!"); 92 ICToken Op = PostfixStack.pop_back_val(); 93 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER) 94 && "Expected and immediate or register!"); 95 return Op.second; 96 } 97 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { 98 assert ((Op == IC_IMM || Op == IC_REGISTER) && 99 "Unexpected operand!"); 100 PostfixStack.push_back(std::make_pair(Op, Val)); 101 } 102 103 void popOperator() { InfixOperatorStack.pop_back(); } 104 void pushOperator(InfixCalculatorTok Op) { 105 // Push the new operator if the stack is empty. 106 if (InfixOperatorStack.empty()) { 107 InfixOperatorStack.push_back(Op); 108 return; 109 } 110 111 // Push the new operator if it has a higher precedence than the operator 112 // on the top of the stack or the operator on the top of the stack is a 113 // left parentheses. 114 unsigned Idx = InfixOperatorStack.size() - 1; 115 InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; 116 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { 117 InfixOperatorStack.push_back(Op); 118 return; 119 } 120 121 // The operator on the top of the stack has higher precedence than the 122 // new operator. 123 unsigned ParenCount = 0; 124 while (1) { 125 // Nothing to process. 126 if (InfixOperatorStack.empty()) 127 break; 128 129 Idx = InfixOperatorStack.size() - 1; 130 StackOp = InfixOperatorStack[Idx]; 131 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) 132 break; 133 134 // If we have an even parentheses count and we see a left parentheses, 135 // then stop processing. 136 if (!ParenCount && StackOp == IC_LPAREN) 137 break; 138 139 if (StackOp == IC_RPAREN) { 140 ++ParenCount; 141 InfixOperatorStack.pop_back(); 142 } else if (StackOp == IC_LPAREN) { 143 --ParenCount; 144 InfixOperatorStack.pop_back(); 145 } else { 146 InfixOperatorStack.pop_back(); 147 PostfixStack.push_back(std::make_pair(StackOp, 0)); 148 } 149 } 150 // Push the new operator. 151 InfixOperatorStack.push_back(Op); 152 } 153 int64_t execute() { 154 // Push any remaining operators onto the postfix stack. 155 while (!InfixOperatorStack.empty()) { 156 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); 157 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) 158 PostfixStack.push_back(std::make_pair(StackOp, 0)); 159 } 160 161 if (PostfixStack.empty()) 162 return 0; 163 164 SmallVector<ICToken, 16> OperandStack; 165 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { 166 ICToken Op = PostfixStack[i]; 167 if (Op.first == IC_IMM || Op.first == IC_REGISTER) { 168 OperandStack.push_back(Op); 169 } else { 170 assert (OperandStack.size() > 1 && "Too few operands."); 171 int64_t Val; 172 ICToken Op2 = OperandStack.pop_back_val(); 173 ICToken Op1 = OperandStack.pop_back_val(); 174 switch (Op.first) { 175 default: 176 report_fatal_error("Unexpected operator!"); 177 break; 178 case IC_PLUS: 179 Val = Op1.second + Op2.second; 180 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 181 break; 182 case IC_MINUS: 183 Val = Op1.second - Op2.second; 184 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 185 break; 186 case IC_MULTIPLY: 187 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 188 "Multiply operation with an immediate and a register!"); 189 Val = Op1.second * Op2.second; 190 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 191 break; 192 case IC_DIVIDE: 193 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 194 "Divide operation with an immediate and a register!"); 195 assert (Op2.second != 0 && "Division by zero!"); 196 Val = Op1.second / Op2.second; 197 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 198 break; 199 case IC_OR: 200 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 201 "Or operation with an immediate and a register!"); 202 Val = Op1.second | Op2.second; 203 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 204 break; 205 case IC_AND: 206 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 207 "And operation with an immediate and a register!"); 208 Val = Op1.second & Op2.second; 209 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 210 break; 211 case IC_LSHIFT: 212 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 213 "Left shift operation with an immediate and a register!"); 214 Val = Op1.second << Op2.second; 215 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 216 break; 217 case IC_RSHIFT: 218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 219 "Right shift operation with an immediate and a register!"); 220 Val = Op1.second >> Op2.second; 221 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 222 break; 223 } 224 } 225 } 226 assert (OperandStack.size() == 1 && "Expected a single result."); 227 return OperandStack.pop_back_val().second; 228 } 229 }; 230 231 enum IntelExprState { 232 IES_OR, 233 IES_AND, 234 IES_LSHIFT, 235 IES_RSHIFT, 236 IES_PLUS, 237 IES_MINUS, 238 IES_NOT, 239 IES_MULTIPLY, 240 IES_DIVIDE, 241 IES_LBRAC, 242 IES_RBRAC, 243 IES_LPAREN, 244 IES_RPAREN, 245 IES_REGISTER, 246 IES_INTEGER, 247 IES_IDENTIFIER, 248 IES_ERROR 249 }; 250 251 class IntelExprStateMachine { 252 IntelExprState State, PrevState; 253 unsigned BaseReg, IndexReg, TmpReg, Scale; 254 int64_t Imm; 255 const MCExpr *Sym; 256 StringRef SymName; 257 bool StopOnLBrac, AddImmPrefix; 258 InfixCalculator IC; 259 InlineAsmIdentifierInfo Info; 260 public: 261 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : 262 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), 263 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac), 264 AddImmPrefix(addimmprefix) { Info.clear(); } 265 266 unsigned getBaseReg() { return BaseReg; } 267 unsigned getIndexReg() { return IndexReg; } 268 unsigned getScale() { return Scale; } 269 const MCExpr *getSym() { return Sym; } 270 StringRef getSymName() { return SymName; } 271 int64_t getImm() { return Imm + IC.execute(); } 272 bool isValidEndState() { 273 return State == IES_RBRAC || State == IES_INTEGER; 274 } 275 bool getStopOnLBrac() { return StopOnLBrac; } 276 bool getAddImmPrefix() { return AddImmPrefix; } 277 bool hadError() { return State == IES_ERROR; } 278 279 InlineAsmIdentifierInfo &getIdentifierInfo() { 280 return Info; 281 } 282 283 void onOr() { 284 IntelExprState CurrState = State; 285 switch (State) { 286 default: 287 State = IES_ERROR; 288 break; 289 case IES_INTEGER: 290 case IES_RPAREN: 291 case IES_REGISTER: 292 State = IES_OR; 293 IC.pushOperator(IC_OR); 294 break; 295 } 296 PrevState = CurrState; 297 } 298 void onAnd() { 299 IntelExprState CurrState = State; 300 switch (State) { 301 default: 302 State = IES_ERROR; 303 break; 304 case IES_INTEGER: 305 case IES_RPAREN: 306 case IES_REGISTER: 307 State = IES_AND; 308 IC.pushOperator(IC_AND); 309 break; 310 } 311 PrevState = CurrState; 312 } 313 void onLShift() { 314 IntelExprState CurrState = State; 315 switch (State) { 316 default: 317 State = IES_ERROR; 318 break; 319 case IES_INTEGER: 320 case IES_RPAREN: 321 case IES_REGISTER: 322 State = IES_LSHIFT; 323 IC.pushOperator(IC_LSHIFT); 324 break; 325 } 326 PrevState = CurrState; 327 } 328 void onRShift() { 329 IntelExprState CurrState = State; 330 switch (State) { 331 default: 332 State = IES_ERROR; 333 break; 334 case IES_INTEGER: 335 case IES_RPAREN: 336 case IES_REGISTER: 337 State = IES_RSHIFT; 338 IC.pushOperator(IC_RSHIFT); 339 break; 340 } 341 PrevState = CurrState; 342 } 343 void onPlus() { 344 IntelExprState CurrState = State; 345 switch (State) { 346 default: 347 State = IES_ERROR; 348 break; 349 case IES_INTEGER: 350 case IES_RPAREN: 351 case IES_REGISTER: 352 State = IES_PLUS; 353 IC.pushOperator(IC_PLUS); 354 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 355 // If we already have a BaseReg, then assume this is the IndexReg with 356 // a scale of 1. 357 if (!BaseReg) { 358 BaseReg = TmpReg; 359 } else { 360 assert (!IndexReg && "BaseReg/IndexReg already set!"); 361 IndexReg = TmpReg; 362 Scale = 1; 363 } 364 } 365 break; 366 } 367 PrevState = CurrState; 368 } 369 void onMinus() { 370 IntelExprState CurrState = State; 371 switch (State) { 372 default: 373 State = IES_ERROR; 374 break; 375 case IES_PLUS: 376 case IES_NOT: 377 case IES_MULTIPLY: 378 case IES_DIVIDE: 379 case IES_LPAREN: 380 case IES_RPAREN: 381 case IES_LBRAC: 382 case IES_RBRAC: 383 case IES_INTEGER: 384 case IES_REGISTER: 385 State = IES_MINUS; 386 // Only push the minus operator if it is not a unary operator. 387 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || 388 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || 389 CurrState == IES_LPAREN || CurrState == IES_LBRAC)) 390 IC.pushOperator(IC_MINUS); 391 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 392 // If we already have a BaseReg, then assume this is the IndexReg with 393 // a scale of 1. 394 if (!BaseReg) { 395 BaseReg = TmpReg; 396 } else { 397 assert (!IndexReg && "BaseReg/IndexReg already set!"); 398 IndexReg = TmpReg; 399 Scale = 1; 400 } 401 } 402 break; 403 } 404 PrevState = CurrState; 405 } 406 void onNot() { 407 IntelExprState CurrState = State; 408 switch (State) { 409 default: 410 State = IES_ERROR; 411 break; 412 case IES_PLUS: 413 case IES_NOT: 414 State = IES_NOT; 415 break; 416 } 417 PrevState = CurrState; 418 } 419 void onRegister(unsigned Reg) { 420 IntelExprState CurrState = State; 421 switch (State) { 422 default: 423 State = IES_ERROR; 424 break; 425 case IES_PLUS: 426 case IES_LPAREN: 427 State = IES_REGISTER; 428 TmpReg = Reg; 429 IC.pushOperand(IC_REGISTER); 430 break; 431 case IES_MULTIPLY: 432 // Index Register - Scale * Register 433 if (PrevState == IES_INTEGER) { 434 assert (!IndexReg && "IndexReg already set!"); 435 State = IES_REGISTER; 436 IndexReg = Reg; 437 // Get the scale and replace the 'Scale * Register' with '0'. 438 Scale = IC.popOperand(); 439 IC.pushOperand(IC_IMM); 440 IC.popOperator(); 441 } else { 442 State = IES_ERROR; 443 } 444 break; 445 } 446 PrevState = CurrState; 447 } 448 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) { 449 PrevState = State; 450 switch (State) { 451 default: 452 State = IES_ERROR; 453 break; 454 case IES_PLUS: 455 case IES_MINUS: 456 case IES_NOT: 457 State = IES_INTEGER; 458 Sym = SymRef; 459 SymName = SymRefName; 460 IC.pushOperand(IC_IMM); 461 break; 462 } 463 } 464 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) { 465 IntelExprState CurrState = State; 466 switch (State) { 467 default: 468 State = IES_ERROR; 469 break; 470 case IES_PLUS: 471 case IES_MINUS: 472 case IES_NOT: 473 case IES_OR: 474 case IES_AND: 475 case IES_LSHIFT: 476 case IES_RSHIFT: 477 case IES_DIVIDE: 478 case IES_MULTIPLY: 479 case IES_LPAREN: 480 State = IES_INTEGER; 481 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { 482 // Index Register - Register * Scale 483 assert (!IndexReg && "IndexReg already set!"); 484 IndexReg = TmpReg; 485 Scale = TmpInt; 486 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) { 487 ErrMsg = "scale factor in address must be 1, 2, 4 or 8"; 488 return true; 489 } 490 // Get the scale and replace the 'Register * Scale' with '0'. 491 IC.popOperator(); 492 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 493 PrevState == IES_OR || PrevState == IES_AND || 494 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || 495 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 496 PrevState == IES_LPAREN || PrevState == IES_LBRAC || 497 PrevState == IES_NOT) && 498 CurrState == IES_MINUS) { 499 // Unary minus. No need to pop the minus operand because it was never 500 // pushed. 501 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. 502 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 503 PrevState == IES_OR || PrevState == IES_AND || 504 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || 505 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 506 PrevState == IES_LPAREN || PrevState == IES_LBRAC || 507 PrevState == IES_NOT) && 508 CurrState == IES_NOT) { 509 // Unary not. No need to pop the not operand because it was never 510 // pushed. 511 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm. 512 } else { 513 IC.pushOperand(IC_IMM, TmpInt); 514 } 515 break; 516 } 517 PrevState = CurrState; 518 return false; 519 } 520 void onStar() { 521 PrevState = State; 522 switch (State) { 523 default: 524 State = IES_ERROR; 525 break; 526 case IES_INTEGER: 527 case IES_REGISTER: 528 case IES_RPAREN: 529 State = IES_MULTIPLY; 530 IC.pushOperator(IC_MULTIPLY); 531 break; 532 } 533 } 534 void onDivide() { 535 PrevState = State; 536 switch (State) { 537 default: 538 State = IES_ERROR; 539 break; 540 case IES_INTEGER: 541 case IES_RPAREN: 542 State = IES_DIVIDE; 543 IC.pushOperator(IC_DIVIDE); 544 break; 545 } 546 } 547 void onLBrac() { 548 PrevState = State; 549 switch (State) { 550 default: 551 State = IES_ERROR; 552 break; 553 case IES_RBRAC: 554 State = IES_PLUS; 555 IC.pushOperator(IC_PLUS); 556 break; 557 } 558 } 559 void onRBrac() { 560 IntelExprState CurrState = State; 561 switch (State) { 562 default: 563 State = IES_ERROR; 564 break; 565 case IES_INTEGER: 566 case IES_REGISTER: 567 case IES_RPAREN: 568 State = IES_RBRAC; 569 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 570 // If we already have a BaseReg, then assume this is the IndexReg with 571 // a scale of 1. 572 if (!BaseReg) { 573 BaseReg = TmpReg; 574 } else { 575 assert (!IndexReg && "BaseReg/IndexReg already set!"); 576 IndexReg = TmpReg; 577 Scale = 1; 578 } 579 } 580 break; 581 } 582 PrevState = CurrState; 583 } 584 void onLParen() { 585 IntelExprState CurrState = State; 586 switch (State) { 587 default: 588 State = IES_ERROR; 589 break; 590 case IES_PLUS: 591 case IES_MINUS: 592 case IES_NOT: 593 case IES_OR: 594 case IES_AND: 595 case IES_LSHIFT: 596 case IES_RSHIFT: 597 case IES_MULTIPLY: 598 case IES_DIVIDE: 599 case IES_LPAREN: 600 // FIXME: We don't handle this type of unary minus or not, yet. 601 if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 602 PrevState == IES_OR || PrevState == IES_AND || 603 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || 604 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 605 PrevState == IES_LPAREN || PrevState == IES_LBRAC || 606 PrevState == IES_NOT) && 607 (CurrState == IES_MINUS || CurrState == IES_NOT)) { 608 State = IES_ERROR; 609 break; 610 } 611 State = IES_LPAREN; 612 IC.pushOperator(IC_LPAREN); 613 break; 614 } 615 PrevState = CurrState; 616 } 617 void onRParen() { 618 PrevState = State; 619 switch (State) { 620 default: 621 State = IES_ERROR; 622 break; 623 case IES_INTEGER: 624 case IES_REGISTER: 625 case IES_RPAREN: 626 State = IES_RPAREN; 627 IC.pushOperator(IC_RPAREN); 628 break; 629 } 630 } 631 }; 632 633 MCAsmParser &getParser() const { return Parser; } 634 635 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 636 637 bool Error(SMLoc L, const Twine &Msg, 638 ArrayRef<SMRange> Ranges = None, 639 bool MatchingInlineAsm = false) { 640 if (MatchingInlineAsm) return true; 641 return Parser.Error(L, Msg, Ranges); 642 } 643 644 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg, 645 ArrayRef<SMRange> Ranges = None, 646 bool MatchingInlineAsm = false) { 647 Parser.eatToEndOfStatement(); 648 return Error(L, Msg, Ranges, MatchingInlineAsm); 649 } 650 651 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) { 652 Error(Loc, Msg); 653 return nullptr; 654 } 655 656 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc); 657 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc); 658 std::unique_ptr<X86Operand> ParseOperand(); 659 std::unique_ptr<X86Operand> ParseATTOperand(); 660 std::unique_ptr<X86Operand> ParseIntelOperand(); 661 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator(); 662 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); 663 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind); 664 std::unique_ptr<X86Operand> 665 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size); 666 std::unique_ptr<X86Operand> 667 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size); 668 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); 669 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg, 670 SMLoc Start, 671 int64_t ImmDisp, 672 unsigned Size); 673 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, 674 InlineAsmIdentifierInfo &Info, 675 bool IsUnevaluatedOperand, SMLoc &End); 676 677 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc); 678 679 std::unique_ptr<X86Operand> 680 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, 681 unsigned IndexReg, unsigned Scale, SMLoc Start, 682 SMLoc End, unsigned Size, StringRef Identifier, 683 InlineAsmIdentifierInfo &Info); 684 685 bool ParseDirectiveWord(unsigned Size, SMLoc L); 686 bool ParseDirectiveCode(StringRef IDVal, SMLoc L); 687 688 bool processInstruction(MCInst &Inst, const OperandVector &Ops); 689 690 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds 691 /// instrumentation around Inst. 692 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out); 693 694 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 695 OperandVector &Operands, MCStreamer &Out, 696 unsigned &ErrorInfo, 697 bool MatchingInlineAsm) override; 698 699 /// doSrcDstMatch - Returns true if operands are matching in their 700 /// word size (%si and %di, %esi and %edi, etc.). Order depends on 701 /// the parsing mode (Intel vs. AT&T). 702 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2); 703 704 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z}) 705 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required. 706 /// \return \c true if no parsing errors occurred, \c false otherwise. 707 bool HandleAVX512Operand(OperandVector &Operands, 708 const MCParsedAsmOperand &Op); 709 710 bool is64BitMode() const { 711 // FIXME: Can tablegen auto-generate this? 712 return (STI.getFeatureBits() & X86::Mode64Bit) != 0; 713 } 714 bool is32BitMode() const { 715 // FIXME: Can tablegen auto-generate this? 716 return (STI.getFeatureBits() & X86::Mode32Bit) != 0; 717 } 718 bool is16BitMode() const { 719 // FIXME: Can tablegen auto-generate this? 720 return (STI.getFeatureBits() & X86::Mode16Bit) != 0; 721 } 722 void SwitchMode(uint64_t mode) { 723 uint64_t oldMode = STI.getFeatureBits() & 724 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit); 725 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode)); 726 setAvailableFeatures(FB); 727 assert(mode == (STI.getFeatureBits() & 728 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit))); 729 } 730 731 bool isParsingIntelSyntax() { 732 return getParser().getAssemblerDialect(); 733 } 734 735 /// @name Auto-generated Matcher Functions 736 /// { 737 738 #define GET_ASSEMBLER_HEADER 739 #include "X86GenAsmMatcher.inc" 740 741 /// } 742 743 public: 744 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, 745 const MCInstrInfo &mii, 746 const MCTargetOptions &Options) 747 : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii), 748 InstInfo(nullptr) { 749 750 // Initialize the set of available features. 751 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); 752 Instrumentation.reset( 753 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI)); 754 } 755 756 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 757 758 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 759 SMLoc NameLoc, OperandVector &Operands) override; 760 761 bool ParseDirective(AsmToken DirectiveID) override; 762 }; 763 } // end anonymous namespace 764 765 /// @name Auto-generated Match Functions 766 /// { 767 768 static unsigned MatchRegisterName(StringRef Name); 769 770 /// } 771 772 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg, 773 StringRef &ErrMsg) { 774 // If we have both a base register and an index register make sure they are 775 // both 64-bit or 32-bit registers. 776 // To support VSIB, IndexReg can be 128-bit or 256-bit registers. 777 if (BaseReg != 0 && IndexReg != 0) { 778 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && 779 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 780 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && 781 IndexReg != X86::RIZ) { 782 ErrMsg = "base register is 64-bit, but index register is not"; 783 return true; 784 } 785 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && 786 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 787 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && 788 IndexReg != X86::EIZ){ 789 ErrMsg = "base register is 32-bit, but index register is not"; 790 return true; 791 } 792 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) { 793 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 794 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) { 795 ErrMsg = "base register is 16-bit, but index register is not"; 796 return true; 797 } 798 if (((BaseReg == X86::BX || BaseReg == X86::BP) && 799 IndexReg != X86::SI && IndexReg != X86::DI) || 800 ((BaseReg == X86::SI || BaseReg == X86::DI) && 801 IndexReg != X86::BX && IndexReg != X86::BP)) { 802 ErrMsg = "invalid 16-bit base/index register combination"; 803 return true; 804 } 805 } 806 } 807 return false; 808 } 809 810 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2) 811 { 812 // Return true and let a normal complaint about bogus operands happen. 813 if (!Op1.isMem() || !Op2.isMem()) 814 return true; 815 816 // Actually these might be the other way round if Intel syntax is 817 // being used. It doesn't matter. 818 unsigned diReg = Op1.Mem.BaseReg; 819 unsigned siReg = Op2.Mem.BaseReg; 820 821 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg)) 822 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg); 823 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg)) 824 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg); 825 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg)) 826 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg); 827 // Again, return true and let another error happen. 828 return true; 829 } 830 831 bool X86AsmParser::ParseRegister(unsigned &RegNo, 832 SMLoc &StartLoc, SMLoc &EndLoc) { 833 RegNo = 0; 834 const AsmToken &PercentTok = Parser.getTok(); 835 StartLoc = PercentTok.getLoc(); 836 837 // If we encounter a %, ignore it. This code handles registers with and 838 // without the prefix, unprefixed registers can occur in cfi directives. 839 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) 840 Parser.Lex(); // Eat percent token. 841 842 const AsmToken &Tok = Parser.getTok(); 843 EndLoc = Tok.getEndLoc(); 844 845 if (Tok.isNot(AsmToken::Identifier)) { 846 if (isParsingIntelSyntax()) return true; 847 return Error(StartLoc, "invalid register name", 848 SMRange(StartLoc, EndLoc)); 849 } 850 851 RegNo = MatchRegisterName(Tok.getString()); 852 853 // If the match failed, try the register name as lowercase. 854 if (RegNo == 0) 855 RegNo = MatchRegisterName(Tok.getString().lower()); 856 857 if (!is64BitMode()) { 858 // FIXME: This should be done using Requires<Not64BitMode> and 859 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also 860 // checked. 861 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a 862 // REX prefix. 863 if (RegNo == X86::RIZ || 864 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || 865 X86II::isX86_64NonExtLowByteReg(RegNo) || 866 X86II::isX86_64ExtendedReg(RegNo)) 867 return Error(StartLoc, "register %" 868 + Tok.getString() + " is only available in 64-bit mode", 869 SMRange(StartLoc, EndLoc)); 870 } 871 872 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. 873 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { 874 RegNo = X86::ST0; 875 Parser.Lex(); // Eat 'st' 876 877 // Check to see if we have '(4)' after %st. 878 if (getLexer().isNot(AsmToken::LParen)) 879 return false; 880 // Lex the paren. 881 getParser().Lex(); 882 883 const AsmToken &IntTok = Parser.getTok(); 884 if (IntTok.isNot(AsmToken::Integer)) 885 return Error(IntTok.getLoc(), "expected stack index"); 886 switch (IntTok.getIntVal()) { 887 case 0: RegNo = X86::ST0; break; 888 case 1: RegNo = X86::ST1; break; 889 case 2: RegNo = X86::ST2; break; 890 case 3: RegNo = X86::ST3; break; 891 case 4: RegNo = X86::ST4; break; 892 case 5: RegNo = X86::ST5; break; 893 case 6: RegNo = X86::ST6; break; 894 case 7: RegNo = X86::ST7; break; 895 default: return Error(IntTok.getLoc(), "invalid stack index"); 896 } 897 898 if (getParser().Lex().isNot(AsmToken::RParen)) 899 return Error(Parser.getTok().getLoc(), "expected ')'"); 900 901 EndLoc = Parser.getTok().getEndLoc(); 902 Parser.Lex(); // Eat ')' 903 return false; 904 } 905 906 EndLoc = Parser.getTok().getEndLoc(); 907 908 // If this is "db[0-7]", match it as an alias 909 // for dr[0-7]. 910 if (RegNo == 0 && Tok.getString().size() == 3 && 911 Tok.getString().startswith("db")) { 912 switch (Tok.getString()[2]) { 913 case '0': RegNo = X86::DR0; break; 914 case '1': RegNo = X86::DR1; break; 915 case '2': RegNo = X86::DR2; break; 916 case '3': RegNo = X86::DR3; break; 917 case '4': RegNo = X86::DR4; break; 918 case '5': RegNo = X86::DR5; break; 919 case '6': RegNo = X86::DR6; break; 920 case '7': RegNo = X86::DR7; break; 921 } 922 923 if (RegNo != 0) { 924 EndLoc = Parser.getTok().getEndLoc(); 925 Parser.Lex(); // Eat it. 926 return false; 927 } 928 } 929 930 if (RegNo == 0) { 931 if (isParsingIntelSyntax()) return true; 932 return Error(StartLoc, "invalid register name", 933 SMRange(StartLoc, EndLoc)); 934 } 935 936 Parser.Lex(); // Eat identifier token. 937 return false; 938 } 939 940 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { 941 unsigned basereg = 942 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI); 943 const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); 944 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg, 945 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0); 946 } 947 948 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { 949 unsigned basereg = 950 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI); 951 const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); 952 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/basereg, 953 /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0); 954 } 955 956 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() { 957 if (isParsingIntelSyntax()) 958 return ParseIntelOperand(); 959 return ParseATTOperand(); 960 } 961 962 /// getIntelMemOperandSize - Return intel memory operand size. 963 static unsigned getIntelMemOperandSize(StringRef OpStr) { 964 unsigned Size = StringSwitch<unsigned>(OpStr) 965 .Cases("BYTE", "byte", 8) 966 .Cases("WORD", "word", 16) 967 .Cases("DWORD", "dword", 32) 968 .Cases("QWORD", "qword", 64) 969 .Cases("XWORD", "xword", 80) 970 .Cases("XMMWORD", "xmmword", 128) 971 .Cases("YMMWORD", "ymmword", 256) 972 .Cases("ZMMWORD", "zmmword", 512) 973 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter 974 .Default(0); 975 return Size; 976 } 977 978 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm( 979 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, 980 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, 981 InlineAsmIdentifierInfo &Info) { 982 // If this is not a VarDecl then assume it is a FuncDecl or some other label 983 // reference. We need an 'r' constraint here, so we need to create register 984 // operand to ensure proper matching. Just pick a GPR based on the size of 985 // a pointer. 986 if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) { 987 unsigned RegNo = 988 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX); 989 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, 990 SMLoc(), Identifier, Info.OpDecl); 991 } 992 993 // We either have a direct symbol reference, or an offset from a symbol. The 994 // parser always puts the symbol on the LHS, so look there for size 995 // calculation purposes. 996 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp); 997 bool IsSymRef = 998 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp); 999 if (IsSymRef) { 1000 if (!Size) { 1001 Size = Info.Type * 8; // Size is in terms of bits in this context. 1002 if (Size) 1003 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start, 1004 /*Len=*/0, Size)); 1005 } 1006 } 1007 1008 // When parsing inline assembly we set the base register to a non-zero value 1009 // if we don't know the actual value at this time. This is necessary to 1010 // get the matching correct in some cases. 1011 BaseReg = BaseReg ? BaseReg : 1; 1012 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, 1013 End, Size, Identifier, Info.OpDecl); 1014 } 1015 1016 static void 1017 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites, 1018 StringRef SymName, int64_t ImmDisp, 1019 int64_t FinalImmDisp, SMLoc &BracLoc, 1020 SMLoc &StartInBrac, SMLoc &End) { 1021 // Remove the '[' and ']' from the IR string. 1022 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1)); 1023 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1)); 1024 1025 // If ImmDisp is non-zero, then we parsed a displacement before the 1026 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) 1027 // If ImmDisp doesn't match the displacement computed by the state machine 1028 // then we have an additional displacement in the bracketed expression. 1029 if (ImmDisp != FinalImmDisp) { 1030 if (ImmDisp) { 1031 // We have an immediate displacement before the bracketed expression. 1032 // Adjust this to match the final immediate displacement. 1033 bool Found = false; 1034 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(), 1035 E = AsmRewrites->end(); I != E; ++I) { 1036 if ((*I).Loc.getPointer() > BracLoc.getPointer()) 1037 continue; 1038 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) { 1039 assert (!Found && "ImmDisp already rewritten."); 1040 (*I).Kind = AOK_Imm; 1041 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer(); 1042 (*I).Val = FinalImmDisp; 1043 Found = true; 1044 break; 1045 } 1046 } 1047 assert (Found && "Unable to rewrite ImmDisp."); 1048 (void)Found; 1049 } else { 1050 // We have a symbolic and an immediate displacement, but no displacement 1051 // before the bracketed expression. Put the immediate displacement 1052 // before the bracketed expression. 1053 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp)); 1054 } 1055 } 1056 // Remove all the ImmPrefix rewrites within the brackets. 1057 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(), 1058 E = AsmRewrites->end(); I != E; ++I) { 1059 if ((*I).Loc.getPointer() < StartInBrac.getPointer()) 1060 continue; 1061 if ((*I).Kind == AOK_ImmPrefix) 1062 (*I).Kind = AOK_Delete; 1063 } 1064 const char *SymLocPtr = SymName.data(); 1065 // Skip everything before the symbol. 1066 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { 1067 assert(Len > 0 && "Expected a non-negative length."); 1068 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len)); 1069 } 1070 // Skip everything after the symbol. 1071 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { 1072 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); 1073 assert(Len > 0 && "Expected a non-negative length."); 1074 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len)); 1075 } 1076 } 1077 1078 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { 1079 const AsmToken &Tok = Parser.getTok(); 1080 1081 bool Done = false; 1082 while (!Done) { 1083 bool UpdateLocLex = true; 1084 1085 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an 1086 // identifier. Don't try an parse it as a register. 1087 if (Tok.getString().startswith(".")) 1088 break; 1089 1090 // If we're parsing an immediate expression, we don't expect a '['. 1091 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) 1092 break; 1093 1094 AsmToken::TokenKind TK = getLexer().getKind(); 1095 switch (TK) { 1096 default: { 1097 if (SM.isValidEndState()) { 1098 Done = true; 1099 break; 1100 } 1101 return Error(Tok.getLoc(), "unknown token in expression"); 1102 } 1103 case AsmToken::EndOfStatement: { 1104 Done = true; 1105 break; 1106 } 1107 case AsmToken::String: 1108 case AsmToken::Identifier: { 1109 // This could be a register or a symbolic displacement. 1110 unsigned TmpReg; 1111 const MCExpr *Val; 1112 SMLoc IdentLoc = Tok.getLoc(); 1113 StringRef Identifier = Tok.getString(); 1114 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) { 1115 SM.onRegister(TmpReg); 1116 UpdateLocLex = false; 1117 break; 1118 } else { 1119 if (!isParsingInlineAsm()) { 1120 if (getParser().parsePrimaryExpr(Val, End)) 1121 return Error(Tok.getLoc(), "Unexpected identifier!"); 1122 } else { 1123 // This is a dot operator, not an adjacent identifier. 1124 if (Identifier.find('.') != StringRef::npos) { 1125 return false; 1126 } else { 1127 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); 1128 if (ParseIntelIdentifier(Val, Identifier, Info, 1129 /*Unevaluated=*/false, End)) 1130 return true; 1131 } 1132 } 1133 SM.onIdentifierExpr(Val, Identifier); 1134 UpdateLocLex = false; 1135 break; 1136 } 1137 return Error(Tok.getLoc(), "Unexpected identifier!"); 1138 } 1139 case AsmToken::Integer: { 1140 StringRef ErrMsg; 1141 if (isParsingInlineAsm() && SM.getAddImmPrefix()) 1142 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, 1143 Tok.getLoc())); 1144 // Look for 'b' or 'f' following an Integer as a directional label 1145 SMLoc Loc = getTok().getLoc(); 1146 int64_t IntVal = getTok().getIntVal(); 1147 End = consumeToken(); 1148 UpdateLocLex = false; 1149 if (getLexer().getKind() == AsmToken::Identifier) { 1150 StringRef IDVal = getTok().getString(); 1151 if (IDVal == "f" || IDVal == "b") { 1152 MCSymbol *Sym = 1153 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b"); 1154 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 1155 const MCExpr *Val = 1156 MCSymbolRefExpr::Create(Sym, Variant, getContext()); 1157 if (IDVal == "b" && Sym->isUndefined()) 1158 return Error(Loc, "invalid reference to undefined symbol"); 1159 StringRef Identifier = Sym->getName(); 1160 SM.onIdentifierExpr(Val, Identifier); 1161 End = consumeToken(); 1162 } else { 1163 if (SM.onInteger(IntVal, ErrMsg)) 1164 return Error(Loc, ErrMsg); 1165 } 1166 } else { 1167 if (SM.onInteger(IntVal, ErrMsg)) 1168 return Error(Loc, ErrMsg); 1169 } 1170 break; 1171 } 1172 case AsmToken::Plus: SM.onPlus(); break; 1173 case AsmToken::Minus: SM.onMinus(); break; 1174 case AsmToken::Tilde: SM.onNot(); break; 1175 case AsmToken::Star: SM.onStar(); break; 1176 case AsmToken::Slash: SM.onDivide(); break; 1177 case AsmToken::Pipe: SM.onOr(); break; 1178 case AsmToken::Amp: SM.onAnd(); break; 1179 case AsmToken::LessLess: 1180 SM.onLShift(); break; 1181 case AsmToken::GreaterGreater: 1182 SM.onRShift(); break; 1183 case AsmToken::LBrac: SM.onLBrac(); break; 1184 case AsmToken::RBrac: SM.onRBrac(); break; 1185 case AsmToken::LParen: SM.onLParen(); break; 1186 case AsmToken::RParen: SM.onRParen(); break; 1187 } 1188 if (SM.hadError()) 1189 return Error(Tok.getLoc(), "unknown token in expression"); 1190 1191 if (!Done && UpdateLocLex) 1192 End = consumeToken(); 1193 } 1194 return false; 1195 } 1196 1197 std::unique_ptr<X86Operand> 1198 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, 1199 int64_t ImmDisp, unsigned Size) { 1200 const AsmToken &Tok = Parser.getTok(); 1201 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); 1202 if (getLexer().isNot(AsmToken::LBrac)) 1203 return ErrorOperand(BracLoc, "Expected '[' token!"); 1204 Parser.Lex(); // Eat '[' 1205 1206 SMLoc StartInBrac = Tok.getLoc(); 1207 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We 1208 // may have already parsed an immediate displacement before the bracketed 1209 // expression. 1210 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); 1211 if (ParseIntelExpression(SM, End)) 1212 return nullptr; 1213 1214 const MCExpr *Disp = nullptr; 1215 if (const MCExpr *Sym = SM.getSym()) { 1216 // A symbolic displacement. 1217 Disp = Sym; 1218 if (isParsingInlineAsm()) 1219 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(), 1220 ImmDisp, SM.getImm(), BracLoc, StartInBrac, 1221 End); 1222 } 1223 1224 if (SM.getImm() || !Disp) { 1225 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext()); 1226 if (Disp) 1227 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext()); 1228 else 1229 Disp = Imm; // An immediate displacement only. 1230 } 1231 1232 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC 1233 // will in fact do global lookup the field name inside all global typedefs, 1234 // but we don't emulate that. 1235 if (Tok.getString().find('.') != StringRef::npos) { 1236 const MCExpr *NewDisp; 1237 if (ParseIntelDotOperator(Disp, NewDisp)) 1238 return nullptr; 1239 1240 End = Tok.getEndLoc(); 1241 Parser.Lex(); // Eat the field. 1242 Disp = NewDisp; 1243 } 1244 1245 int BaseReg = SM.getBaseReg(); 1246 int IndexReg = SM.getIndexReg(); 1247 int Scale = SM.getScale(); 1248 if (!isParsingInlineAsm()) { 1249 // handle [-42] 1250 if (!BaseReg && !IndexReg) { 1251 if (!SegReg) 1252 return X86Operand::CreateMem(Disp, Start, End, Size); 1253 else 1254 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); 1255 } 1256 StringRef ErrMsg; 1257 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { 1258 Error(StartInBrac, ErrMsg); 1259 return nullptr; 1260 } 1261 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, 1262 End, Size); 1263 } 1264 1265 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); 1266 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, 1267 End, Size, SM.getSymName(), Info); 1268 } 1269 1270 // Inline assembly may use variable names with namespace alias qualifiers. 1271 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, 1272 StringRef &Identifier, 1273 InlineAsmIdentifierInfo &Info, 1274 bool IsUnevaluatedOperand, SMLoc &End) { 1275 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly."); 1276 Val = nullptr; 1277 1278 StringRef LineBuf(Identifier.data()); 1279 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); 1280 1281 const AsmToken &Tok = Parser.getTok(); 1282 1283 // Advance the token stream until the end of the current token is 1284 // after the end of what the frontend claimed. 1285 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); 1286 while (true) { 1287 End = Tok.getEndLoc(); 1288 getLexer().Lex(); 1289 1290 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?"); 1291 if (End.getPointer() == EndPtr) break; 1292 } 1293 1294 // Create the symbol reference. 1295 Identifier = LineBuf; 1296 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier); 1297 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 1298 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext()); 1299 return false; 1300 } 1301 1302 /// \brief Parse intel style segment override. 1303 std::unique_ptr<X86Operand> 1304 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, 1305 unsigned Size) { 1306 assert(SegReg != 0 && "Tried to parse a segment override without a segment!"); 1307 const AsmToken &Tok = Parser.getTok(); // Eat colon. 1308 if (Tok.isNot(AsmToken::Colon)) 1309 return ErrorOperand(Tok.getLoc(), "Expected ':' token!"); 1310 Parser.Lex(); // Eat ':' 1311 1312 int64_t ImmDisp = 0; 1313 if (getLexer().is(AsmToken::Integer)) { 1314 ImmDisp = Tok.getIntVal(); 1315 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer. 1316 1317 if (isParsingInlineAsm()) 1318 InstInfo->AsmRewrites->push_back( 1319 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc())); 1320 1321 if (getLexer().isNot(AsmToken::LBrac)) { 1322 // An immediate following a 'segment register', 'colon' token sequence can 1323 // be followed by a bracketed expression. If it isn't we know we have our 1324 // final segment override. 1325 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext()); 1326 return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0, 1327 /*Scale=*/1, Start, ImmDispToken.getEndLoc(), 1328 Size); 1329 } 1330 } 1331 1332 if (getLexer().is(AsmToken::LBrac)) 1333 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); 1334 1335 const MCExpr *Val; 1336 SMLoc End; 1337 if (!isParsingInlineAsm()) { 1338 if (getParser().parsePrimaryExpr(Val, End)) 1339 return ErrorOperand(Tok.getLoc(), "unknown token in expression"); 1340 1341 return X86Operand::CreateMem(Val, Start, End, Size); 1342 } 1343 1344 InlineAsmIdentifierInfo Info; 1345 StringRef Identifier = Tok.getString(); 1346 if (ParseIntelIdentifier(Val, Identifier, Info, 1347 /*Unevaluated=*/false, End)) 1348 return nullptr; 1349 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, 1350 /*Scale=*/1, Start, End, Size, Identifier, Info); 1351 } 1352 1353 /// ParseIntelMemOperand - Parse intel style memory operand. 1354 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, 1355 SMLoc Start, 1356 unsigned Size) { 1357 const AsmToken &Tok = Parser.getTok(); 1358 SMLoc End; 1359 1360 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. 1361 if (getLexer().is(AsmToken::LBrac)) 1362 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size); 1363 assert(ImmDisp == 0); 1364 1365 const MCExpr *Val; 1366 if (!isParsingInlineAsm()) { 1367 if (getParser().parsePrimaryExpr(Val, End)) 1368 return ErrorOperand(Tok.getLoc(), "unknown token in expression"); 1369 1370 return X86Operand::CreateMem(Val, Start, End, Size); 1371 } 1372 1373 InlineAsmIdentifierInfo Info; 1374 StringRef Identifier = Tok.getString(); 1375 if (ParseIntelIdentifier(Val, Identifier, Info, 1376 /*Unevaluated=*/false, End)) 1377 return nullptr; 1378 1379 if (!getLexer().is(AsmToken::LBrac)) 1380 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0, 1381 /*Scale=*/1, Start, End, Size, Identifier, Info); 1382 1383 Parser.Lex(); // Eat '[' 1384 1385 // Parse Identifier [ ImmDisp ] 1386 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true, 1387 /*AddImmPrefix=*/false); 1388 if (ParseIntelExpression(SM, End)) 1389 return nullptr; 1390 1391 if (SM.getSym()) { 1392 Error(Start, "cannot use more than one symbol in memory operand"); 1393 return nullptr; 1394 } 1395 if (SM.getBaseReg()) { 1396 Error(Start, "cannot use base register with variable reference"); 1397 return nullptr; 1398 } 1399 if (SM.getIndexReg()) { 1400 Error(Start, "cannot use index register with variable reference"); 1401 return nullptr; 1402 } 1403 1404 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext()); 1405 // BaseReg is non-zero to avoid assertions. In the context of inline asm, 1406 // we're pointing to a local variable in memory, so the base register is 1407 // really the frame or stack pointer. 1408 return X86Operand::CreateMem(/*SegReg=*/0, Disp, /*BaseReg=*/1, /*IndexReg=*/0, 1409 /*Scale=*/1, Start, End, Size, Identifier, 1410 Info.OpDecl); 1411 } 1412 1413 /// Parse the '.' operator. 1414 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, 1415 const MCExpr *&NewDisp) { 1416 const AsmToken &Tok = Parser.getTok(); 1417 int64_t OrigDispVal, DotDispVal; 1418 1419 // FIXME: Handle non-constant expressions. 1420 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) 1421 OrigDispVal = OrigDisp->getValue(); 1422 else 1423 return Error(Tok.getLoc(), "Non-constant offsets are not supported!"); 1424 1425 // Drop the optional '.'. 1426 StringRef DotDispStr = Tok.getString(); 1427 if (DotDispStr.startswith(".")) 1428 DotDispStr = DotDispStr.drop_front(1); 1429 1430 // .Imm gets lexed as a real. 1431 if (Tok.is(AsmToken::Real)) { 1432 APInt DotDisp; 1433 DotDispStr.getAsInteger(10, DotDisp); 1434 DotDispVal = DotDisp.getZExtValue(); 1435 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { 1436 unsigned DotDisp; 1437 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); 1438 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, 1439 DotDisp)) 1440 return Error(Tok.getLoc(), "Unable to lookup field reference!"); 1441 DotDispVal = DotDisp; 1442 } else 1443 return Error(Tok.getLoc(), "Unexpected token type!"); 1444 1445 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { 1446 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); 1447 unsigned Len = DotDispStr.size(); 1448 unsigned Val = OrigDispVal + DotDispVal; 1449 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len, 1450 Val)); 1451 } 1452 1453 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); 1454 return false; 1455 } 1456 1457 /// Parse the 'offset' operator. This operator is used to specify the 1458 /// location rather then the content of a variable. 1459 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() { 1460 const AsmToken &Tok = Parser.getTok(); 1461 SMLoc OffsetOfLoc = Tok.getLoc(); 1462 Parser.Lex(); // Eat offset. 1463 1464 const MCExpr *Val; 1465 InlineAsmIdentifierInfo Info; 1466 SMLoc Start = Tok.getLoc(), End; 1467 StringRef Identifier = Tok.getString(); 1468 if (ParseIntelIdentifier(Val, Identifier, Info, 1469 /*Unevaluated=*/false, End)) 1470 return nullptr; 1471 1472 // Don't emit the offset operator. 1473 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); 1474 1475 // The offset operator will have an 'r' constraint, thus we need to create 1476 // register operand to ensure proper matching. Just pick a GPR based on 1477 // the size of a pointer. 1478 unsigned RegNo = 1479 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX); 1480 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, 1481 OffsetOfLoc, Identifier, Info.OpDecl); 1482 } 1483 1484 enum IntelOperatorKind { 1485 IOK_LENGTH, 1486 IOK_SIZE, 1487 IOK_TYPE 1488 }; 1489 1490 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator 1491 /// returns the number of elements in an array. It returns the value 1 for 1492 /// non-array variables. The SIZE operator returns the size of a C or C++ 1493 /// variable. A variable's size is the product of its LENGTH and TYPE. The 1494 /// TYPE operator returns the size of a C or C++ type or variable. If the 1495 /// variable is an array, TYPE returns the size of a single element. 1496 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) { 1497 const AsmToken &Tok = Parser.getTok(); 1498 SMLoc TypeLoc = Tok.getLoc(); 1499 Parser.Lex(); // Eat operator. 1500 1501 const MCExpr *Val = nullptr; 1502 InlineAsmIdentifierInfo Info; 1503 SMLoc Start = Tok.getLoc(), End; 1504 StringRef Identifier = Tok.getString(); 1505 if (ParseIntelIdentifier(Val, Identifier, Info, 1506 /*Unevaluated=*/true, End)) 1507 return nullptr; 1508 1509 if (!Info.OpDecl) 1510 return ErrorOperand(Start, "unable to lookup expression"); 1511 1512 unsigned CVal = 0; 1513 switch(OpKind) { 1514 default: llvm_unreachable("Unexpected operand kind!"); 1515 case IOK_LENGTH: CVal = Info.Length; break; 1516 case IOK_SIZE: CVal = Info.Size; break; 1517 case IOK_TYPE: CVal = Info.Type; break; 1518 } 1519 1520 // Rewrite the type operator and the C or C++ type or variable in terms of an 1521 // immediate. E.g. TYPE foo -> $$4 1522 unsigned Len = End.getPointer() - TypeLoc.getPointer(); 1523 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); 1524 1525 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); 1526 return X86Operand::CreateImm(Imm, Start, End); 1527 } 1528 1529 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() { 1530 const AsmToken &Tok = Parser.getTok(); 1531 SMLoc Start, End; 1532 1533 // Offset, length, type and size operators. 1534 if (isParsingInlineAsm()) { 1535 StringRef AsmTokStr = Tok.getString(); 1536 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") 1537 return ParseIntelOffsetOfOperator(); 1538 if (AsmTokStr == "length" || AsmTokStr == "LENGTH") 1539 return ParseIntelOperator(IOK_LENGTH); 1540 if (AsmTokStr == "size" || AsmTokStr == "SIZE") 1541 return ParseIntelOperator(IOK_SIZE); 1542 if (AsmTokStr == "type" || AsmTokStr == "TYPE") 1543 return ParseIntelOperator(IOK_TYPE); 1544 } 1545 1546 unsigned Size = getIntelMemOperandSize(Tok.getString()); 1547 if (Size) { 1548 Parser.Lex(); // Eat operand size (e.g., byte, word). 1549 if (Tok.getString() != "PTR" && Tok.getString() != "ptr") 1550 return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!"); 1551 Parser.Lex(); // Eat ptr. 1552 } 1553 Start = Tok.getLoc(); 1554 1555 // Immediate. 1556 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) || 1557 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) { 1558 AsmToken StartTok = Tok; 1559 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, 1560 /*AddImmPrefix=*/false); 1561 if (ParseIntelExpression(SM, End)) 1562 return nullptr; 1563 1564 int64_t Imm = SM.getImm(); 1565 if (isParsingInlineAsm()) { 1566 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); 1567 if (StartTok.getString().size() == Len) 1568 // Just add a prefix if this wasn't a complex immediate expression. 1569 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); 1570 else 1571 // Otherwise, rewrite the complex expression as a single immediate. 1572 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm)); 1573 } 1574 1575 if (getLexer().isNot(AsmToken::LBrac)) { 1576 // If a directional label (ie. 1f or 2b) was parsed above from 1577 // ParseIntelExpression() then SM.getSym() was set to a pointer to 1578 // to the MCExpr with the directional local symbol and this is a 1579 // memory operand not an immediate operand. 1580 if (SM.getSym()) 1581 return X86Operand::CreateMem(SM.getSym(), Start, End, Size); 1582 1583 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext()); 1584 return X86Operand::CreateImm(ImmExpr, Start, End); 1585 } 1586 1587 // Only positive immediates are valid. 1588 if (Imm < 0) 1589 return ErrorOperand(Start, "expected a positive immediate displacement " 1590 "before bracketed expr."); 1591 1592 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. 1593 return ParseIntelMemOperand(Imm, Start, Size); 1594 } 1595 1596 // Register. 1597 unsigned RegNo = 0; 1598 if (!ParseRegister(RegNo, Start, End)) { 1599 // If this is a segment register followed by a ':', then this is the start 1600 // of a segment override, otherwise this is a normal register reference. 1601 if (getLexer().isNot(AsmToken::Colon)) 1602 return X86Operand::CreateReg(RegNo, Start, End); 1603 1604 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size); 1605 } 1606 1607 // Memory operand. 1608 return ParseIntelMemOperand(/*Disp=*/0, Start, Size); 1609 } 1610 1611 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() { 1612 switch (getLexer().getKind()) { 1613 default: 1614 // Parse a memory operand with no segment register. 1615 return ParseMemOperand(0, Parser.getTok().getLoc()); 1616 case AsmToken::Percent: { 1617 // Read the register. 1618 unsigned RegNo; 1619 SMLoc Start, End; 1620 if (ParseRegister(RegNo, Start, End)) return nullptr; 1621 if (RegNo == X86::EIZ || RegNo == X86::RIZ) { 1622 Error(Start, "%eiz and %riz can only be used as index registers", 1623 SMRange(Start, End)); 1624 return nullptr; 1625 } 1626 1627 // If this is a segment register followed by a ':', then this is the start 1628 // of a memory reference, otherwise this is a normal register reference. 1629 if (getLexer().isNot(AsmToken::Colon)) 1630 return X86Operand::CreateReg(RegNo, Start, End); 1631 1632 getParser().Lex(); // Eat the colon. 1633 return ParseMemOperand(RegNo, Start); 1634 } 1635 case AsmToken::Dollar: { 1636 // $42 -> immediate. 1637 SMLoc Start = Parser.getTok().getLoc(), End; 1638 Parser.Lex(); 1639 const MCExpr *Val; 1640 if (getParser().parseExpression(Val, End)) 1641 return nullptr; 1642 return X86Operand::CreateImm(Val, Start, End); 1643 } 1644 } 1645 } 1646 1647 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, 1648 const MCParsedAsmOperand &Op) { 1649 if(STI.getFeatureBits() & X86::FeatureAVX512) { 1650 if (getLexer().is(AsmToken::LCurly)) { 1651 // Eat "{" and mark the current place. 1652 const SMLoc consumedToken = consumeToken(); 1653 // Distinguish {1to<NUM>} from {%k<NUM>}. 1654 if(getLexer().is(AsmToken::Integer)) { 1655 // Parse memory broadcasting ({1to<NUM>}). 1656 if (getLexer().getTok().getIntVal() != 1) 1657 return !ErrorAndEatStatement(getLexer().getLoc(), 1658 "Expected 1to<NUM> at this point"); 1659 Parser.Lex(); // Eat "1" of 1to8 1660 if (!getLexer().is(AsmToken::Identifier) || 1661 !getLexer().getTok().getIdentifier().startswith("to")) 1662 return !ErrorAndEatStatement(getLexer().getLoc(), 1663 "Expected 1to<NUM> at this point"); 1664 // Recognize only reasonable suffixes. 1665 const char *BroadcastPrimitive = 1666 StringSwitch<const char*>(getLexer().getTok().getIdentifier()) 1667 .Case("to8", "{1to8}") 1668 .Case("to16", "{1to16}") 1669 .Default(nullptr); 1670 if (!BroadcastPrimitive) 1671 return !ErrorAndEatStatement(getLexer().getLoc(), 1672 "Invalid memory broadcast primitive."); 1673 Parser.Lex(); // Eat "toN" of 1toN 1674 if (!getLexer().is(AsmToken::RCurly)) 1675 return !ErrorAndEatStatement(getLexer().getLoc(), 1676 "Expected } at this point"); 1677 Parser.Lex(); // Eat "}" 1678 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive, 1679 consumedToken)); 1680 // No AVX512 specific primitives can pass 1681 // after memory broadcasting, so return. 1682 return true; 1683 } else { 1684 // Parse mask register {%k1} 1685 Operands.push_back(X86Operand::CreateToken("{", consumedToken)); 1686 if (std::unique_ptr<X86Operand> Op = ParseOperand()) { 1687 Operands.push_back(std::move(Op)); 1688 if (!getLexer().is(AsmToken::RCurly)) 1689 return !ErrorAndEatStatement(getLexer().getLoc(), 1690 "Expected } at this point"); 1691 Operands.push_back(X86Operand::CreateToken("}", consumeToken())); 1692 1693 // Parse "zeroing non-masked" semantic {z} 1694 if (getLexer().is(AsmToken::LCurly)) { 1695 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken())); 1696 if (!getLexer().is(AsmToken::Identifier) || 1697 getLexer().getTok().getIdentifier() != "z") 1698 return !ErrorAndEatStatement(getLexer().getLoc(), 1699 "Expected z at this point"); 1700 Parser.Lex(); // Eat the z 1701 if (!getLexer().is(AsmToken::RCurly)) 1702 return !ErrorAndEatStatement(getLexer().getLoc(), 1703 "Expected } at this point"); 1704 Parser.Lex(); // Eat the } 1705 } 1706 } 1707 } 1708 } 1709 } 1710 return true; 1711 } 1712 1713 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix 1714 /// has already been parsed if present. 1715 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg, 1716 SMLoc MemStart) { 1717 1718 // We have to disambiguate a parenthesized expression "(4+5)" from the start 1719 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 1720 // only way to do this without lookahead is to eat the '(' and see what is 1721 // after it. 1722 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 1723 if (getLexer().isNot(AsmToken::LParen)) { 1724 SMLoc ExprEnd; 1725 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr; 1726 1727 // After parsing the base expression we could either have a parenthesized 1728 // memory address or not. If not, return now. If so, eat the (. 1729 if (getLexer().isNot(AsmToken::LParen)) { 1730 // Unless we have a segment register, treat this as an immediate. 1731 if (SegReg == 0) 1732 return X86Operand::CreateMem(Disp, MemStart, ExprEnd); 1733 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 1734 } 1735 1736 // Eat the '('. 1737 Parser.Lex(); 1738 } else { 1739 // Okay, we have a '('. We don't know if this is an expression or not, but 1740 // so we have to eat the ( to see beyond it. 1741 SMLoc LParenLoc = Parser.getTok().getLoc(); 1742 Parser.Lex(); // Eat the '('. 1743 1744 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { 1745 // Nothing to do here, fall into the code below with the '(' part of the 1746 // memory operand consumed. 1747 } else { 1748 SMLoc ExprEnd; 1749 1750 // It must be an parenthesized expression, parse it now. 1751 if (getParser().parseParenExpression(Disp, ExprEnd)) 1752 return nullptr; 1753 1754 // After parsing the base expression we could either have a parenthesized 1755 // memory address or not. If not, return now. If so, eat the (. 1756 if (getLexer().isNot(AsmToken::LParen)) { 1757 // Unless we have a segment register, treat this as an immediate. 1758 if (SegReg == 0) 1759 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); 1760 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 1761 } 1762 1763 // Eat the '('. 1764 Parser.Lex(); 1765 } 1766 } 1767 1768 // If we reached here, then we just ate the ( of the memory operand. Process 1769 // the rest of the memory operand. 1770 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 1771 SMLoc IndexLoc, BaseLoc; 1772 1773 if (getLexer().is(AsmToken::Percent)) { 1774 SMLoc StartLoc, EndLoc; 1775 BaseLoc = Parser.getTok().getLoc(); 1776 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr; 1777 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { 1778 Error(StartLoc, "eiz and riz can only be used as index registers", 1779 SMRange(StartLoc, EndLoc)); 1780 return nullptr; 1781 } 1782 } 1783 1784 if (getLexer().is(AsmToken::Comma)) { 1785 Parser.Lex(); // Eat the comma. 1786 IndexLoc = Parser.getTok().getLoc(); 1787 1788 // Following the comma we should have either an index register, or a scale 1789 // value. We don't support the later form, but we want to parse it 1790 // correctly. 1791 // 1792 // Not that even though it would be completely consistent to support syntax 1793 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 1794 if (getLexer().is(AsmToken::Percent)) { 1795 SMLoc L; 1796 if (ParseRegister(IndexReg, L, L)) return nullptr; 1797 1798 if (getLexer().isNot(AsmToken::RParen)) { 1799 // Parse the scale amount: 1800 // ::= ',' [scale-expression] 1801 if (getLexer().isNot(AsmToken::Comma)) { 1802 Error(Parser.getTok().getLoc(), 1803 "expected comma in scale expression"); 1804 return nullptr; 1805 } 1806 Parser.Lex(); // Eat the comma. 1807 1808 if (getLexer().isNot(AsmToken::RParen)) { 1809 SMLoc Loc = Parser.getTok().getLoc(); 1810 1811 int64_t ScaleVal; 1812 if (getParser().parseAbsoluteExpression(ScaleVal)){ 1813 Error(Loc, "expected scale expression"); 1814 return nullptr; 1815 } 1816 1817 // Validate the scale amount. 1818 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 1819 ScaleVal != 1) { 1820 Error(Loc, "scale factor in 16-bit address must be 1"); 1821 return nullptr; 1822 } 1823 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ 1824 Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); 1825 return nullptr; 1826 } 1827 Scale = (unsigned)ScaleVal; 1828 } 1829 } 1830 } else if (getLexer().isNot(AsmToken::RParen)) { 1831 // A scale amount without an index is ignored. 1832 // index. 1833 SMLoc Loc = Parser.getTok().getLoc(); 1834 1835 int64_t Value; 1836 if (getParser().parseAbsoluteExpression(Value)) 1837 return nullptr; 1838 1839 if (Value != 1) 1840 Warning(Loc, "scale factor without index register is ignored"); 1841 Scale = 1; 1842 } 1843 } 1844 1845 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 1846 if (getLexer().isNot(AsmToken::RParen)) { 1847 Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); 1848 return nullptr; 1849 } 1850 SMLoc MemEnd = Parser.getTok().getEndLoc(); 1851 Parser.Lex(); // Eat the ')'. 1852 1853 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed, 1854 // and then only in non-64-bit modes. Except for DX, which is a special case 1855 // because an unofficial form of in/out instructions uses it. 1856 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 1857 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP && 1858 BaseReg != X86::SI && BaseReg != X86::DI)) && 1859 BaseReg != X86::DX) { 1860 Error(BaseLoc, "invalid 16-bit base register"); 1861 return nullptr; 1862 } 1863 if (BaseReg == 0 && 1864 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) { 1865 Error(IndexLoc, "16-bit memory operand may not include only index register"); 1866 return nullptr; 1867 } 1868 1869 StringRef ErrMsg; 1870 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { 1871 Error(BaseLoc, ErrMsg); 1872 return nullptr; 1873 } 1874 1875 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 1876 MemStart, MemEnd); 1877 } 1878 1879 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1880 SMLoc NameLoc, OperandVector &Operands) { 1881 InstInfo = &Info; 1882 StringRef PatchedName = Name; 1883 1884 // FIXME: Hack to recognize setneb as setne. 1885 if (PatchedName.startswith("set") && PatchedName.endswith("b") && 1886 PatchedName != "setb" && PatchedName != "setnb") 1887 PatchedName = PatchedName.substr(0, Name.size()-1); 1888 1889 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 1890 const MCExpr *ExtraImmOp = nullptr; 1891 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && 1892 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 1893 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 1894 bool IsVCMP = PatchedName[0] == 'v'; 1895 unsigned SSECCIdx = IsVCMP ? 4 : 3; 1896 unsigned SSEComparisonCode = StringSwitch<unsigned>( 1897 PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) 1898 .Case("eq", 0x00) 1899 .Case("lt", 0x01) 1900 .Case("le", 0x02) 1901 .Case("unord", 0x03) 1902 .Case("neq", 0x04) 1903 .Case("nlt", 0x05) 1904 .Case("nle", 0x06) 1905 .Case("ord", 0x07) 1906 /* AVX only from here */ 1907 .Case("eq_uq", 0x08) 1908 .Case("nge", 0x09) 1909 .Case("ngt", 0x0A) 1910 .Case("false", 0x0B) 1911 .Case("neq_oq", 0x0C) 1912 .Case("ge", 0x0D) 1913 .Case("gt", 0x0E) 1914 .Case("true", 0x0F) 1915 .Case("eq_os", 0x10) 1916 .Case("lt_oq", 0x11) 1917 .Case("le_oq", 0x12) 1918 .Case("unord_s", 0x13) 1919 .Case("neq_us", 0x14) 1920 .Case("nlt_uq", 0x15) 1921 .Case("nle_uq", 0x16) 1922 .Case("ord_s", 0x17) 1923 .Case("eq_us", 0x18) 1924 .Case("nge_uq", 0x19) 1925 .Case("ngt_uq", 0x1A) 1926 .Case("false_os", 0x1B) 1927 .Case("neq_os", 0x1C) 1928 .Case("ge_oq", 0x1D) 1929 .Case("gt_oq", 0x1E) 1930 .Case("true_us", 0x1F) 1931 .Default(~0U); 1932 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) { 1933 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, 1934 getParser().getContext()); 1935 if (PatchedName.endswith("ss")) { 1936 PatchedName = IsVCMP ? "vcmpss" : "cmpss"; 1937 } else if (PatchedName.endswith("sd")) { 1938 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; 1939 } else if (PatchedName.endswith("ps")) { 1940 PatchedName = IsVCMP ? "vcmpps" : "cmpps"; 1941 } else { 1942 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); 1943 PatchedName = IsVCMP ? "vcmppd" : "cmppd"; 1944 } 1945 } 1946 } 1947 1948 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 1949 1950 if (ExtraImmOp && !isParsingIntelSyntax()) 1951 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 1952 1953 // Determine whether this is an instruction prefix. 1954 bool isPrefix = 1955 Name == "lock" || Name == "rep" || 1956 Name == "repe" || Name == "repz" || 1957 Name == "repne" || Name == "repnz" || 1958 Name == "rex64" || Name == "data16"; 1959 1960 1961 // This does the actual operand parsing. Don't parse any more if we have a 1962 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we 1963 // just want to parse the "lock" as the first instruction and the "incl" as 1964 // the next one. 1965 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { 1966 1967 // Parse '*' modifier. 1968 if (getLexer().is(AsmToken::Star)) 1969 Operands.push_back(X86Operand::CreateToken("*", consumeToken())); 1970 1971 // Read the operands. 1972 while(1) { 1973 if (std::unique_ptr<X86Operand> Op = ParseOperand()) { 1974 Operands.push_back(std::move(Op)); 1975 if (!HandleAVX512Operand(Operands, *Operands.back())) 1976 return true; 1977 } else { 1978 Parser.eatToEndOfStatement(); 1979 return true; 1980 } 1981 // check for comma and eat it 1982 if (getLexer().is(AsmToken::Comma)) 1983 Parser.Lex(); 1984 else 1985 break; 1986 } 1987 1988 if (getLexer().isNot(AsmToken::EndOfStatement)) 1989 return ErrorAndEatStatement(getLexer().getLoc(), 1990 "unexpected token in argument list"); 1991 } 1992 1993 // Consume the EndOfStatement or the prefix separator Slash 1994 if (getLexer().is(AsmToken::EndOfStatement) || 1995 (isPrefix && getLexer().is(AsmToken::Slash))) 1996 Parser.Lex(); 1997 1998 if (ExtraImmOp && isParsingIntelSyntax()) 1999 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 2000 2001 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> 2002 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely 2003 // documented form in various unofficial manuals, so a lot of code uses it. 2004 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && 2005 Operands.size() == 3) { 2006 X86Operand &Op = (X86Operand &)*Operands.back(); 2007 if (Op.isMem() && Op.Mem.SegReg == 0 && 2008 isa<MCConstantExpr>(Op.Mem.Disp) && 2009 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 2010 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 2011 SMLoc Loc = Op.getEndLoc(); 2012 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 2013 } 2014 } 2015 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". 2016 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && 2017 Operands.size() == 3) { 2018 X86Operand &Op = (X86Operand &)*Operands[1]; 2019 if (Op.isMem() && Op.Mem.SegReg == 0 && 2020 isa<MCConstantExpr>(Op.Mem.Disp) && 2021 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 2022 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 2023 SMLoc Loc = Op.getEndLoc(); 2024 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 2025 } 2026 } 2027 2028 // Append default arguments to "ins[bwld]" 2029 if (Name.startswith("ins") && Operands.size() == 1 && 2030 (Name == "insb" || Name == "insw" || Name == "insl" || 2031 Name == "insd" )) { 2032 if (isParsingIntelSyntax()) { 2033 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc)); 2034 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2035 } else { 2036 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc)); 2037 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2038 } 2039 } 2040 2041 // Append default arguments to "outs[bwld]" 2042 if (Name.startswith("outs") && Operands.size() == 1 && 2043 (Name == "outsb" || Name == "outsw" || Name == "outsl" || 2044 Name == "outsd" )) { 2045 if (isParsingIntelSyntax()) { 2046 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2047 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc)); 2048 } else { 2049 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2050 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc)); 2051 } 2052 } 2053 2054 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate 2055 // values of $SIREG according to the mode. It would be nice if this 2056 // could be achieved with InstAlias in the tables. 2057 if (Name.startswith("lods") && Operands.size() == 1 && 2058 (Name == "lods" || Name == "lodsb" || Name == "lodsw" || 2059 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) 2060 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2061 2062 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate 2063 // values of $DIREG according to the mode. It would be nice if this 2064 // could be achieved with InstAlias in the tables. 2065 if (Name.startswith("stos") && Operands.size() == 1 && 2066 (Name == "stos" || Name == "stosb" || Name == "stosw" || 2067 Name == "stosl" || Name == "stosd" || Name == "stosq")) 2068 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2069 2070 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate 2071 // values of $DIREG according to the mode. It would be nice if this 2072 // could be achieved with InstAlias in the tables. 2073 if (Name.startswith("scas") && Operands.size() == 1 && 2074 (Name == "scas" || Name == "scasb" || Name == "scasw" || 2075 Name == "scasl" || Name == "scasd" || Name == "scasq")) 2076 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2077 2078 // Add default SI and DI operands to "cmps[bwlq]". 2079 if (Name.startswith("cmps") && 2080 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" || 2081 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) { 2082 if (Operands.size() == 1) { 2083 if (isParsingIntelSyntax()) { 2084 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2085 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2086 } else { 2087 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2088 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2089 } 2090 } else if (Operands.size() == 3) { 2091 X86Operand &Op = (X86Operand &)*Operands[1]; 2092 X86Operand &Op2 = (X86Operand &)*Operands[2]; 2093 if (!doSrcDstMatch(Op, Op2)) 2094 return Error(Op.getStartLoc(), 2095 "mismatching source and destination index registers"); 2096 } 2097 } 2098 2099 // Add default SI and DI operands to "movs[bwlq]". 2100 if ((Name.startswith("movs") && 2101 (Name == "movs" || Name == "movsb" || Name == "movsw" || 2102 Name == "movsl" || Name == "movsd" || Name == "movsq")) || 2103 (Name.startswith("smov") && 2104 (Name == "smov" || Name == "smovb" || Name == "smovw" || 2105 Name == "smovl" || Name == "smovd" || Name == "smovq"))) { 2106 if (Operands.size() == 1) { 2107 if (Name == "movsd") 2108 Operands.back() = X86Operand::CreateToken("movsl", NameLoc); 2109 if (isParsingIntelSyntax()) { 2110 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2111 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2112 } else { 2113 Operands.push_back(DefaultMemSIOperand(NameLoc)); 2114 Operands.push_back(DefaultMemDIOperand(NameLoc)); 2115 } 2116 } else if (Operands.size() == 3) { 2117 X86Operand &Op = (X86Operand &)*Operands[1]; 2118 X86Operand &Op2 = (X86Operand &)*Operands[2]; 2119 if (!doSrcDstMatch(Op, Op2)) 2120 return Error(Op.getStartLoc(), 2121 "mismatching source and destination index registers"); 2122 } 2123 } 2124 2125 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to 2126 // "shift <op>". 2127 if ((Name.startswith("shr") || Name.startswith("sar") || 2128 Name.startswith("shl") || Name.startswith("sal") || 2129 Name.startswith("rcl") || Name.startswith("rcr") || 2130 Name.startswith("rol") || Name.startswith("ror")) && 2131 Operands.size() == 3) { 2132 if (isParsingIntelSyntax()) { 2133 // Intel syntax 2134 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]); 2135 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) && 2136 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1) 2137 Operands.pop_back(); 2138 } else { 2139 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]); 2140 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) && 2141 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1) 2142 Operands.erase(Operands.begin() + 1); 2143 } 2144 } 2145 2146 // Transforms "int $3" into "int3" as a size optimization. We can't write an 2147 // instalias with an immediate operand yet. 2148 if (Name == "int" && Operands.size() == 2) { 2149 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]); 2150 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) && 2151 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) { 2152 Operands.erase(Operands.begin() + 1); 2153 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3"); 2154 } 2155 } 2156 2157 return false; 2158 } 2159 2160 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, 2161 bool isCmp) { 2162 MCInst TmpInst; 2163 TmpInst.setOpcode(Opcode); 2164 if (!isCmp) 2165 TmpInst.addOperand(MCOperand::CreateReg(Reg)); 2166 TmpInst.addOperand(MCOperand::CreateReg(Reg)); 2167 TmpInst.addOperand(Inst.getOperand(0)); 2168 Inst = TmpInst; 2169 return true; 2170 } 2171 2172 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, 2173 bool isCmp = false) { 2174 if (!Inst.getOperand(0).isImm() || 2175 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 2176 return false; 2177 2178 return convertToSExti8(Inst, Opcode, X86::AX, isCmp); 2179 } 2180 2181 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, 2182 bool isCmp = false) { 2183 if (!Inst.getOperand(0).isImm() || 2184 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 2185 return false; 2186 2187 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp); 2188 } 2189 2190 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, 2191 bool isCmp = false) { 2192 if (!Inst.getOperand(0).isImm() || 2193 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 2194 return false; 2195 2196 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); 2197 } 2198 2199 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { 2200 switch (Inst.getOpcode()) { 2201 default: return false; 2202 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8); 2203 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8); 2204 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8); 2205 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8); 2206 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8); 2207 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8); 2208 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8); 2209 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8); 2210 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8); 2211 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true); 2212 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true); 2213 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true); 2214 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8); 2215 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8); 2216 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8); 2217 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8); 2218 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8); 2219 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8); 2220 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8); 2221 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8); 2222 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8); 2223 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8); 2224 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8); 2225 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8); 2226 case X86::VMOVAPDrr: 2227 case X86::VMOVAPDYrr: 2228 case X86::VMOVAPSrr: 2229 case X86::VMOVAPSYrr: 2230 case X86::VMOVDQArr: 2231 case X86::VMOVDQAYrr: 2232 case X86::VMOVDQUrr: 2233 case X86::VMOVDQUYrr: 2234 case X86::VMOVUPDrr: 2235 case X86::VMOVUPDYrr: 2236 case X86::VMOVUPSrr: 2237 case X86::VMOVUPSYrr: { 2238 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) || 2239 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg())) 2240 return false; 2241 2242 unsigned NewOpc; 2243 switch (Inst.getOpcode()) { 2244 default: llvm_unreachable("Invalid opcode"); 2245 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; 2246 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; 2247 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; 2248 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; 2249 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; 2250 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; 2251 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; 2252 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; 2253 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; 2254 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; 2255 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; 2256 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; 2257 } 2258 Inst.setOpcode(NewOpc); 2259 return true; 2260 } 2261 case X86::VMOVSDrr: 2262 case X86::VMOVSSrr: { 2263 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) || 2264 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg())) 2265 return false; 2266 unsigned NewOpc; 2267 switch (Inst.getOpcode()) { 2268 default: llvm_unreachable("Invalid opcode"); 2269 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; 2270 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; 2271 } 2272 Inst.setOpcode(NewOpc); 2273 return true; 2274 } 2275 } 2276 } 2277 2278 static const char *getSubtargetFeatureName(unsigned Val); 2279 2280 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands, 2281 MCStreamer &Out) { 2282 Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII, 2283 Out); 2284 Out.EmitInstruction(Inst, STI); 2285 } 2286 2287 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2288 OperandVector &Operands, 2289 MCStreamer &Out, unsigned &ErrorInfo, 2290 bool MatchingInlineAsm) { 2291 assert(!Operands.empty() && "Unexpect empty operand list!"); 2292 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]); 2293 assert(Op.isToken() && "Leading operand should always be a mnemonic!"); 2294 ArrayRef<SMRange> EmptyRanges = None; 2295 2296 // First, handle aliases that expand to multiple instructions. 2297 // FIXME: This should be replaced with a real .td file alias mechanism. 2298 // Also, MatchInstructionImpl should actually *do* the EmitInstruction 2299 // call. 2300 if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" || 2301 Op.getToken() == "fstsww" || Op.getToken() == "fstcww" || 2302 Op.getToken() == "finit" || Op.getToken() == "fsave" || 2303 Op.getToken() == "fstenv" || Op.getToken() == "fclex") { 2304 MCInst Inst; 2305 Inst.setOpcode(X86::WAIT); 2306 Inst.setLoc(IDLoc); 2307 if (!MatchingInlineAsm) 2308 EmitInstruction(Inst, Operands, Out); 2309 2310 const char *Repl = StringSwitch<const char *>(Op.getToken()) 2311 .Case("finit", "fninit") 2312 .Case("fsave", "fnsave") 2313 .Case("fstcw", "fnstcw") 2314 .Case("fstcww", "fnstcw") 2315 .Case("fstenv", "fnstenv") 2316 .Case("fstsw", "fnstsw") 2317 .Case("fstsww", "fnstsw") 2318 .Case("fclex", "fnclex") 2319 .Default(nullptr); 2320 assert(Repl && "Unknown wait-prefixed instruction"); 2321 Operands[0] = X86Operand::CreateToken(Repl, IDLoc); 2322 } 2323 2324 bool WasOriginallyInvalidOperand = false; 2325 MCInst Inst; 2326 2327 // First, try a direct match. 2328 switch (MatchInstructionImpl(Operands, Inst, 2329 ErrorInfo, MatchingInlineAsm, 2330 isParsingIntelSyntax())) { 2331 default: break; 2332 case Match_Success: 2333 // Some instructions need post-processing to, for example, tweak which 2334 // encoding is selected. Loop on it while changes happen so the 2335 // individual transformations can chain off each other. 2336 if (!MatchingInlineAsm) 2337 while (processInstruction(Inst, Operands)) 2338 ; 2339 2340 Inst.setLoc(IDLoc); 2341 if (!MatchingInlineAsm) 2342 EmitInstruction(Inst, Operands, Out); 2343 Opcode = Inst.getOpcode(); 2344 return false; 2345 case Match_MissingFeature: { 2346 assert(ErrorInfo && "Unknown missing feature!"); 2347 // Special case the error message for the very common case where only 2348 // a single subtarget feature is missing. 2349 std::string Msg = "instruction requires:"; 2350 unsigned Mask = 1; 2351 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { 2352 if (ErrorInfo & Mask) { 2353 Msg += " "; 2354 Msg += getSubtargetFeatureName(ErrorInfo & Mask); 2355 } 2356 Mask <<= 1; 2357 } 2358 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm); 2359 } 2360 case Match_InvalidOperand: 2361 WasOriginallyInvalidOperand = true; 2362 break; 2363 case Match_MnemonicFail: 2364 break; 2365 } 2366 2367 // FIXME: Ideally, we would only attempt suffix matches for things which are 2368 // valid prefixes, and we could just infer the right unambiguous 2369 // type. However, that requires substantially more matcher support than the 2370 // following hack. 2371 2372 // Change the operand to point to a temporary token. 2373 StringRef Base = Op.getToken(); 2374 SmallString<16> Tmp; 2375 Tmp += Base; 2376 Tmp += ' '; 2377 Op.setTokenValue(Tmp.str()); 2378 2379 // If this instruction starts with an 'f', then it is a floating point stack 2380 // instruction. These come in up to three forms for 32-bit, 64-bit, and 2381 // 80-bit floating point, which use the suffixes s,l,t respectively. 2382 // 2383 // Otherwise, we assume that this may be an integer instruction, which comes 2384 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. 2385 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; 2386 2387 // Check for the various suffix matches. 2388 Tmp[Base.size()] = Suffixes[0]; 2389 unsigned ErrorInfoIgnore; 2390 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings. 2391 unsigned Match1, Match2, Match3, Match4; 2392 2393 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2394 MatchingInlineAsm, isParsingIntelSyntax()); 2395 // If this returned as a missing feature failure, remember that. 2396 if (Match1 == Match_MissingFeature) 2397 ErrorInfoMissingFeature = ErrorInfoIgnore; 2398 Tmp[Base.size()] = Suffixes[1]; 2399 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2400 MatchingInlineAsm, isParsingIntelSyntax()); 2401 // If this returned as a missing feature failure, remember that. 2402 if (Match2 == Match_MissingFeature) 2403 ErrorInfoMissingFeature = ErrorInfoIgnore; 2404 Tmp[Base.size()] = Suffixes[2]; 2405 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2406 MatchingInlineAsm, isParsingIntelSyntax()); 2407 // If this returned as a missing feature failure, remember that. 2408 if (Match3 == Match_MissingFeature) 2409 ErrorInfoMissingFeature = ErrorInfoIgnore; 2410 Tmp[Base.size()] = Suffixes[3]; 2411 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2412 MatchingInlineAsm, isParsingIntelSyntax()); 2413 // If this returned as a missing feature failure, remember that. 2414 if (Match4 == Match_MissingFeature) 2415 ErrorInfoMissingFeature = ErrorInfoIgnore; 2416 2417 // Restore the old token. 2418 Op.setTokenValue(Base); 2419 2420 // If exactly one matched, then we treat that as a successful match (and the 2421 // instruction will already have been filled in correctly, since the failing 2422 // matches won't have modified it). 2423 unsigned NumSuccessfulMatches = 2424 (Match1 == Match_Success) + (Match2 == Match_Success) + 2425 (Match3 == Match_Success) + (Match4 == Match_Success); 2426 if (NumSuccessfulMatches == 1) { 2427 Inst.setLoc(IDLoc); 2428 if (!MatchingInlineAsm) 2429 EmitInstruction(Inst, Operands, Out); 2430 Opcode = Inst.getOpcode(); 2431 return false; 2432 } 2433 2434 // Otherwise, the match failed, try to produce a decent error message. 2435 2436 // If we had multiple suffix matches, then identify this as an ambiguous 2437 // match. 2438 if (NumSuccessfulMatches > 1) { 2439 char MatchChars[4]; 2440 unsigned NumMatches = 0; 2441 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0]; 2442 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1]; 2443 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2]; 2444 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3]; 2445 2446 SmallString<126> Msg; 2447 raw_svector_ostream OS(Msg); 2448 OS << "ambiguous instructions require an explicit suffix (could be "; 2449 for (unsigned i = 0; i != NumMatches; ++i) { 2450 if (i != 0) 2451 OS << ", "; 2452 if (i + 1 == NumMatches) 2453 OS << "or "; 2454 OS << "'" << Base << MatchChars[i] << "'"; 2455 } 2456 OS << ")"; 2457 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm); 2458 return true; 2459 } 2460 2461 // Okay, we know that none of the variants matched successfully. 2462 2463 // If all of the instructions reported an invalid mnemonic, then the original 2464 // mnemonic was invalid. 2465 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && 2466 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { 2467 if (!WasOriginallyInvalidOperand) { 2468 ArrayRef<SMRange> Ranges = 2469 MatchingInlineAsm ? EmptyRanges : Op.getLocRange(); 2470 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", 2471 Ranges, MatchingInlineAsm); 2472 } 2473 2474 // Recover location info for the operand if we know which was the problem. 2475 if (ErrorInfo != ~0U) { 2476 if (ErrorInfo >= Operands.size()) 2477 return Error(IDLoc, "too few operands for instruction", 2478 EmptyRanges, MatchingInlineAsm); 2479 2480 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo]; 2481 if (Operand.getStartLoc().isValid()) { 2482 SMRange OperandRange = Operand.getLocRange(); 2483 return Error(Operand.getStartLoc(), "invalid operand for instruction", 2484 OperandRange, MatchingInlineAsm); 2485 } 2486 } 2487 2488 return Error(IDLoc, "invalid operand for instruction", EmptyRanges, 2489 MatchingInlineAsm); 2490 } 2491 2492 // If one instruction matched with a missing feature, report this as a 2493 // missing feature. 2494 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + 2495 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ 2496 std::string Msg = "instruction requires:"; 2497 unsigned Mask = 1; 2498 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) { 2499 if (ErrorInfoMissingFeature & Mask) { 2500 Msg += " "; 2501 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask); 2502 } 2503 Mask <<= 1; 2504 } 2505 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm); 2506 } 2507 2508 // If one instruction matched with an invalid operand, report this as an 2509 // operand failure. 2510 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + 2511 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ 2512 Error(IDLoc, "invalid operand for instruction", EmptyRanges, 2513 MatchingInlineAsm); 2514 return true; 2515 } 2516 2517 // If all of these were an outright failure, report it in a useless way. 2518 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", 2519 EmptyRanges, MatchingInlineAsm); 2520 return true; 2521 } 2522 2523 2524 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { 2525 StringRef IDVal = DirectiveID.getIdentifier(); 2526 if (IDVal == ".word") 2527 return ParseDirectiveWord(2, DirectiveID.getLoc()); 2528 else if (IDVal.startswith(".code")) 2529 return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); 2530 else if (IDVal.startswith(".att_syntax")) { 2531 getParser().setAssemblerDialect(0); 2532 return false; 2533 } else if (IDVal.startswith(".intel_syntax")) { 2534 getParser().setAssemblerDialect(1); 2535 if (getLexer().isNot(AsmToken::EndOfStatement)) { 2536 // FIXME: Handle noprefix 2537 if (Parser.getTok().getString() == "noprefix") 2538 Parser.Lex(); 2539 } 2540 return false; 2541 } 2542 return true; 2543 } 2544 2545 /// ParseDirectiveWord 2546 /// ::= .word [ expression (, expression)* ] 2547 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 2548 if (getLexer().isNot(AsmToken::EndOfStatement)) { 2549 for (;;) { 2550 const MCExpr *Value; 2551 if (getParser().parseExpression(Value)) 2552 return false; 2553 2554 getParser().getStreamer().EmitValue(Value, Size); 2555 2556 if (getLexer().is(AsmToken::EndOfStatement)) 2557 break; 2558 2559 // FIXME: Improve diagnostic. 2560 if (getLexer().isNot(AsmToken::Comma)) { 2561 Error(L, "unexpected token in directive"); 2562 return false; 2563 } 2564 Parser.Lex(); 2565 } 2566 } 2567 2568 Parser.Lex(); 2569 return false; 2570 } 2571 2572 /// ParseDirectiveCode 2573 /// ::= .code16 | .code32 | .code64 2574 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { 2575 if (IDVal == ".code16") { 2576 Parser.Lex(); 2577 if (!is16BitMode()) { 2578 SwitchMode(X86::Mode16Bit); 2579 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); 2580 } 2581 } else if (IDVal == ".code32") { 2582 Parser.Lex(); 2583 if (!is32BitMode()) { 2584 SwitchMode(X86::Mode32Bit); 2585 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 2586 } 2587 } else if (IDVal == ".code64") { 2588 Parser.Lex(); 2589 if (!is64BitMode()) { 2590 SwitchMode(X86::Mode64Bit); 2591 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64); 2592 } 2593 } else { 2594 Error(L, "unknown directive " + IDVal); 2595 return false; 2596 } 2597 2598 return false; 2599 } 2600 2601 // Force static initialization. 2602 extern "C" void LLVMInitializeX86AsmParser() { 2603 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target); 2604 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target); 2605 } 2606 2607 #define GET_REGISTER_MATCHER 2608 #define GET_MATCHER_IMPLEMENTATION 2609 #define GET_SUBTARGET_FEATURE_NAME 2610 #include "X86GenAsmMatcher.inc" 2611