//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
struct X86Operand;

static const char OpPrecedence[] = {
  0, // IC_PLUS
  0, // IC_MINUS
  1, // IC_MULTIPLY
  1, // IC_DIVIDE
  2, // IC_RPAREN
  3, // IC_LPAREN
  0, // IC_IMM
  0  // IC_REGISTER
};

class X86AsmParser : public MCTargetAsmParser {
  MCSubtargetInfo &STI;
  MCAsmParser &Parser;
  ParseInstructionInfo *InstInfo;
private:
  enum InfixCalculatorTok {
    IC_PLUS = 0,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER
  };

  class InfixCalculator {
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
    SmallVector<ICToken, 4> PostfixStack;

  public:
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Popped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
              && "Expected an immediate or register!");
      return Op.second;
    }
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }

    void popOperator() { InfixOperatorStack.pop_back_val(); }
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parenthesis.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator.
      unsigned ParenCount = 0;
      while (1) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If the parentheses count is zero and we see a left parenthesis,
        // then stop processing.
116 if (!ParenCount && StackOp == IC_LPAREN) 117 break; 118 119 if (StackOp == IC_RPAREN) { 120 ++ParenCount; 121 InfixOperatorStack.pop_back_val(); 122 } else if (StackOp == IC_LPAREN) { 123 --ParenCount; 124 InfixOperatorStack.pop_back_val(); 125 } else { 126 InfixOperatorStack.pop_back_val(); 127 PostfixStack.push_back(std::make_pair(StackOp, 0)); 128 } 129 } 130 // Push the new operator. 131 InfixOperatorStack.push_back(Op); 132 } 133 int64_t execute() { 134 // Push any remaining operators onto the postfix stack. 135 while (!InfixOperatorStack.empty()) { 136 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); 137 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) 138 PostfixStack.push_back(std::make_pair(StackOp, 0)); 139 } 140 141 if (PostfixStack.empty()) 142 return 0; 143 144 SmallVector<ICToken, 16> OperandStack; 145 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { 146 ICToken Op = PostfixStack[i]; 147 if (Op.first == IC_IMM || Op.first == IC_REGISTER) { 148 OperandStack.push_back(Op); 149 } else { 150 assert (OperandStack.size() > 1 && "Too few operands."); 151 int64_t Val; 152 ICToken Op2 = OperandStack.pop_back_val(); 153 ICToken Op1 = OperandStack.pop_back_val(); 154 switch (Op.first) { 155 default: 156 report_fatal_error("Unexpected operator!"); 157 break; 158 case IC_PLUS: 159 Val = Op1.second + Op2.second; 160 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 161 break; 162 case IC_MINUS: 163 Val = Op1.second - Op2.second; 164 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 165 break; 166 case IC_MULTIPLY: 167 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 168 "Multiply operation with an immediate and a register!"); 169 Val = Op1.second * Op2.second; 170 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 171 break; 172 case IC_DIVIDE: 173 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 174 "Divide operation with an immediate and a register!"); 175 assert (Op2.second != 0 && "Division by zero!"); 176 Val = Op1.second / Op2.second; 177 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 178 break; 179 } 180 } 181 } 182 assert (OperandStack.size() == 1 && "Expected a single result."); 183 return OperandStack.pop_back_val().second; 184 } 185 }; 186 187 enum IntelExprState { 188 IES_PLUS, 189 IES_MINUS, 190 IES_MULTIPLY, 191 IES_DIVIDE, 192 IES_LBRAC, 193 IES_RBRAC, 194 IES_LPAREN, 195 IES_RPAREN, 196 IES_REGISTER, 197 IES_INTEGER, 198 IES_IDENTIFIER, 199 IES_ERROR 200 }; 201 202 class IntelExprStateMachine { 203 IntelExprState State, PrevState; 204 unsigned BaseReg, IndexReg, TmpReg, Scale; 205 int64_t Imm; 206 const MCExpr *Sym; 207 StringRef SymName; 208 bool StopOnLBrac, AddImmPrefix; 209 InfixCalculator IC; 210 InlineAsmIdentifierInfo Info; 211 public: 212 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : 213 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), 214 Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac), 215 AddImmPrefix(addimmprefix) { Info.clear(); } 216 217 unsigned getBaseReg() { return BaseReg; } 218 unsigned getIndexReg() { return IndexReg; } 219 unsigned getScale() { return Scale; } 220 const MCExpr *getSym() { return Sym; } 221 StringRef getSymName() { return SymName; } 222 int64_t getImm() { return Imm + IC.execute(); } 223 bool isValidEndState() { 224 return State == IES_RBRAC || State == IES_INTEGER; 225 } 226 bool getStopOnLBrac() { return StopOnLBrac; } 227 bool getAddImmPrefix() { return AddImmPrefix; } 228 bool hadError() { return State == IES_ERROR; } 
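    // Identifier information (declaration, type and size) filled in by the
    // frontend callback when an identifier is looked up while parsing
    // MS-style inline assembly.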
229 230 InlineAsmIdentifierInfo &getIdentifierInfo() { 231 return Info; 232 } 233 234 void onPlus() { 235 IntelExprState CurrState = State; 236 switch (State) { 237 default: 238 State = IES_ERROR; 239 break; 240 case IES_INTEGER: 241 case IES_RPAREN: 242 case IES_REGISTER: 243 State = IES_PLUS; 244 IC.pushOperator(IC_PLUS); 245 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 246 // If we already have a BaseReg, then assume this is the IndexReg with 247 // a scale of 1. 248 if (!BaseReg) { 249 BaseReg = TmpReg; 250 } else { 251 assert (!IndexReg && "BaseReg/IndexReg already set!"); 252 IndexReg = TmpReg; 253 Scale = 1; 254 } 255 } 256 break; 257 } 258 PrevState = CurrState; 259 } 260 void onMinus() { 261 IntelExprState CurrState = State; 262 switch (State) { 263 default: 264 State = IES_ERROR; 265 break; 266 case IES_PLUS: 267 case IES_MULTIPLY: 268 case IES_DIVIDE: 269 case IES_LPAREN: 270 case IES_RPAREN: 271 case IES_LBRAC: 272 case IES_RBRAC: 273 case IES_INTEGER: 274 case IES_REGISTER: 275 State = IES_MINUS; 276 // Only push the minus operator if it is not a unary operator. 277 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || 278 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || 279 CurrState == IES_LPAREN || CurrState == IES_LBRAC)) 280 IC.pushOperator(IC_MINUS); 281 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 282 // If we already have a BaseReg, then assume this is the IndexReg with 283 // a scale of 1. 284 if (!BaseReg) { 285 BaseReg = TmpReg; 286 } else { 287 assert (!IndexReg && "BaseReg/IndexReg already set!"); 288 IndexReg = TmpReg; 289 Scale = 1; 290 } 291 } 292 break; 293 } 294 PrevState = CurrState; 295 } 296 void onRegister(unsigned Reg) { 297 IntelExprState CurrState = State; 298 switch (State) { 299 default: 300 State = IES_ERROR; 301 break; 302 case IES_PLUS: 303 case IES_LPAREN: 304 State = IES_REGISTER; 305 TmpReg = Reg; 306 IC.pushOperand(IC_REGISTER); 307 break; 308 case IES_MULTIPLY: 309 // Index Register - Scale * Register 310 if (PrevState == IES_INTEGER) { 311 assert (!IndexReg && "IndexReg already set!"); 312 State = IES_REGISTER; 313 IndexReg = Reg; 314 // Get the scale and replace the 'Scale * Register' with '0'. 315 Scale = IC.popOperand(); 316 IC.pushOperand(IC_IMM); 317 IC.popOperator(); 318 } else { 319 State = IES_ERROR; 320 } 321 break; 322 } 323 PrevState = CurrState; 324 } 325 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) { 326 PrevState = State; 327 switch (State) { 328 default: 329 State = IES_ERROR; 330 break; 331 case IES_PLUS: 332 case IES_MINUS: 333 State = IES_INTEGER; 334 Sym = SymRef; 335 SymName = SymRefName; 336 IC.pushOperand(IC_IMM); 337 break; 338 } 339 } 340 void onInteger(int64_t TmpInt) { 341 IntelExprState CurrState = State; 342 switch (State) { 343 default: 344 State = IES_ERROR; 345 break; 346 case IES_PLUS: 347 case IES_MINUS: 348 case IES_DIVIDE: 349 case IES_MULTIPLY: 350 case IES_LPAREN: 351 State = IES_INTEGER; 352 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { 353 // Index Register - Register * Scale 354 assert (!IndexReg && "IndexReg already set!"); 355 IndexReg = TmpReg; 356 Scale = TmpInt; 357 // Get the scale and replace the 'Register * Scale' with '0'. 358 IC.popOperator(); 359 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 360 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 361 PrevState == IES_LPAREN || PrevState == IES_LBRAC) && 362 CurrState == IES_MINUS) { 363 // Unary minus. 
No need to pop the minus operand because it was never 364 // pushed. 365 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. 366 } else { 367 IC.pushOperand(IC_IMM, TmpInt); 368 } 369 break; 370 } 371 PrevState = CurrState; 372 } 373 void onStar() { 374 PrevState = State; 375 switch (State) { 376 default: 377 State = IES_ERROR; 378 break; 379 case IES_INTEGER: 380 case IES_REGISTER: 381 case IES_RPAREN: 382 State = IES_MULTIPLY; 383 IC.pushOperator(IC_MULTIPLY); 384 break; 385 } 386 } 387 void onDivide() { 388 PrevState = State; 389 switch (State) { 390 default: 391 State = IES_ERROR; 392 break; 393 case IES_INTEGER: 394 case IES_RPAREN: 395 State = IES_DIVIDE; 396 IC.pushOperator(IC_DIVIDE); 397 break; 398 } 399 } 400 void onLBrac() { 401 PrevState = State; 402 switch (State) { 403 default: 404 State = IES_ERROR; 405 break; 406 case IES_RBRAC: 407 State = IES_PLUS; 408 IC.pushOperator(IC_PLUS); 409 break; 410 } 411 } 412 void onRBrac() { 413 IntelExprState CurrState = State; 414 switch (State) { 415 default: 416 State = IES_ERROR; 417 break; 418 case IES_INTEGER: 419 case IES_REGISTER: 420 case IES_RPAREN: 421 State = IES_RBRAC; 422 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 423 // If we already have a BaseReg, then assume this is the IndexReg with 424 // a scale of 1. 425 if (!BaseReg) { 426 BaseReg = TmpReg; 427 } else { 428 assert (!IndexReg && "BaseReg/IndexReg already set!"); 429 IndexReg = TmpReg; 430 Scale = 1; 431 } 432 } 433 break; 434 } 435 PrevState = CurrState; 436 } 437 void onLParen() { 438 IntelExprState CurrState = State; 439 switch (State) { 440 default: 441 State = IES_ERROR; 442 break; 443 case IES_PLUS: 444 case IES_MINUS: 445 case IES_MULTIPLY: 446 case IES_DIVIDE: 447 case IES_LPAREN: 448 // FIXME: We don't handle this type of unary minus, yet. 
449 if ((PrevState == IES_PLUS || PrevState == IES_MINUS || 450 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || 451 PrevState == IES_LPAREN || PrevState == IES_LBRAC) && 452 CurrState == IES_MINUS) { 453 State = IES_ERROR; 454 break; 455 } 456 State = IES_LPAREN; 457 IC.pushOperator(IC_LPAREN); 458 break; 459 } 460 PrevState = CurrState; 461 } 462 void onRParen() { 463 PrevState = State; 464 switch (State) { 465 default: 466 State = IES_ERROR; 467 break; 468 case IES_INTEGER: 469 case IES_REGISTER: 470 case IES_RPAREN: 471 State = IES_RPAREN; 472 IC.pushOperator(IC_RPAREN); 473 break; 474 } 475 } 476 }; 477 478 MCAsmParser &getParser() const { return Parser; } 479 480 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 481 482 bool Error(SMLoc L, const Twine &Msg, 483 ArrayRef<SMRange> Ranges = None, 484 bool MatchingInlineAsm = false) { 485 if (MatchingInlineAsm) return true; 486 return Parser.Error(L, Msg, Ranges); 487 } 488 489 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) { 490 Error(Loc, Msg); 491 return 0; 492 } 493 494 X86Operand *ParseOperand(); 495 X86Operand *ParseATTOperand(); 496 X86Operand *ParseIntelOperand(); 497 X86Operand *ParseIntelOffsetOfOperator(); 498 X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); 499 X86Operand *ParseIntelOperator(unsigned OpKind); 500 X86Operand *ParseIntelMemOperand(unsigned SegReg, int64_t ImmDisp, 501 SMLoc StartLoc); 502 X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); 503 X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start, 504 int64_t ImmDisp, unsigned Size); 505 X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, 506 InlineAsmIdentifierInfo &Info, 507 bool IsUnevaluatedOperand, SMLoc &End); 508 509 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); 510 511 X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, 512 unsigned BaseReg, unsigned IndexReg, 513 unsigned Scale, SMLoc Start, SMLoc End, 514 unsigned Size, StringRef Identifier, 515 InlineAsmIdentifierInfo &Info); 516 517 bool ParseDirectiveWord(unsigned Size, SMLoc L); 518 bool ParseDirectiveCode(StringRef IDVal, SMLoc L); 519 520 bool processInstruction(MCInst &Inst, 521 const SmallVectorImpl<MCParsedAsmOperand*> &Ops); 522 523 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 524 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 525 MCStreamer &Out, unsigned &ErrorInfo, 526 bool MatchingInlineAsm); 527 528 /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) 529 /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. 530 bool isSrcOp(X86Operand &Op); 531 532 /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi) 533 /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. 534 bool isDstOp(X86Operand &Op); 535 536 bool is64BitMode() const { 537 // FIXME: Can tablegen auto-generate this? 
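  // 64-bit mode is modeled as the Mode64Bit subtarget feature, so it is
  // detected by testing the current feature bits.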
538 return (STI.getFeatureBits() & X86::Mode64Bit) != 0; 539 } 540 void SwitchMode() { 541 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit)); 542 setAvailableFeatures(FB); 543 } 544 545 bool isParsingIntelSyntax() { 546 return getParser().getAssemblerDialect(); 547 } 548 549 /// @name Auto-generated Matcher Functions 550 /// { 551 552 #define GET_ASSEMBLER_HEADER 553 #include "X86GenAsmMatcher.inc" 554 555 /// } 556 557 public: 558 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) 559 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) { 560 561 // Initialize the set of available features. 562 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); 563 } 564 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); 565 566 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 567 SMLoc NameLoc, 568 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 569 570 virtual bool ParseDirective(AsmToken DirectiveID); 571 }; 572 } // end anonymous namespace 573 574 /// @name Auto-generated Match Functions 575 /// { 576 577 static unsigned MatchRegisterName(StringRef Name); 578 579 /// } 580 581 static bool isImmSExti16i8Value(uint64_t Value) { 582 return (( Value <= 0x000000000000007FULL)|| 583 (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| 584 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 585 } 586 587 static bool isImmSExti32i8Value(uint64_t Value) { 588 return (( Value <= 0x000000000000007FULL)|| 589 (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| 590 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 591 } 592 593 static bool isImmZExtu32u8Value(uint64_t Value) { 594 return (Value <= 0x00000000000000FFULL); 595 } 596 597 static bool isImmSExti64i8Value(uint64_t Value) { 598 return (( Value <= 0x000000000000007FULL)|| 599 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 600 } 601 602 static bool isImmSExti64i32Value(uint64_t Value) { 603 return (( Value <= 0x000000007FFFFFFFULL)|| 604 (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 605 } 606 namespace { 607 608 /// X86Operand - Instances of this class represent a parsed X86 machine 609 /// instruction. 610 struct X86Operand : public MCParsedAsmOperand { 611 enum KindTy { 612 Token, 613 Register, 614 Immediate, 615 Memory 616 } Kind; 617 618 SMLoc StartLoc, EndLoc; 619 SMLoc OffsetOfLoc; 620 StringRef SymName; 621 void *OpDecl; 622 bool AddressOf; 623 624 struct TokOp { 625 const char *Data; 626 unsigned Length; 627 }; 628 629 struct RegOp { 630 unsigned RegNo; 631 }; 632 633 struct ImmOp { 634 const MCExpr *Val; 635 }; 636 637 struct MemOp { 638 unsigned SegReg; 639 const MCExpr *Disp; 640 unsigned BaseReg; 641 unsigned IndexReg; 642 unsigned Scale; 643 unsigned Size; 644 }; 645 646 union { 647 struct TokOp Tok; 648 struct RegOp Reg; 649 struct ImmOp Imm; 650 struct MemOp Mem; 651 }; 652 653 X86Operand(KindTy K, SMLoc Start, SMLoc End) 654 : Kind(K), StartLoc(Start), EndLoc(End) {} 655 656 StringRef getSymName() { return SymName; } 657 void *getOpDecl() { return OpDecl; } 658 659 /// getStartLoc - Get the location of the first token of this operand. 660 SMLoc getStartLoc() const { return StartLoc; } 661 /// getEndLoc - Get the location of the last token of this operand. 662 SMLoc getEndLoc() const { return EndLoc; } 663 /// getLocRange - Get the range between the first and last token of this 664 /// operand. 
665 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } 666 /// getOffsetOfLoc - Get the location of the offset operator. 667 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; } 668 669 virtual void print(raw_ostream &OS) const {} 670 671 StringRef getToken() const { 672 assert(Kind == Token && "Invalid access!"); 673 return StringRef(Tok.Data, Tok.Length); 674 } 675 void setTokenValue(StringRef Value) { 676 assert(Kind == Token && "Invalid access!"); 677 Tok.Data = Value.data(); 678 Tok.Length = Value.size(); 679 } 680 681 unsigned getReg() const { 682 assert(Kind == Register && "Invalid access!"); 683 return Reg.RegNo; 684 } 685 686 const MCExpr *getImm() const { 687 assert(Kind == Immediate && "Invalid access!"); 688 return Imm.Val; 689 } 690 691 const MCExpr *getMemDisp() const { 692 assert(Kind == Memory && "Invalid access!"); 693 return Mem.Disp; 694 } 695 unsigned getMemSegReg() const { 696 assert(Kind == Memory && "Invalid access!"); 697 return Mem.SegReg; 698 } 699 unsigned getMemBaseReg() const { 700 assert(Kind == Memory && "Invalid access!"); 701 return Mem.BaseReg; 702 } 703 unsigned getMemIndexReg() const { 704 assert(Kind == Memory && "Invalid access!"); 705 return Mem.IndexReg; 706 } 707 unsigned getMemScale() const { 708 assert(Kind == Memory && "Invalid access!"); 709 return Mem.Scale; 710 } 711 712 bool isToken() const {return Kind == Token; } 713 714 bool isImm() const { return Kind == Immediate; } 715 716 bool isImmSExti16i8() const { 717 if (!isImm()) 718 return false; 719 720 // If this isn't a constant expr, just assume it fits and let relaxation 721 // handle it. 722 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 723 if (!CE) 724 return true; 725 726 // Otherwise, check the value is in a range that makes sense for this 727 // extension. 728 return isImmSExti16i8Value(CE->getValue()); 729 } 730 bool isImmSExti32i8() const { 731 if (!isImm()) 732 return false; 733 734 // If this isn't a constant expr, just assume it fits and let relaxation 735 // handle it. 736 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 737 if (!CE) 738 return true; 739 740 // Otherwise, check the value is in a range that makes sense for this 741 // extension. 742 return isImmSExti32i8Value(CE->getValue()); 743 } 744 bool isImmZExtu32u8() const { 745 if (!isImm()) 746 return false; 747 748 // If this isn't a constant expr, just assume it fits and let relaxation 749 // handle it. 750 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 751 if (!CE) 752 return true; 753 754 // Otherwise, check the value is in a range that makes sense for this 755 // extension. 756 return isImmZExtu32u8Value(CE->getValue()); 757 } 758 bool isImmSExti64i8() const { 759 if (!isImm()) 760 return false; 761 762 // If this isn't a constant expr, just assume it fits and let relaxation 763 // handle it. 764 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 765 if (!CE) 766 return true; 767 768 // Otherwise, check the value is in a range that makes sense for this 769 // extension. 770 return isImmSExti64i8Value(CE->getValue()); 771 } 772 bool isImmSExti64i32() const { 773 if (!isImm()) 774 return false; 775 776 // If this isn't a constant expr, just assume it fits and let relaxation 777 // handle it. 778 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 779 if (!CE) 780 return true; 781 782 // Otherwise, check the value is in a range that makes sense for this 783 // extension. 
784 return isImmSExti64i32Value(CE->getValue()); 785 } 786 787 bool isOffsetOf() const { 788 return OffsetOfLoc.getPointer(); 789 } 790 791 bool needAddressOf() const { 792 return AddressOf; 793 } 794 795 bool isMem() const { return Kind == Memory; } 796 bool isMem8() const { 797 return Kind == Memory && (!Mem.Size || Mem.Size == 8); 798 } 799 bool isMem16() const { 800 return Kind == Memory && (!Mem.Size || Mem.Size == 16); 801 } 802 bool isMem32() const { 803 return Kind == Memory && (!Mem.Size || Mem.Size == 32); 804 } 805 bool isMem64() const { 806 return Kind == Memory && (!Mem.Size || Mem.Size == 64); 807 } 808 bool isMem80() const { 809 return Kind == Memory && (!Mem.Size || Mem.Size == 80); 810 } 811 bool isMem128() const { 812 return Kind == Memory && (!Mem.Size || Mem.Size == 128); 813 } 814 bool isMem256() const { 815 return Kind == Memory && (!Mem.Size || Mem.Size == 256); 816 } 817 818 bool isMemVX32() const { 819 return Kind == Memory && (!Mem.Size || Mem.Size == 32) && 820 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; 821 } 822 bool isMemVY32() const { 823 return Kind == Memory && (!Mem.Size || Mem.Size == 32) && 824 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; 825 } 826 bool isMemVX64() const { 827 return Kind == Memory && (!Mem.Size || Mem.Size == 64) && 828 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; 829 } 830 bool isMemVY64() const { 831 return Kind == Memory && (!Mem.Size || Mem.Size == 64) && 832 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; 833 } 834 bool isMemVZ32() const { 835 return Kind == Memory && (!Mem.Size || Mem.Size == 32) && 836 getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31; 837 } 838 bool isMemVZ64() const { 839 return Kind == Memory && (!Mem.Size || Mem.Size == 64) && 840 getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31; 841 } 842 843 bool isMem512() const { 844 return Kind == Memory && (!Mem.Size || Mem.Size == 512); 845 } 846 847 bool isAbsMem() const { 848 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && 849 !getMemIndexReg() && getMemScale() == 1; 850 } 851 852 bool isReg() const { return Kind == Register; } 853 854 void addExpr(MCInst &Inst, const MCExpr *Expr) const { 855 // Add as immediates when possible. 
856 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) 857 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 858 else 859 Inst.addOperand(MCOperand::CreateExpr(Expr)); 860 } 861 862 void addRegOperands(MCInst &Inst, unsigned N) const { 863 assert(N == 1 && "Invalid number of operands!"); 864 Inst.addOperand(MCOperand::CreateReg(getReg())); 865 } 866 867 void addImmOperands(MCInst &Inst, unsigned N) const { 868 assert(N == 1 && "Invalid number of operands!"); 869 addExpr(Inst, getImm()); 870 } 871 872 void addMem8Operands(MCInst &Inst, unsigned N) const { 873 addMemOperands(Inst, N); 874 } 875 void addMem16Operands(MCInst &Inst, unsigned N) const { 876 addMemOperands(Inst, N); 877 } 878 void addMem32Operands(MCInst &Inst, unsigned N) const { 879 addMemOperands(Inst, N); 880 } 881 void addMem64Operands(MCInst &Inst, unsigned N) const { 882 addMemOperands(Inst, N); 883 } 884 void addMem80Operands(MCInst &Inst, unsigned N) const { 885 addMemOperands(Inst, N); 886 } 887 void addMem128Operands(MCInst &Inst, unsigned N) const { 888 addMemOperands(Inst, N); 889 } 890 void addMem256Operands(MCInst &Inst, unsigned N) const { 891 addMemOperands(Inst, N); 892 } 893 void addMemVX32Operands(MCInst &Inst, unsigned N) const { 894 addMemOperands(Inst, N); 895 } 896 void addMemVY32Operands(MCInst &Inst, unsigned N) const { 897 addMemOperands(Inst, N); 898 } 899 void addMemVX64Operands(MCInst &Inst, unsigned N) const { 900 addMemOperands(Inst, N); 901 } 902 void addMemVY64Operands(MCInst &Inst, unsigned N) const { 903 addMemOperands(Inst, N); 904 } 905 906 void addMemVZ32Operands(MCInst &Inst, unsigned N) const { 907 addMemOperands(Inst, N); 908 } 909 void addMemVZ64Operands(MCInst &Inst, unsigned N) const { 910 addMemOperands(Inst, N); 911 } 912 void addMem512Operands(MCInst &Inst, unsigned N) const { 913 addMemOperands(Inst, N); 914 } 915 916 void addMemOperands(MCInst &Inst, unsigned N) const { 917 assert((N == 5) && "Invalid number of operands!"); 918 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); 919 Inst.addOperand(MCOperand::CreateImm(getMemScale())); 920 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); 921 addExpr(Inst, getMemDisp()); 922 Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); 923 } 924 925 void addAbsMemOperands(MCInst &Inst, unsigned N) const { 926 assert((N == 1) && "Invalid number of operands!"); 927 // Add as immediates when possible. 
928 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp())) 929 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 930 else 931 Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); 932 } 933 934 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { 935 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size()); 936 X86Operand *Res = new X86Operand(Token, Loc, EndLoc); 937 Res->Tok.Data = Str.data(); 938 Res->Tok.Length = Str.size(); 939 return Res; 940 } 941 942 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, 943 bool AddressOf = false, 944 SMLoc OffsetOfLoc = SMLoc(), 945 StringRef SymName = StringRef(), 946 void *OpDecl = 0) { 947 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); 948 Res->Reg.RegNo = RegNo; 949 Res->AddressOf = AddressOf; 950 Res->OffsetOfLoc = OffsetOfLoc; 951 Res->SymName = SymName; 952 Res->OpDecl = OpDecl; 953 return Res; 954 } 955 956 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ 957 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); 958 Res->Imm.Val = Val; 959 return Res; 960 } 961 962 /// Create an absolute memory operand. 963 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, 964 unsigned Size = 0, StringRef SymName = StringRef(), 965 void *OpDecl = 0) { 966 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 967 Res->Mem.SegReg = 0; 968 Res->Mem.Disp = Disp; 969 Res->Mem.BaseReg = 0; 970 Res->Mem.IndexReg = 0; 971 Res->Mem.Scale = 1; 972 Res->Mem.Size = Size; 973 Res->SymName = SymName; 974 Res->OpDecl = OpDecl; 975 Res->AddressOf = false; 976 return Res; 977 } 978 979 /// Create a generalized memory operand. 980 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, 981 unsigned BaseReg, unsigned IndexReg, 982 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, 983 unsigned Size = 0, 984 StringRef SymName = StringRef(), 985 void *OpDecl = 0) { 986 // We should never just have a displacement, that should be parsed as an 987 // absolute memory operand. 988 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); 989 990 // The scale should always be one of {1,2,4,8}. 991 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && 992 "Invalid scale!"); 993 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 994 Res->Mem.SegReg = SegReg; 995 Res->Mem.Disp = Disp; 996 Res->Mem.BaseReg = BaseReg; 997 Res->Mem.IndexReg = IndexReg; 998 Res->Mem.Scale = Scale; 999 Res->Mem.Size = Size; 1000 Res->SymName = SymName; 1001 Res->OpDecl = OpDecl; 1002 Res->AddressOf = false; 1003 return Res; 1004 } 1005 }; 1006 1007 } // end anonymous namespace. 1008 1009 bool X86AsmParser::isSrcOp(X86Operand &Op) { 1010 unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI; 1011 1012 return (Op.isMem() && 1013 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) && 1014 isa<MCConstantExpr>(Op.Mem.Disp) && 1015 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 1016 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0); 1017 } 1018 1019 bool X86AsmParser::isDstOp(X86Operand &Op) { 1020 unsigned basereg = is64BitMode() ? 
X86::RDI : X86::EDI; 1021 1022 return Op.isMem() && 1023 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) && 1024 isa<MCConstantExpr>(Op.Mem.Disp) && 1025 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 1026 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; 1027 } 1028 1029 bool X86AsmParser::ParseRegister(unsigned &RegNo, 1030 SMLoc &StartLoc, SMLoc &EndLoc) { 1031 RegNo = 0; 1032 const AsmToken &PercentTok = Parser.getTok(); 1033 StartLoc = PercentTok.getLoc(); 1034 1035 // If we encounter a %, ignore it. This code handles registers with and 1036 // without the prefix, unprefixed registers can occur in cfi directives. 1037 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) 1038 Parser.Lex(); // Eat percent token. 1039 1040 const AsmToken &Tok = Parser.getTok(); 1041 EndLoc = Tok.getEndLoc(); 1042 1043 if (Tok.isNot(AsmToken::Identifier)) { 1044 if (isParsingIntelSyntax()) return true; 1045 return Error(StartLoc, "invalid register name", 1046 SMRange(StartLoc, EndLoc)); 1047 } 1048 1049 RegNo = MatchRegisterName(Tok.getString()); 1050 1051 // If the match failed, try the register name as lowercase. 1052 if (RegNo == 0) 1053 RegNo = MatchRegisterName(Tok.getString().lower()); 1054 1055 if (!is64BitMode()) { 1056 // FIXME: This should be done using Requires<In32BitMode> and 1057 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also 1058 // checked. 1059 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a 1060 // REX prefix. 1061 if (RegNo == X86::RIZ || 1062 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || 1063 X86II::isX86_64NonExtLowByteReg(RegNo) || 1064 X86II::isX86_64ExtendedReg(RegNo)) 1065 return Error(StartLoc, "register %" 1066 + Tok.getString() + " is only available in 64-bit mode", 1067 SMRange(StartLoc, EndLoc)); 1068 } 1069 1070 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. 1071 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { 1072 RegNo = X86::ST0; 1073 Parser.Lex(); // Eat 'st' 1074 1075 // Check to see if we have '(4)' after %st. 1076 if (getLexer().isNot(AsmToken::LParen)) 1077 return false; 1078 // Lex the paren. 1079 getParser().Lex(); 1080 1081 const AsmToken &IntTok = Parser.getTok(); 1082 if (IntTok.isNot(AsmToken::Integer)) 1083 return Error(IntTok.getLoc(), "expected stack index"); 1084 switch (IntTok.getIntVal()) { 1085 case 0: RegNo = X86::ST0; break; 1086 case 1: RegNo = X86::ST1; break; 1087 case 2: RegNo = X86::ST2; break; 1088 case 3: RegNo = X86::ST3; break; 1089 case 4: RegNo = X86::ST4; break; 1090 case 5: RegNo = X86::ST5; break; 1091 case 6: RegNo = X86::ST6; break; 1092 case 7: RegNo = X86::ST7; break; 1093 default: return Error(IntTok.getLoc(), "invalid stack index"); 1094 } 1095 1096 if (getParser().Lex().isNot(AsmToken::RParen)) 1097 return Error(Parser.getTok().getLoc(), "expected ')'"); 1098 1099 EndLoc = Parser.getTok().getEndLoc(); 1100 Parser.Lex(); // Eat ')' 1101 return false; 1102 } 1103 1104 EndLoc = Parser.getTok().getEndLoc(); 1105 1106 // If this is "db[0-7]", match it as an alias 1107 // for dr[0-7]. 
1108 if (RegNo == 0 && Tok.getString().size() == 3 && 1109 Tok.getString().startswith("db")) { 1110 switch (Tok.getString()[2]) { 1111 case '0': RegNo = X86::DR0; break; 1112 case '1': RegNo = X86::DR1; break; 1113 case '2': RegNo = X86::DR2; break; 1114 case '3': RegNo = X86::DR3; break; 1115 case '4': RegNo = X86::DR4; break; 1116 case '5': RegNo = X86::DR5; break; 1117 case '6': RegNo = X86::DR6; break; 1118 case '7': RegNo = X86::DR7; break; 1119 } 1120 1121 if (RegNo != 0) { 1122 EndLoc = Parser.getTok().getEndLoc(); 1123 Parser.Lex(); // Eat it. 1124 return false; 1125 } 1126 } 1127 1128 if (RegNo == 0) { 1129 if (isParsingIntelSyntax()) return true; 1130 return Error(StartLoc, "invalid register name", 1131 SMRange(StartLoc, EndLoc)); 1132 } 1133 1134 Parser.Lex(); // Eat identifier token. 1135 return false; 1136 } 1137 1138 X86Operand *X86AsmParser::ParseOperand() { 1139 if (isParsingIntelSyntax()) 1140 return ParseIntelOperand(); 1141 return ParseATTOperand(); 1142 } 1143 1144 /// getIntelMemOperandSize - Return intel memory operand size. 1145 static unsigned getIntelMemOperandSize(StringRef OpStr) { 1146 unsigned Size = StringSwitch<unsigned>(OpStr) 1147 .Cases("BYTE", "byte", 8) 1148 .Cases("WORD", "word", 16) 1149 .Cases("DWORD", "dword", 32) 1150 .Cases("QWORD", "qword", 64) 1151 .Cases("XWORD", "xword", 80) 1152 .Cases("XMMWORD", "xmmword", 128) 1153 .Cases("YMMWORD", "ymmword", 256) 1154 .Default(0); 1155 return Size; 1156 } 1157 1158 X86Operand * 1159 X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, 1160 unsigned BaseReg, unsigned IndexReg, 1161 unsigned Scale, SMLoc Start, SMLoc End, 1162 unsigned Size, StringRef Identifier, 1163 InlineAsmIdentifierInfo &Info){ 1164 if (isa<MCSymbolRefExpr>(Disp)) { 1165 // If this is not a VarDecl then assume it is a FuncDecl or some other label 1166 // reference. We need an 'r' constraint here, so we need to create register 1167 // operand to ensure proper matching. Just pick a GPR based on the size of 1168 // a pointer. 1169 if (!Info.IsVarDecl) { 1170 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; 1171 return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, 1172 SMLoc(), Identifier, Info.OpDecl); 1173 } 1174 if (!Size) { 1175 Size = Info.Type * 8; // Size is in terms of bits in this context. 1176 if (Size) 1177 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start, 1178 /*Len=*/0, Size)); 1179 } 1180 } 1181 1182 // When parsing inline assembly we set the base register to a non-zero value 1183 // if we don't know the actual value at this time. This is necessary to 1184 // get the matching correct in some cases. 1185 BaseReg = BaseReg ? BaseReg : 1; 1186 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, 1187 End, Size, Identifier, Info.OpDecl); 1188 } 1189 1190 static void 1191 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites, 1192 StringRef SymName, int64_t ImmDisp, 1193 int64_t FinalImmDisp, SMLoc &BracLoc, 1194 SMLoc &StartInBrac, SMLoc &End) { 1195 // Remove the '[' and ']' from the IR string. 1196 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1)); 1197 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1)); 1198 1199 // If ImmDisp is non-zero, then we parsed a displacement before the 1200 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) 1201 // If ImmDisp doesn't match the displacement computed by the state machine 1202 // then we have an additional displacement in the bracketed expression. 
  if (ImmDisp != FinalImmDisp) {
    if (ImmDisp) {
      // We have an immediate displacement before the bracketed expression.
      // Adjust this to match the final immediate displacement.
      bool Found = false;
      for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
             E = AsmRewrites->end(); I != E; ++I) {
        if ((*I).Loc.getPointer() > BracLoc.getPointer())
          continue;
        if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
          assert (!Found && "ImmDisp already rewritten.");
          (*I).Kind = AOK_Imm;
          (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
          (*I).Val = FinalImmDisp;
          Found = true;
          break;
        }
      }
      assert (Found && "Unable to rewrite ImmDisp.");
      (void)Found;
    } else {
      // We have a symbolic and an immediate displacement, but no displacement
      // before the bracketed expression.  Put the immediate displacement
      // before the bracketed expression.
      AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
    }
  }
  // Remove all the ImmPrefix rewrites within the brackets.
  for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
         E = AsmRewrites->end(); I != E; ++I) {
    if ((*I).Loc.getPointer() < StartInBrac.getPointer())
      continue;
    if ((*I).Kind == AOK_ImmPrefix)
      (*I).Kind = AOK_Delete;
  }
  const char *SymLocPtr = SymName.data();
  // Skip everything before the symbol.
  if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
    assert(Len > 0 && "Expected a positive length.");
    AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
  }
  // Skip everything after the symbol.
  if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
    SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
    assert(Len > 0 && "Expected a positive length.");
    AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
  }
}

X86Operand *
X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
  const AsmToken &Tok = Parser.getTok();

  bool Done = false;
  while (!Done) {
    bool UpdateLocLex = true;

    // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
    // identifier.  Don't try to parse it as a register.
    if (Tok.getString().startswith("."))
      break;

    // If we're parsing an immediate expression, we don't expect a '['.
    if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
      break;

    switch (getLexer().getKind()) {
    default: {
      if (SM.isValidEndState()) {
        Done = true;
        break;
      }
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
    }
    case AsmToken::EndOfStatement: {
      Done = true;
      break;
    }
    case AsmToken::Identifier: {
      // This could be a register or a symbolic displacement.
1283 unsigned TmpReg; 1284 const MCExpr *Val; 1285 SMLoc IdentLoc = Tok.getLoc(); 1286 StringRef Identifier = Tok.getString(); 1287 if(!ParseRegister(TmpReg, IdentLoc, End)) { 1288 SM.onRegister(TmpReg); 1289 UpdateLocLex = false; 1290 break; 1291 } else { 1292 if (!isParsingInlineAsm()) { 1293 if (getParser().parsePrimaryExpr(Val, End)) 1294 return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); 1295 } else { 1296 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); 1297 if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, 1298 /*Unevaluated*/ false, End)) 1299 return Err; 1300 } 1301 SM.onIdentifierExpr(Val, Identifier); 1302 UpdateLocLex = false; 1303 break; 1304 } 1305 return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); 1306 } 1307 case AsmToken::Integer: 1308 if (isParsingInlineAsm() && SM.getAddImmPrefix()) 1309 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, 1310 Tok.getLoc())); 1311 SM.onInteger(Tok.getIntVal()); 1312 break; 1313 case AsmToken::Plus: SM.onPlus(); break; 1314 case AsmToken::Minus: SM.onMinus(); break; 1315 case AsmToken::Star: SM.onStar(); break; 1316 case AsmToken::Slash: SM.onDivide(); break; 1317 case AsmToken::LBrac: SM.onLBrac(); break; 1318 case AsmToken::RBrac: SM.onRBrac(); break; 1319 case AsmToken::LParen: SM.onLParen(); break; 1320 case AsmToken::RParen: SM.onRParen(); break; 1321 } 1322 if (SM.hadError()) 1323 return ErrorOperand(Tok.getLoc(), "Unexpected token!"); 1324 1325 if (!Done && UpdateLocLex) { 1326 End = Tok.getLoc(); 1327 Parser.Lex(); // Consume the token. 1328 } 1329 } 1330 return 0; 1331 } 1332 1333 X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, 1334 int64_t ImmDisp, 1335 unsigned Size) { 1336 const AsmToken &Tok = Parser.getTok(); 1337 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); 1338 if (getLexer().isNot(AsmToken::LBrac)) 1339 return ErrorOperand(BracLoc, "Expected '[' token!"); 1340 Parser.Lex(); // Eat '[' 1341 1342 SMLoc StartInBrac = Tok.getLoc(); 1343 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We 1344 // may have already parsed an immediate displacement before the bracketed 1345 // expression. 1346 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); 1347 if (X86Operand *Err = ParseIntelExpression(SM, End)) 1348 return Err; 1349 1350 const MCExpr *Disp; 1351 if (const MCExpr *Sym = SM.getSym()) { 1352 // A symbolic displacement. 1353 Disp = Sym; 1354 if (isParsingInlineAsm()) 1355 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(), 1356 ImmDisp, SM.getImm(), BracLoc, StartInBrac, 1357 End); 1358 } else { 1359 // An immediate displacement only. 1360 Disp = MCConstantExpr::Create(SM.getImm(), getContext()); 1361 } 1362 1363 // Parse the dot operator (e.g., [ebx].foo.bar). 1364 if (Tok.getString().startswith(".")) { 1365 const MCExpr *NewDisp; 1366 if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp)) 1367 return Err; 1368 1369 End = Tok.getEndLoc(); 1370 Parser.Lex(); // Eat the field. 
    Disp = NewDisp;
  }

  int BaseReg = SM.getBaseReg();
  int IndexReg = SM.getIndexReg();
  int Scale = SM.getScale();
  if (!isParsingInlineAsm()) {
    // Handle [-42].
    if (!BaseReg && !IndexReg) {
      if (!SegReg)
        return X86Operand::CreateMem(Disp, Start, End, Size);
      else
        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
    }
    return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
                                 End, Size);
  }

  InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
  return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
                               End, Size, SM.getSymName(), Info);
}

// Inline assembly may use variable names with namespace alias qualifiers.
X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
                                               StringRef &Identifier,
                                               InlineAsmIdentifierInfo &Info,
                                               bool IsUnevaluatedOperand,
                                               SMLoc &End) {
  assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
  Val = 0;

  StringRef LineBuf(Identifier.data());
  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);

  const AsmToken &Tok = Parser.getTok();

  // Advance the token stream until the end of the current token is
  // after the end of what the frontend claimed.
  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
  while (true) {
    End = Tok.getEndLoc();
    getLexer().Lex();

    assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
    if (End.getPointer() == EndPtr) break;
  }

  // Create the symbol reference.
  Identifier = LineBuf;
  MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
  Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
  return 0;
}

/// ParseIntelMemOperand - Parse an Intel-style memory operand.
X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
                                               int64_t ImmDisp,
                                               SMLoc Start) {
  const AsmToken &Tok = Parser.getTok();
  SMLoc End;

  unsigned Size = getIntelMemOperandSize(Tok.getString());
  if (Size) {
    Parser.Lex(); // Eat operand size (e.g., byte, word).
    if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
      return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
    Parser.Lex(); // Eat ptr.
  }

  // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
  if (getLexer().is(AsmToken::Integer)) {
    if (isParsingInlineAsm())
      InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
                                                  Tok.getLoc()));
    int64_t ImmDisp = Tok.getIntVal();
    Parser.Lex(); // Eat the integer.
    if (getLexer().isNot(AsmToken::LBrac))
      return ErrorOperand(Start, "Expected '[' token!");
    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
  }

  if (getLexer().is(AsmToken::LBrac))
    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);

  if (!ParseRegister(SegReg, Start, End)) {
    // Handle SegReg : [ ... ]
    if (getLexer().isNot(AsmToken::Colon))
      return ErrorOperand(Start, "Expected ':' token!");
    Parser.Lex(); // Eat ':'.
    if (getLexer().isNot(AsmToken::LBrac))
      return ErrorOperand(Start, "Expected '[' token!");
    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
  }

  const MCExpr *Val;
  if (!isParsingInlineAsm()) {
    if (getParser().parsePrimaryExpr(Val, End))
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");

    return X86Operand::CreateMem(Val, Start, End, Size);
  }

  InlineAsmIdentifierInfo Info;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ false, End))
    return Err;
  return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
                               /*Scale=*/1, Start, End, Size, Identifier, Info);
}

/// Parse the '.' operator.
X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
                                                const MCExpr *&NewDisp) {
  const AsmToken &Tok = Parser.getTok();
  int64_t OrigDispVal, DotDispVal;

  // FIXME: Handle non-constant expressions.
  if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
    OrigDispVal = OrigDisp->getValue();
  else
    return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!");

  // Drop the '.'.
  StringRef DotDispStr = Tok.getString().drop_front(1);

  // .Imm gets lexed as a real.
  if (Tok.is(AsmToken::Real)) {
    APInt DotDisp;
    DotDispStr.getAsInteger(10, DotDisp);
    DotDispVal = DotDisp.getZExtValue();
  } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
    unsigned DotDisp;
    std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
    if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
                                           DotDisp))
      return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!");
    DotDispVal = DotDisp;
  } else
    return ErrorOperand(Tok.getLoc(), "Unexpected token type!");

  if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
    SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
    unsigned Len = DotDispStr.size();
    unsigned Val = OrigDispVal + DotDispVal;
    InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
                                                Val));
  }

  NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
  return 0;
}

/// Parse the 'offset' operator.  This operator is used to specify the
/// location rather than the content of a variable.
X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
  const AsmToken &Tok = Parser.getTok();
  SMLoc OffsetOfLoc = Tok.getLoc();
  Parser.Lex(); // Eat offset.

  const MCExpr *Val;
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ false, End))
    return Err;

  // Don't emit the offset operator.
  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));

  // The offset operator will have an 'r' constraint, thus we need to create a
  // register operand to ensure proper matching.  Just pick a GPR based on
  // the size of a pointer.
  unsigned RegNo = is64BitMode() ?
X86::RBX : X86::EBX; 1548 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, 1549 OffsetOfLoc, Identifier, Info.OpDecl); 1550 } 1551 1552 enum IntelOperatorKind { 1553 IOK_LENGTH, 1554 IOK_SIZE, 1555 IOK_TYPE 1556 }; 1557 1558 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator 1559 /// returns the number of elements in an array. It returns the value 1 for 1560 /// non-array variables. The SIZE operator returns the size of a C or C++ 1561 /// variable. A variable's size is the product of its LENGTH and TYPE. The 1562 /// TYPE operator returns the size of a C or C++ type or variable. If the 1563 /// variable is an array, TYPE returns the size of a single element. 1564 X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) { 1565 const AsmToken &Tok = Parser.getTok(); 1566 SMLoc TypeLoc = Tok.getLoc(); 1567 Parser.Lex(); // Eat operator. 1568 1569 const MCExpr *Val = 0; 1570 InlineAsmIdentifierInfo Info; 1571 SMLoc Start = Tok.getLoc(), End; 1572 StringRef Identifier = Tok.getString(); 1573 if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, 1574 /*Unevaluated*/ true, End)) 1575 return Err; 1576 1577 unsigned CVal = 0; 1578 switch(OpKind) { 1579 default: llvm_unreachable("Unexpected operand kind!"); 1580 case IOK_LENGTH: CVal = Info.Length; break; 1581 case IOK_SIZE: CVal = Info.Size; break; 1582 case IOK_TYPE: CVal = Info.Type; break; 1583 } 1584 1585 // Rewrite the type operator and the C or C++ type or variable in terms of an 1586 // immediate. E.g. TYPE foo -> $$4 1587 unsigned Len = End.getPointer() - TypeLoc.getPointer(); 1588 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); 1589 1590 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); 1591 return X86Operand::CreateImm(Imm, Start, End); 1592 } 1593 1594 X86Operand *X86AsmParser::ParseIntelOperand() { 1595 const AsmToken &Tok = Parser.getTok(); 1596 SMLoc Start = Tok.getLoc(), End; 1597 1598 // Offset, length, type and size operators. 1599 if (isParsingInlineAsm()) { 1600 StringRef AsmTokStr = Tok.getString(); 1601 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") 1602 return ParseIntelOffsetOfOperator(); 1603 if (AsmTokStr == "length" || AsmTokStr == "LENGTH") 1604 return ParseIntelOperator(IOK_LENGTH); 1605 if (AsmTokStr == "size" || AsmTokStr == "SIZE") 1606 return ParseIntelOperator(IOK_SIZE); 1607 if (AsmTokStr == "type" || AsmTokStr == "TYPE") 1608 return ParseIntelOperator(IOK_TYPE); 1609 } 1610 1611 // Immediate. 1612 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) || 1613 getLexer().is(AsmToken::LParen)) { 1614 AsmToken StartTok = Tok; 1615 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, 1616 /*AddImmPrefix=*/false); 1617 if (X86Operand *Err = ParseIntelExpression(SM, End)) 1618 return Err; 1619 1620 int64_t Imm = SM.getImm(); 1621 if (isParsingInlineAsm()) { 1622 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); 1623 if (StartTok.getString().size() == Len) 1624 // Just add a prefix if this wasn't a complex immediate expression. 1625 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); 1626 else 1627 // Otherwise, rewrite the complex expression as a single immediate. 
1628 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm)); 1629 } 1630 1631 if (getLexer().isNot(AsmToken::LBrac)) { 1632 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext()); 1633 return X86Operand::CreateImm(ImmExpr, Start, End); 1634 } 1635 1636 // Only positive immediates are valid. 1637 if (Imm < 0) 1638 return ErrorOperand(Start, "expected a positive immediate displacement " 1639 "before bracketed expr."); 1640 1641 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. 1642 return ParseIntelMemOperand(/*SegReg=*/0, Imm, Start); 1643 } 1644 1645 // Register. 1646 unsigned RegNo = 0; 1647 if (!ParseRegister(RegNo, Start, End)) { 1648 // If this is a segment register followed by a ':', then this is the start 1649 // of a memory reference, otherwise this is a normal register reference. 1650 if (getLexer().isNot(AsmToken::Colon)) 1651 return X86Operand::CreateReg(RegNo, Start, End); 1652 1653 getParser().Lex(); // Eat the colon. 1654 return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start); 1655 } 1656 1657 // Memory operand. 1658 return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start); 1659 } 1660 1661 X86Operand *X86AsmParser::ParseATTOperand() { 1662 switch (getLexer().getKind()) { 1663 default: 1664 // Parse a memory operand with no segment register. 1665 return ParseMemOperand(0, Parser.getTok().getLoc()); 1666 case AsmToken::Percent: { 1667 // Read the register. 1668 unsigned RegNo; 1669 SMLoc Start, End; 1670 if (ParseRegister(RegNo, Start, End)) return 0; 1671 if (RegNo == X86::EIZ || RegNo == X86::RIZ) { 1672 Error(Start, "%eiz and %riz can only be used as index registers", 1673 SMRange(Start, End)); 1674 return 0; 1675 } 1676 1677 // If this is a segment register followed by a ':', then this is the start 1678 // of a memory reference, otherwise this is a normal register reference. 1679 if (getLexer().isNot(AsmToken::Colon)) 1680 return X86Operand::CreateReg(RegNo, Start, End); 1681 1682 getParser().Lex(); // Eat the colon. 1683 return ParseMemOperand(RegNo, Start); 1684 } 1685 case AsmToken::Dollar: { 1686 // $42 -> immediate. 1687 SMLoc Start = Parser.getTok().getLoc(), End; 1688 Parser.Lex(); 1689 const MCExpr *Val; 1690 if (getParser().parseExpression(Val, End)) 1691 return 0; 1692 return X86Operand::CreateImm(Val, Start, End); 1693 } 1694 } 1695 } 1696 1697 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix 1698 /// has already been parsed if present. 1699 X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { 1700 1701 // We have to disambiguate a parenthesized expression "(4+5)" from the start 1702 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 1703 // only way to do this without lookahead is to eat the '(' and see what is 1704 // after it. 1705 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 1706 if (getLexer().isNot(AsmToken::LParen)) { 1707 SMLoc ExprEnd; 1708 if (getParser().parseExpression(Disp, ExprEnd)) return 0; 1709 1710 // After parsing the base expression we could either have a parenthesized 1711 // memory address or not. If not, return now. If so, eat the (. 1712 if (getLexer().isNot(AsmToken::LParen)) { 1713 // Unless we have a segment register, treat this as an immediate. 1714 if (SegReg == 0) 1715 return X86Operand::CreateMem(Disp, MemStart, ExprEnd); 1716 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 1717 } 1718 1719 // Eat the '('. 
    Parser.Lex();
  } else {
    // Okay, we have a '('.  We don't know if this is an expression or not, so
    // we have to eat the '(' to see beyond it.
    SMLoc LParenLoc = Parser.getTok().getLoc();
    Parser.Lex(); // Eat the '('.

    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
      // Nothing to do here, fall into the code below with the '(' part of the
      // memory operand consumed.
    } else {
      SMLoc ExprEnd;

      // It must be a parenthesized expression, parse it now.
      if (getParser().parseParenExpression(Disp, ExprEnd))
        return 0;

      // After parsing the base expression we could either have a parenthesized
      // memory address or not.  If not, return now.  If so, eat the (.
      if (getLexer().isNot(AsmToken::LParen)) {
        // Unless we have a segment register, treat this as an immediate.
        if (SegReg == 0)
          return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
      }

      // Eat the '('.
      Parser.Lex();
    }
  }

  // If we reached here, then we just ate the '(' of the memory operand.
  // Process the rest of the memory operand.
  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
  SMLoc IndexLoc;

  if (getLexer().is(AsmToken::Percent)) {
    SMLoc StartLoc, EndLoc;
    if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
      Error(StartLoc, "eiz and riz can only be used as index registers",
            SMRange(StartLoc, EndLoc));
      return 0;
    }
  }

  if (getLexer().is(AsmToken::Comma)) {
    Parser.Lex(); // Eat the comma.
    IndexLoc = Parser.getTok().getLoc();

    // Following the comma we should have either an index register, or a scale
    // value. We don't support the latter form, but we want to parse it
    // correctly.
    //
    // Note that even though it would be completely consistent to support
    // syntax like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz"
    // for this.
    if (getLexer().is(AsmToken::Percent)) {
      SMLoc L;
      if (ParseRegister(IndexReg, L, L)) return 0;

      if (getLexer().isNot(AsmToken::RParen)) {
        // Parse the scale amount:
        //  ::= ',' [scale-expression]
        if (getLexer().isNot(AsmToken::Comma)) {
          Error(Parser.getTok().getLoc(),
                "expected comma in scale expression");
          return 0;
        }
        Parser.Lex(); // Eat the comma.

        if (getLexer().isNot(AsmToken::RParen)) {
          SMLoc Loc = Parser.getTok().getLoc();

          int64_t ScaleVal;
          if (getParser().parseAbsoluteExpression(ScaleVal)) {
            Error(Loc, "expected scale expression");
            return 0;
          }

          // Validate the scale amount.
          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8) {
            Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
            return 0;
          }
          Scale = (unsigned)ScaleVal;
        }
      }
    } else if (getLexer().isNot(AsmToken::RParen)) {
      // A scale amount without an index register is ignored.
      SMLoc Loc = Parser.getTok().getLoc();

      int64_t Value;
      if (getParser().parseAbsoluteExpression(Value))
        return 0;

      if (Value != 1)
        Warning(Loc, "scale factor without index register is ignored");
      Scale = 1;
    }
  }

  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1823 if (getLexer().isNot(AsmToken::RParen)) { 1824 Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); 1825 return 0; 1826 } 1827 SMLoc MemEnd = Parser.getTok().getEndLoc(); 1828 Parser.Lex(); // Eat the ')'. 1829 1830 // If we have both a base register and an index register make sure they are 1831 // both 64-bit or 32-bit registers. 1832 // To support VSIB, IndexReg can be 128-bit or 256-bit registers. 1833 if (BaseReg != 0 && IndexReg != 0) { 1834 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && 1835 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1836 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && 1837 IndexReg != X86::RIZ) { 1838 Error(IndexLoc, "index register is 32-bit, but base register is 64-bit"); 1839 return 0; 1840 } 1841 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && 1842 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1843 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && 1844 IndexReg != X86::EIZ){ 1845 Error(IndexLoc, "index register is 64-bit, but base register is 32-bit"); 1846 return 0; 1847 } 1848 } 1849 1850 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 1851 MemStart, MemEnd); 1852 } 1853 1854 bool X86AsmParser:: 1855 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, 1856 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 1857 InstInfo = &Info; 1858 StringRef PatchedName = Name; 1859 1860 // FIXME: Hack to recognize setneb as setne. 1861 if (PatchedName.startswith("set") && PatchedName.endswith("b") && 1862 PatchedName != "setb" && PatchedName != "setnb") 1863 PatchedName = PatchedName.substr(0, Name.size()-1); 1864 1865 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 1866 const MCExpr *ExtraImmOp = 0; 1867 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && 1868 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 1869 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 1870 bool IsVCMP = PatchedName[0] == 'v'; 1871 unsigned SSECCIdx = IsVCMP ? 4 : 3; 1872 unsigned SSEComparisonCode = StringSwitch<unsigned>( 1873 PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) 1874 .Case("eq", 0x00) 1875 .Case("lt", 0x01) 1876 .Case("le", 0x02) 1877 .Case("unord", 0x03) 1878 .Case("neq", 0x04) 1879 .Case("nlt", 0x05) 1880 .Case("nle", 0x06) 1881 .Case("ord", 0x07) 1882 /* AVX only from here */ 1883 .Case("eq_uq", 0x08) 1884 .Case("nge", 0x09) 1885 .Case("ngt", 0x0A) 1886 .Case("false", 0x0B) 1887 .Case("neq_oq", 0x0C) 1888 .Case("ge", 0x0D) 1889 .Case("gt", 0x0E) 1890 .Case("true", 0x0F) 1891 .Case("eq_os", 0x10) 1892 .Case("lt_oq", 0x11) 1893 .Case("le_oq", 0x12) 1894 .Case("unord_s", 0x13) 1895 .Case("neq_us", 0x14) 1896 .Case("nlt_uq", 0x15) 1897 .Case("nle_uq", 0x16) 1898 .Case("ord_s", 0x17) 1899 .Case("eq_us", 0x18) 1900 .Case("nge_uq", 0x19) 1901 .Case("ngt_uq", 0x1A) 1902 .Case("false_os", 0x1B) 1903 .Case("neq_os", 0x1C) 1904 .Case("ge_oq", 0x1D) 1905 .Case("gt_oq", 0x1E) 1906 .Case("true_us", 0x1F) 1907 .Default(~0U); 1908 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) { 1909 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, 1910 getParser().getContext()); 1911 if (PatchedName.endswith("ss")) { 1912 PatchedName = IsVCMP ? "vcmpss" : "cmpss"; 1913 } else if (PatchedName.endswith("sd")) { 1914 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; 1915 } else if (PatchedName.endswith("ps")) { 1916 PatchedName = IsVCMP ? 
"vcmpps" : "cmpps"; 1917 } else { 1918 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); 1919 PatchedName = IsVCMP ? "vcmppd" : "cmppd"; 1920 } 1921 } 1922 } 1923 1924 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 1925 1926 if (ExtraImmOp && !isParsingIntelSyntax()) 1927 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 1928 1929 // Determine whether this is an instruction prefix. 1930 bool isPrefix = 1931 Name == "lock" || Name == "rep" || 1932 Name == "repe" || Name == "repz" || 1933 Name == "repne" || Name == "repnz" || 1934 Name == "rex64" || Name == "data16"; 1935 1936 1937 // This does the actual operand parsing. Don't parse any more if we have a 1938 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we 1939 // just want to parse the "lock" as the first instruction and the "incl" as 1940 // the next one. 1941 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { 1942 1943 // Parse '*' modifier. 1944 if (getLexer().is(AsmToken::Star)) { 1945 SMLoc Loc = Parser.getTok().getLoc(); 1946 Operands.push_back(X86Operand::CreateToken("*", Loc)); 1947 Parser.Lex(); // Eat the star. 1948 } 1949 1950 // Read the first operand. 1951 if (X86Operand *Op = ParseOperand()) 1952 Operands.push_back(Op); 1953 else { 1954 Parser.eatToEndOfStatement(); 1955 return true; 1956 } 1957 1958 while (getLexer().is(AsmToken::Comma)) { 1959 Parser.Lex(); // Eat the comma. 1960 1961 // Parse and remember the operand. 1962 if (X86Operand *Op = ParseOperand()) 1963 Operands.push_back(Op); 1964 else { 1965 Parser.eatToEndOfStatement(); 1966 return true; 1967 } 1968 } 1969 1970 if (getLexer().isNot(AsmToken::EndOfStatement)) { 1971 SMLoc Loc = getLexer().getLoc(); 1972 Parser.eatToEndOfStatement(); 1973 return Error(Loc, "unexpected token in argument list"); 1974 } 1975 } 1976 1977 if (getLexer().is(AsmToken::EndOfStatement)) 1978 Parser.Lex(); // Consume the EndOfStatement 1979 else if (isPrefix && getLexer().is(AsmToken::Slash)) 1980 Parser.Lex(); // Consume the prefix separator Slash 1981 1982 if (ExtraImmOp && isParsingIntelSyntax()) 1983 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 1984 1985 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> 1986 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely 1987 // documented form in various unofficial manuals, so a lot of code uses it. 1988 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && 1989 Operands.size() == 3) { 1990 X86Operand &Op = *(X86Operand*)Operands.back(); 1991 if (Op.isMem() && Op.Mem.SegReg == 0 && 1992 isa<MCConstantExpr>(Op.Mem.Disp) && 1993 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 1994 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 1995 SMLoc Loc = Op.getEndLoc(); 1996 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 1997 delete &Op; 1998 } 1999 } 2000 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". 
2001 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && 2002 Operands.size() == 3) { 2003 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 2004 if (Op.isMem() && Op.Mem.SegReg == 0 && 2005 isa<MCConstantExpr>(Op.Mem.Disp) && 2006 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 2007 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 2008 SMLoc Loc = Op.getEndLoc(); 2009 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 2010 delete &Op; 2011 } 2012 } 2013 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]" 2014 if (Name.startswith("ins") && Operands.size() == 3 && 2015 (Name == "insb" || Name == "insw" || Name == "insl")) { 2016 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 2017 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; 2018 if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) { 2019 Operands.pop_back(); 2020 Operands.pop_back(); 2021 delete &Op; 2022 delete &Op2; 2023 } 2024 } 2025 2026 // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]" 2027 if (Name.startswith("outs") && Operands.size() == 3 && 2028 (Name == "outsb" || Name == "outsw" || Name == "outsl")) { 2029 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 2030 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; 2031 if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) { 2032 Operands.pop_back(); 2033 Operands.pop_back(); 2034 delete &Op; 2035 delete &Op2; 2036 } 2037 } 2038 2039 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]" 2040 if (Name.startswith("movs") && Operands.size() == 3 && 2041 (Name == "movsb" || Name == "movsw" || Name == "movsl" || 2042 (is64BitMode() && Name == "movsq"))) { 2043 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 2044 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; 2045 if (isSrcOp(Op) && isDstOp(Op2)) { 2046 Operands.pop_back(); 2047 Operands.pop_back(); 2048 delete &Op; 2049 delete &Op2; 2050 } 2051 } 2052 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]" 2053 if (Name.startswith("lods") && Operands.size() == 3 && 2054 (Name == "lods" || Name == "lodsb" || Name == "lodsw" || 2055 Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) { 2056 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 2057 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); 2058 if (isSrcOp(*Op1) && Op2->isReg()) { 2059 const char *ins; 2060 unsigned reg = Op2->getReg(); 2061 bool isLods = Name == "lods"; 2062 if (reg == X86::AL && (isLods || Name == "lodsb")) 2063 ins = "lodsb"; 2064 else if (reg == X86::AX && (isLods || Name == "lodsw")) 2065 ins = "lodsw"; 2066 else if (reg == X86::EAX && (isLods || Name == "lodsl")) 2067 ins = "lodsl"; 2068 else if (reg == X86::RAX && (isLods || Name == "lodsq")) 2069 ins = "lodsq"; 2070 else 2071 ins = NULL; 2072 if (ins != NULL) { 2073 Operands.pop_back(); 2074 Operands.pop_back(); 2075 delete Op1; 2076 delete Op2; 2077 if (Name != ins) 2078 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins); 2079 } 2080 } 2081 } 2082 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]" 2083 if (Name.startswith("stos") && Operands.size() == 3 && 2084 (Name == "stos" || Name == "stosb" || Name == "stosw" || 2085 Name == "stosl" || (is64BitMode() && Name == "stosq"))) { 2086 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 2087 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); 2088 if (isDstOp(*Op2) && Op1->isReg()) { 2089 const char *ins; 2090 unsigned reg = Op1->getReg(); 2091 bool isStos = Name == 
"stos"; 2092 if (reg == X86::AL && (isStos || Name == "stosb")) 2093 ins = "stosb"; 2094 else if (reg == X86::AX && (isStos || Name == "stosw")) 2095 ins = "stosw"; 2096 else if (reg == X86::EAX && (isStos || Name == "stosl")) 2097 ins = "stosl"; 2098 else if (reg == X86::RAX && (isStos || Name == "stosq")) 2099 ins = "stosq"; 2100 else 2101 ins = NULL; 2102 if (ins != NULL) { 2103 Operands.pop_back(); 2104 Operands.pop_back(); 2105 delete Op1; 2106 delete Op2; 2107 if (Name != ins) 2108 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins); 2109 } 2110 } 2111 } 2112 2113 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to 2114 // "shift <op>". 2115 if ((Name.startswith("shr") || Name.startswith("sar") || 2116 Name.startswith("shl") || Name.startswith("sal") || 2117 Name.startswith("rcl") || Name.startswith("rcr") || 2118 Name.startswith("rol") || Name.startswith("ror")) && 2119 Operands.size() == 3) { 2120 if (isParsingIntelSyntax()) { 2121 // Intel syntax 2122 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]); 2123 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 2124 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { 2125 delete Operands[2]; 2126 Operands.pop_back(); 2127 } 2128 } else { 2129 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 2130 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 2131 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { 2132 delete Operands[1]; 2133 Operands.erase(Operands.begin() + 1); 2134 } 2135 } 2136 } 2137 2138 // Transforms "int $3" into "int3" as a size optimization. We can't write an 2139 // instalias with an immediate operand yet. 2140 if (Name == "int" && Operands.size() == 2) { 2141 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 2142 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 2143 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) { 2144 delete Operands[1]; 2145 Operands.erase(Operands.begin() + 1); 2146 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3"); 2147 } 2148 } 2149 2150 return false; 2151 } 2152 2153 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, 2154 bool isCmp) { 2155 MCInst TmpInst; 2156 TmpInst.setOpcode(Opcode); 2157 if (!isCmp) 2158 TmpInst.addOperand(MCOperand::CreateReg(Reg)); 2159 TmpInst.addOperand(MCOperand::CreateReg(Reg)); 2160 TmpInst.addOperand(Inst.getOperand(0)); 2161 Inst = TmpInst; 2162 return true; 2163 } 2164 2165 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, 2166 bool isCmp = false) { 2167 if (!Inst.getOperand(0).isImm() || 2168 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 2169 return false; 2170 2171 return convertToSExti8(Inst, Opcode, X86::AX, isCmp); 2172 } 2173 2174 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, 2175 bool isCmp = false) { 2176 if (!Inst.getOperand(0).isImm() || 2177 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 2178 return false; 2179 2180 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp); 2181 } 2182 2183 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, 2184 bool isCmp = false) { 2185 if (!Inst.getOperand(0).isImm() || 2186 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 2187 return false; 2188 2189 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); 2190 } 2191 2192 bool X86AsmParser:: 2193 processInstruction(MCInst &Inst, 2194 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { 2195 switch (Inst.getOpcode()) { 2196 default: return false; 2197 case X86::AND16i16: return convert16i16to16ri8(Inst, 
X86::AND16ri8);
2198   case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2199   case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2200   case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2201   case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2202   case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2203   case X86::OR16i16:  return convert16i16to16ri8(Inst, X86::OR16ri8);
2204   case X86::OR32i32:  return convert32i32to32ri8(Inst, X86::OR32ri8);
2205   case X86::OR64i32:  return convert64i32to64ri8(Inst, X86::OR64ri8);
2206   case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2207   case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2208   case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2209   case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2210   case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2211   case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2212   case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2213   case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2214   case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2215   case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2216   case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2217   case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2218   case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2219   case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2220   case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2221   }
2222 }
2223
2224 static const char *getSubtargetFeatureName(unsigned Val);
2225 bool X86AsmParser::
2226 MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2227                         SmallVectorImpl<MCParsedAsmOperand*> &Operands,
2228                         MCStreamer &Out, unsigned &ErrorInfo,
2229                         bool MatchingInlineAsm) {
2230   assert(!Operands.empty() && "Unexpected empty operand list!");
2231   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
2232   assert(Op->isToken() && "Leading operand should always be a mnemonic!");
2233   ArrayRef<SMRange> EmptyRanges = None;
2234
2235   // First, handle aliases that expand to multiple instructions.
2236   // FIXME: This should be replaced with a real .td file alias mechanism.
2237   // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2238   // call.
2239   if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
2240       Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
2241       Op->getToken() == "finit" || Op->getToken() == "fsave" ||
2242       Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
2243     MCInst Inst;
2244     Inst.setOpcode(X86::WAIT);
2245     Inst.setLoc(IDLoc);
2246     if (!MatchingInlineAsm)
2247       Out.EmitInstruction(Inst);
2248
2249     const char *Repl =
2250       StringSwitch<const char*>(Op->getToken())
2251         .Case("finit",  "fninit")
2252         .Case("fsave",  "fnsave")
2253         .Case("fstcw",  "fnstcw")
2254         .Case("fstcww", "fnstcw")
2255         .Case("fstenv", "fnstenv")
2256         .Case("fstsw",  "fnstsw")
2257         .Case("fstsww", "fnstsw")
2258         .Case("fclex",  "fnclex")
2259         .Default(0);
2260     assert(Repl && "Unknown wait-prefixed instruction");
2261     delete Operands[0];
2262     Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2263   }
2264
2265   bool WasOriginallyInvalidOperand = false;
2266   MCInst Inst;
2267
2268   // First, try a direct match.
2269 switch (MatchInstructionImpl(Operands, Inst, 2270 ErrorInfo, MatchingInlineAsm, 2271 isParsingIntelSyntax())) { 2272 default: break; 2273 case Match_Success: 2274 // Some instructions need post-processing to, for example, tweak which 2275 // encoding is selected. Loop on it while changes happen so the 2276 // individual transformations can chain off each other. 2277 if (!MatchingInlineAsm) 2278 while (processInstruction(Inst, Operands)) 2279 ; 2280 2281 Inst.setLoc(IDLoc); 2282 if (!MatchingInlineAsm) 2283 Out.EmitInstruction(Inst); 2284 Opcode = Inst.getOpcode(); 2285 return false; 2286 case Match_MissingFeature: { 2287 assert(ErrorInfo && "Unknown missing feature!"); 2288 // Special case the error message for the very common case where only 2289 // a single subtarget feature is missing. 2290 std::string Msg = "instruction requires:"; 2291 unsigned Mask = 1; 2292 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { 2293 if (ErrorInfo & Mask) { 2294 Msg += " "; 2295 Msg += getSubtargetFeatureName(ErrorInfo & Mask); 2296 } 2297 Mask <<= 1; 2298 } 2299 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm); 2300 } 2301 case Match_InvalidOperand: 2302 WasOriginallyInvalidOperand = true; 2303 break; 2304 case Match_MnemonicFail: 2305 break; 2306 } 2307 2308 // FIXME: Ideally, we would only attempt suffix matches for things which are 2309 // valid prefixes, and we could just infer the right unambiguous 2310 // type. However, that requires substantially more matcher support than the 2311 // following hack. 2312 2313 // Change the operand to point to a temporary token. 2314 StringRef Base = Op->getToken(); 2315 SmallString<16> Tmp; 2316 Tmp += Base; 2317 Tmp += ' '; 2318 Op->setTokenValue(Tmp.str()); 2319 2320 // If this instruction starts with an 'f', then it is a floating point stack 2321 // instruction. These come in up to three forms for 32-bit, 64-bit, and 2322 // 80-bit floating point, which use the suffixes s,l,t respectively. 2323 // 2324 // Otherwise, we assume that this may be an integer instruction, which comes 2325 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. 2326 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; 2327 2328 // Check for the various suffix matches. 2329 Tmp[Base.size()] = Suffixes[0]; 2330 unsigned ErrorInfoIgnore; 2331 unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings. 2332 unsigned Match1, Match2, Match3, Match4; 2333 2334 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2335 MatchingInlineAsm, isParsingIntelSyntax()); 2336 // If this returned as a missing feature failure, remember that. 2337 if (Match1 == Match_MissingFeature) 2338 ErrorInfoMissingFeature = ErrorInfoIgnore; 2339 Tmp[Base.size()] = Suffixes[1]; 2340 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2341 MatchingInlineAsm, isParsingIntelSyntax()); 2342 // If this returned as a missing feature failure, remember that. 2343 if (Match2 == Match_MissingFeature) 2344 ErrorInfoMissingFeature = ErrorInfoIgnore; 2345 Tmp[Base.size()] = Suffixes[2]; 2346 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2347 MatchingInlineAsm, isParsingIntelSyntax()); 2348 // If this returned as a missing feature failure, remember that. 
2349 if (Match3 == Match_MissingFeature) 2350 ErrorInfoMissingFeature = ErrorInfoIgnore; 2351 Tmp[Base.size()] = Suffixes[3]; 2352 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 2353 MatchingInlineAsm, isParsingIntelSyntax()); 2354 // If this returned as a missing feature failure, remember that. 2355 if (Match4 == Match_MissingFeature) 2356 ErrorInfoMissingFeature = ErrorInfoIgnore; 2357 2358 // Restore the old token. 2359 Op->setTokenValue(Base); 2360 2361 // If exactly one matched, then we treat that as a successful match (and the 2362 // instruction will already have been filled in correctly, since the failing 2363 // matches won't have modified it). 2364 unsigned NumSuccessfulMatches = 2365 (Match1 == Match_Success) + (Match2 == Match_Success) + 2366 (Match3 == Match_Success) + (Match4 == Match_Success); 2367 if (NumSuccessfulMatches == 1) { 2368 Inst.setLoc(IDLoc); 2369 if (!MatchingInlineAsm) 2370 Out.EmitInstruction(Inst); 2371 Opcode = Inst.getOpcode(); 2372 return false; 2373 } 2374 2375 // Otherwise, the match failed, try to produce a decent error message. 2376 2377 // If we had multiple suffix matches, then identify this as an ambiguous 2378 // match. 2379 if (NumSuccessfulMatches > 1) { 2380 char MatchChars[4]; 2381 unsigned NumMatches = 0; 2382 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0]; 2383 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1]; 2384 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2]; 2385 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3]; 2386 2387 SmallString<126> Msg; 2388 raw_svector_ostream OS(Msg); 2389 OS << "ambiguous instructions require an explicit suffix (could be "; 2390 for (unsigned i = 0; i != NumMatches; ++i) { 2391 if (i != 0) 2392 OS << ", "; 2393 if (i + 1 == NumMatches) 2394 OS << "or "; 2395 OS << "'" << Base << MatchChars[i] << "'"; 2396 } 2397 OS << ")"; 2398 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm); 2399 return true; 2400 } 2401 2402 // Okay, we know that none of the variants matched successfully. 2403 2404 // If all of the instructions reported an invalid mnemonic, then the original 2405 // mnemonic was invalid. 2406 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && 2407 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { 2408 if (!WasOriginallyInvalidOperand) { 2409 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges : 2410 Op->getLocRange(); 2411 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", 2412 Ranges, MatchingInlineAsm); 2413 } 2414 2415 // Recover location info for the operand if we know which was the problem. 2416 if (ErrorInfo != ~0U) { 2417 if (ErrorInfo >= Operands.size()) 2418 return Error(IDLoc, "too few operands for instruction", 2419 EmptyRanges, MatchingInlineAsm); 2420 2421 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo]; 2422 if (Operand->getStartLoc().isValid()) { 2423 SMRange OperandRange = Operand->getLocRange(); 2424 return Error(Operand->getStartLoc(), "invalid operand for instruction", 2425 OperandRange, MatchingInlineAsm); 2426 } 2427 } 2428 2429 return Error(IDLoc, "invalid operand for instruction", EmptyRanges, 2430 MatchingInlineAsm); 2431 } 2432 2433 // If one instruction matched with a missing feature, report this as a 2434 // missing feature. 
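  // (The sum below counts how many of the four suffix variants failed for this
  // reason; the targeted diagnostic is only emitted when exactly one did.)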
2435 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + 2436 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ 2437 std::string Msg = "instruction requires:"; 2438 unsigned Mask = 1; 2439 for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) { 2440 if (ErrorInfoMissingFeature & Mask) { 2441 Msg += " "; 2442 Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask); 2443 } 2444 Mask <<= 1; 2445 } 2446 return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm); 2447 } 2448 2449 // If one instruction matched with an invalid operand, report this as an 2450 // operand failure. 2451 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + 2452 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ 2453 Error(IDLoc, "invalid operand for instruction", EmptyRanges, 2454 MatchingInlineAsm); 2455 return true; 2456 } 2457 2458 // If all of these were an outright failure, report it in a useless way. 2459 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", 2460 EmptyRanges, MatchingInlineAsm); 2461 return true; 2462 } 2463 2464 2465 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { 2466 StringRef IDVal = DirectiveID.getIdentifier(); 2467 if (IDVal == ".word") 2468 return ParseDirectiveWord(2, DirectiveID.getLoc()); 2469 else if (IDVal.startswith(".code")) 2470 return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); 2471 else if (IDVal.startswith(".att_syntax")) { 2472 getParser().setAssemblerDialect(0); 2473 return false; 2474 } else if (IDVal.startswith(".intel_syntax")) { 2475 getParser().setAssemblerDialect(1); 2476 if (getLexer().isNot(AsmToken::EndOfStatement)) { 2477 if(Parser.getTok().getString() == "noprefix") { 2478 // FIXME : Handle noprefix 2479 Parser.Lex(); 2480 } else 2481 return true; 2482 } 2483 return false; 2484 } 2485 return true; 2486 } 2487 2488 /// ParseDirectiveWord 2489 /// ::= .word [ expression (, expression)* ] 2490 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 2491 if (getLexer().isNot(AsmToken::EndOfStatement)) { 2492 for (;;) { 2493 const MCExpr *Value; 2494 if (getParser().parseExpression(Value)) 2495 return true; 2496 2497 getParser().getStreamer().EmitValue(Value, Size); 2498 2499 if (getLexer().is(AsmToken::EndOfStatement)) 2500 break; 2501 2502 // FIXME: Improve diagnostic. 2503 if (getLexer().isNot(AsmToken::Comma)) 2504 return Error(L, "unexpected token in directive"); 2505 Parser.Lex(); 2506 } 2507 } 2508 2509 Parser.Lex(); 2510 return false; 2511 } 2512 2513 /// ParseDirectiveCode 2514 /// ::= .code32 | .code64 2515 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { 2516 if (IDVal == ".code32") { 2517 Parser.Lex(); 2518 if (is64BitMode()) { 2519 SwitchMode(); 2520 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 2521 } 2522 } else if (IDVal == ".code64") { 2523 Parser.Lex(); 2524 if (!is64BitMode()) { 2525 SwitchMode(); 2526 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64); 2527 } 2528 } else { 2529 return Error(L, "unexpected directive " + IDVal); 2530 } 2531 2532 return false; 2533 } 2534 2535 // Force static initialization. 2536 extern "C" void LLVMInitializeX86AsmParser() { 2537 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target); 2538 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target); 2539 } 2540 2541 #define GET_REGISTER_MATCHER 2542 #define GET_MATCHER_IMPLEMENTATION 2543 #define GET_SUBTARGET_FEATURE_NAME 2544 #include "X86GenAsmMatcher.inc" 2545
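// The GET_* sections of X86GenAsmMatcher.inc selected above are expected to
// provide the TableGen-generated MatchRegisterName, MatchInstructionImpl and
// getSubtargetFeatureName definitions used throughout this file.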