Home | History | Annotate | Download | only in MCParser
      1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
     11 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
     12 
     13 #include "llvm/ADT/StringRef.h"
     14 #include "llvm/MC/MCExpr.h"
     15 #include "llvm/MC/MCInstrInfo.h"
     16 #include "llvm/MC/MCParser/MCAsmLexer.h"
     17 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
     18 #include "llvm/MC/MCTargetOptions.h"
     19 #include "llvm/Support/SMLoc.h"
     20 #include <cstdint>
     21 #include <memory>
     22 
     23 namespace llvm {
     24 
     25 class MCInst;
     26 class MCParsedAsmOperand;
     27 class MCStreamer;
     28 class MCSubtargetInfo;
     29 template <typename T> class SmallVectorImpl;
     30 
     31 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
     32 
     33 enum AsmRewriteKind {
     34   AOK_Align,          // Rewrite align as .align.
     35   AOK_EVEN,           // Rewrite even as .even.
     36   AOK_Emit,           // Rewrite _emit as .byte.
     37   AOK_Input,          // Rewrite in terms of $N.
     38   AOK_Output,         // Rewrite in terms of $N.
     39   AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
     40   AOK_Label,          // Rewrite local labels.
     41   AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
     42   AOK_Skip,           // Skip emission (e.g., offset/type operators).
     43   AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
     44 };
     45 
     46 const char AsmRewritePrecedence [] = {
     47   2, // AOK_Align
     48   2, // AOK_EVEN
     49   2, // AOK_Emit
     50   3, // AOK_Input
     51   3, // AOK_Output
     52   5, // AOK_SizeDirective
     53   1, // AOK_Label
     54   5, // AOK_EndOfStatement
     55   2, // AOK_Skip
     56   2  // AOK_IntelExpr
     57 };
     58 
     59 // Represnt the various parts which makes up an intel expression,
     60 // used for emitting compound intel expressions
     61 struct IntelExpr {
     62   bool NeedBracs;
     63   int64_t Imm;
     64   StringRef BaseReg;
     65   StringRef IndexReg;
     66   unsigned Scale;
     67 
     68   IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0),
     69     BaseReg(StringRef()), IndexReg(StringRef()),
     70     Scale(1) {}
     71   // Compund immediate expression
     72   IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) {
     73     Imm = imm;
     74   }
     75   // [Reg + ImmediateExpression]
     76   // We don't bother to emit an immediate expression evaluated to zero
     77   IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0,
     78     bool needBracs = true) :
     79     IntelExpr(imm, needBracs) {
     80     IndexReg = reg;
     81     if (scale)
     82       Scale = scale;
     83   }
     84   // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
     85   IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0,
     86     int64_t imm = 0, bool needBracs = true) :
     87     IntelExpr(indexReg, imm, scale, needBracs) {
     88     BaseReg = baseReg;
     89   }
     90   bool hasBaseReg() const {
     91     return BaseReg.size();
     92   }
     93   bool hasIndexReg() const {
     94     return IndexReg.size();
     95   }
     96   bool hasRegs() const {
     97     return hasBaseReg() || hasIndexReg();
     98   }
     99   bool isValid() const {
    100     return (Scale == 1) ||
    101            (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
    102   }
    103 };
    104 
    105 struct AsmRewrite {
    106   AsmRewriteKind Kind;
    107   SMLoc Loc;
    108   unsigned Len;
    109   int64_t Val;
    110   StringRef Label;
    111   IntelExpr IntelExp;
    112 
    113 public:
    114   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
    115     : Kind(kind), Loc(loc), Len(len), Val(val) {}
    116   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
    117     : AsmRewrite(kind, loc, len) { Label = label; }
    118   AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
    119     : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
    120 };
    121 
    122 struct ParseInstructionInfo {
    123   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
    124 
    125   ParseInstructionInfo() = default;
    126   ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
    127     : AsmRewrites(rewrites) {}
    128 };
    129 
    130 enum OperandMatchResultTy {
    131   MatchOperand_Success,  // operand matched successfully
    132   MatchOperand_NoMatch,  // operand did not match
    133   MatchOperand_ParseFail // operand matched but had errors
    134 };
    135 
    136 // When matching of an assembly instruction fails, there may be multiple
    137 // encodings that are close to being a match. It's often ambiguous which one
    138 // the programmer intended to use, so we want to report an error which mentions
    139 // each of these "near-miss" encodings. This struct contains information about
    140 // one such encoding, and why it did not match the parsed instruction.
    141 class NearMissInfo {
    142 public:
    143   enum NearMissKind {
    144     NoNearMiss,
    145     NearMissOperand,
    146     NearMissFeature,
    147     NearMissPredicate,
    148     NearMissTooFewOperands,
    149   };
    150 
    151   // The encoding is valid for the parsed assembly string. This is only used
    152   // internally to the table-generated assembly matcher.
    153   static NearMissInfo getSuccess() { return NearMissInfo(); }
    154 
    155   // The instruction encoding is not valid because it requires some target
    156   // features that are not currently enabled. MissingFeatures has a bit set for
    157   // each feature that the encoding needs but which is not enabled.
    158   static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
    159     NearMissInfo Result;
    160     Result.Kind = NearMissFeature;
    161     Result.Features = MissingFeatures;
    162     return Result;
    163   }
    164 
    165   // The instruction encoding is not valid because the target-specific
    166   // predicate function returned an error code. FailureCode is the
    167   // target-specific error code returned by the predicate.
    168   static NearMissInfo getMissedPredicate(unsigned FailureCode) {
    169     NearMissInfo Result;
    170     Result.Kind = NearMissPredicate;
    171     Result.PredicateError = FailureCode;
    172     return Result;
    173   }
    174 
    175   // The instruction encoding is not valid because one (and only one) parsed
    176   // operand is not of the correct type. OperandError is the error code
    177   // relating to the operand class expected by the encoding. OperandClass is
    178   // the type of the expected operand. Opcode is the opcode of the encoding.
    179   // OperandIndex is the index into the parsed operand list.
    180   static NearMissInfo getMissedOperand(unsigned OperandError,
    181                                        unsigned OperandClass, unsigned Opcode,
    182                                        unsigned OperandIndex) {
    183     NearMissInfo Result;
    184     Result.Kind = NearMissOperand;
    185     Result.MissedOperand.Error = OperandError;
    186     Result.MissedOperand.Class = OperandClass;
    187     Result.MissedOperand.Opcode = Opcode;
    188     Result.MissedOperand.Index = OperandIndex;
    189     return Result;
    190   }
    191 
    192   // The instruction encoding is not valid because it expects more operands
    193   // than were parsed. OperandClass is the class of the expected operand that
    194   // was not provided. Opcode is the instruction encoding.
    195   static NearMissInfo getTooFewOperands(unsigned OperandClass,
    196                                         unsigned Opcode) {
    197     NearMissInfo Result;
    198     Result.Kind = NearMissTooFewOperands;
    199     Result.TooFewOperands.Class = OperandClass;
    200     Result.TooFewOperands.Opcode = Opcode;
    201     return Result;
    202   }
    203 
    204   operator bool() const { return Kind != NoNearMiss; }
    205 
    206   NearMissKind getKind() const { return Kind; }
    207 
    208   // Feature flags required by the instruction, that the current target does
    209   // not have.
    210   uint64_t getFeatures() const {
    211     assert(Kind == NearMissFeature);
    212     return Features;
    213   }
    214   // Error code returned by the target predicate when validating this
    215   // instruction encoding.
    216   unsigned getPredicateError() const {
    217     assert(Kind == NearMissPredicate);
    218     return PredicateError;
    219   }
    220   // MatchClassKind of the operand that we expected to see.
    221   unsigned getOperandClass() const {
    222     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    223     return MissedOperand.Class;
    224   }
    225   // Opcode of the encoding we were trying to match.
    226   unsigned getOpcode() const {
    227     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    228     return MissedOperand.Opcode;
    229   }
    230   // Error code returned when validating the operand.
    231   unsigned getOperandError() const {
    232     assert(Kind == NearMissOperand);
    233     return MissedOperand.Error;
    234   }
    235   // Index of the actual operand we were trying to match in the list of parsed
    236   // operands.
    237   unsigned getOperandIndex() const {
    238     assert(Kind == NearMissOperand);
    239     return MissedOperand.Index;
    240   }
    241 
    242 private:
    243   NearMissKind Kind;
    244 
    245   // These two structs share a common prefix, so we can safely rely on the fact
    246   // that they overlap in the union.
    247   struct MissedOpInfo {
    248     unsigned Class;
    249     unsigned Opcode;
    250     unsigned Error;
    251     unsigned Index;
    252   };
    253 
    254   struct TooFewOperandsInfo {
    255     unsigned Class;
    256     unsigned Opcode;
    257   };
    258 
    259   union {
    260     uint64_t Features;
    261     unsigned PredicateError;
    262     MissedOpInfo MissedOperand;
    263     TooFewOperandsInfo TooFewOperands;
    264   };
    265 
    266   NearMissInfo() : Kind(NoNearMiss) {}
    267 };
    268 
    269 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
    270 class MCTargetAsmParser : public MCAsmParserExtension {
    271 public:
    272   enum MatchResultTy {
    273     Match_InvalidOperand,
    274     Match_MissingFeature,
    275     Match_MnemonicFail,
    276     Match_Success,
    277     Match_NearMisses,
    278     FIRST_TARGET_MATCH_RESULT_TY
    279   };
    280 
    281 protected: // Can only create subclasses.
    282   MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
    283                     const MCInstrInfo &MII);
    284 
    285   /// Create a copy of STI and return a non-const reference to it.
    286   MCSubtargetInfo &copySTI();
    287 
    288   /// AvailableFeatures - The current set of available features.
    289   uint64_t AvailableFeatures = 0;
    290 
    291   /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
    292   bool ParsingInlineAsm = false;
    293 
    294   /// SemaCallback - The Sema callback implementation.  Must be set when parsing
    295   /// ms-style inline assembly.
    296   MCAsmParserSemaCallback *SemaCallback;
    297 
    298   /// Set of options which affects instrumentation of inline assembly.
    299   MCTargetOptions MCOptions;
    300 
    301   /// Current STI.
    302   const MCSubtargetInfo *STI;
    303 
    304   const MCInstrInfo &MII;
    305 
    306 public:
    307   MCTargetAsmParser(const MCTargetAsmParser &) = delete;
    308   MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
    309 
    310   ~MCTargetAsmParser() override;
    311 
    312   const MCSubtargetInfo &getSTI() const;
    313 
    314   uint64_t getAvailableFeatures() const { return AvailableFeatures; }
    315   void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
    316 
    317   bool isParsingInlineAsm () { return ParsingInlineAsm; }
    318   void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
    319 
    320   MCTargetOptions getTargetOptions() const { return MCOptions; }
    321 
    322   void setSemaCallback(MCAsmParserSemaCallback *Callback) {
    323     SemaCallback = Callback;
    324   }
    325 
    326   virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
    327                              SMLoc &EndLoc) = 0;
    328 
    329   /// Sets frame register corresponding to the current MachineFunction.
    330   virtual void SetFrameRegister(unsigned RegNo) {}
    331 
    332   /// ParseInstruction - Parse one assembly instruction.
    333   ///
    334   /// The parser is positioned following the instruction name. The target
    335   /// specific instruction parser should parse the entire instruction and
    336   /// construct the appropriate MCInst, or emit an error. On success, the entire
    337   /// line should be parsed up to and including the end-of-statement token. On
    338   /// failure, the parser is not required to read to the end of the line.
    339   //
    340   /// \param Name - The instruction name.
    341   /// \param NameLoc - The source location of the name.
    342   /// \param Operands [out] - The list of parsed operands, this returns
    343   ///        ownership of them to the caller.
    344   /// \return True on failure.
    345   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
    346                                 SMLoc NameLoc, OperandVector &Operands) = 0;
    347   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
    348                                 AsmToken Token, OperandVector &Operands) {
    349     return ParseInstruction(Info, Name, Token.getLoc(), Operands);
    350   }
    351 
    352   /// ParseDirective - Parse a target specific assembler directive
    353   ///
    354   /// The parser is positioned following the directive name.  The target
    355   /// specific directive parser should parse the entire directive doing or
    356   /// recording any target specific work, or return true and do nothing if the
    357   /// directive is not target specific. If the directive is specific for
    358   /// the target, the entire line is parsed up to and including the
    359   /// end-of-statement token and false is returned.
    360   ///
    361   /// \param DirectiveID - the identifier token of the directive.
    362   virtual bool ParseDirective(AsmToken DirectiveID) = 0;
    363 
    364   /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
    365   /// instruction as an actual MCInst and emit it to the specified MCStreamer.
    366   /// This returns false on success and returns true on failure to match.
    367   ///
    368   /// On failure, the target parser is responsible for emitting a diagnostic
    369   /// explaining the match failure.
    370   virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
    371                                        OperandVector &Operands, MCStreamer &Out,
    372                                        uint64_t &ErrorInfo,
    373                                        bool MatchingInlineAsm) = 0;
    374 
    375   /// Allows targets to let registers opt out of clobber lists.
    376   virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
    377 
    378   /// Allow a target to add special case operand matching for things that
    379   /// tblgen doesn't/can't handle effectively. For example, literal
    380   /// immediates on ARM. TableGen expects a token operand, but the parser
    381   /// will recognize them as immediates.
    382   virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
    383                                               unsigned Kind) {
    384     return Match_InvalidOperand;
    385   }
    386 
    387   /// Validate the instruction match against any complex target predicates
    388   /// before rendering any operands to it.
    389   virtual unsigned
    390   checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
    391     return Match_Success;
    392   }
    393 
    394   /// checkTargetMatchPredicate - Validate the instruction match against
    395   /// any complex target predicates not expressible via match classes.
    396   virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
    397     return Match_Success;
    398   }
    399 
    400   virtual void convertToMapAndConstraints(unsigned Kind,
    401                                           const OperandVector &Operands) = 0;
    402 
    403   // Return whether this parser uses assignment statements with equals tokens
    404   virtual bool equalIsAsmAssignment() { return true; };
    405   // Return whether this start of statement identifier is a label
    406   virtual bool isLabel(AsmToken &Token) { return true; };
    407   // Return whether this parser accept star as start of statement
    408   virtual bool starIsStartOfStatement() { return false; };
    409 
    410   virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
    411                                             MCSymbolRefExpr::VariantKind,
    412                                             MCContext &Ctx) {
    413     return nullptr;
    414   }
    415 
    416   virtual void onLabelParsed(MCSymbol *Symbol) {}
    417 
    418   /// Ensure that all previously parsed instructions have been emitted to the
    419   /// output streamer, if the target does not emit them immediately.
    420   virtual void flushPendingInstructions(MCStreamer &Out) {}
    421 
    422   virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
    423                                               AsmToken::TokenKind OperatorToken,
    424                                               MCContext &Ctx) {
    425     return nullptr;
    426   }
    427 };
    428 
    429 } // end namespace llvm
    430 
    431 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
    432