Home | History | Annotate | Download | only in MCParser
      1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
     11 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
     12 
     13 #include "llvm/ADT/StringRef.h"
     14 #include "llvm/MC/MCExpr.h"
     15 #include "llvm/MC/MCInstrInfo.h"
     16 #include "llvm/MC/MCParser/MCAsmLexer.h"
     17 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
     18 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
     19 #include "llvm/MC/MCTargetOptions.h"
     20 #include "llvm/Support/SMLoc.h"
     21 #include <cstdint>
     22 #include <memory>
     23 
     24 namespace llvm {
     25 
     26 class MCInst;
     27 class MCParsedAsmOperand;
     28 class MCStreamer;
     29 class MCSubtargetInfo;
     30 template <typename T> class SmallVectorImpl;
     31 
     32 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
     33 
     34 enum AsmRewriteKind {
     35   AOK_Align,          // Rewrite align as .align.
     36   AOK_EVEN,           // Rewrite even as .even.
     37   AOK_Emit,           // Rewrite _emit as .byte.
     38   AOK_Input,          // Rewrite in terms of $N.
     39   AOK_Output,         // Rewrite in terms of $N.
     40   AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
     41   AOK_Label,          // Rewrite local labels.
     42   AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
     43   AOK_Skip,           // Skip emission (e.g., offset/type operators).
     44   AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
     45 };
     46 
     47 const char AsmRewritePrecedence [] = {
     48   2, // AOK_Align
     49   2, // AOK_EVEN
     50   2, // AOK_Emit
     51   3, // AOK_Input
     52   3, // AOK_Output
     53   5, // AOK_SizeDirective
     54   1, // AOK_Label
     55   5, // AOK_EndOfStatement
     56   2, // AOK_Skip
     57   2  // AOK_IntelExpr
     58 };
     59 
     60 // Represnt the various parts which makes up an intel expression,
     61 // used for emitting compound intel expressions
     62 struct IntelExpr {
     63   bool NeedBracs;
     64   int64_t Imm;
     65   StringRef BaseReg;
     66   StringRef IndexReg;
     67   unsigned Scale;
     68 
     69   IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0),
     70     BaseReg(StringRef()), IndexReg(StringRef()),
     71     Scale(1) {}
     72   // Compund immediate expression
     73   IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) {
     74     Imm = imm;
     75   }
     76   // [Reg + ImmediateExpression]
     77   // We don't bother to emit an immediate expression evaluated to zero
     78   IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0,
     79     bool needBracs = true) :
     80     IntelExpr(imm, needBracs) {
     81     IndexReg = reg;
     82     if (scale)
     83       Scale = scale;
     84   }
     85   // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression]
     86   IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0,
     87     int64_t imm = 0, bool needBracs = true) :
     88     IntelExpr(indexReg, imm, scale, needBracs) {
     89     BaseReg = baseReg;
     90   }
     91   bool hasBaseReg() const {
     92     return BaseReg.size();
     93   }
     94   bool hasIndexReg() const {
     95     return IndexReg.size();
     96   }
     97   bool hasRegs() const {
     98     return hasBaseReg() || hasIndexReg();
     99   }
    100   bool isValid() const {
    101     return (Scale == 1) ||
    102            (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
    103   }
    104 };
    105 
    106 struct AsmRewrite {
    107   AsmRewriteKind Kind;
    108   SMLoc Loc;
    109   unsigned Len;
    110   int64_t Val;
    111   StringRef Label;
    112   IntelExpr IntelExp;
    113 
    114 public:
    115   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
    116     : Kind(kind), Loc(loc), Len(len), Val(val) {}
    117   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
    118     : AsmRewrite(kind, loc, len) { Label = label; }
    119   AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
    120     : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
    121 };
    122 
/// Extra state passed by reference to MCTargetAsmParser::ParseInstruction.
struct ParseInstructionInfo {
  /// List of rewrites supplied by whoever created this object, or null when
  /// none was supplied. This struct never frees the pointee.
  SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;

  /// Leaves AsmRewrites null.
  ParseInstructionInfo() = default;
  /// Stores \p rewrites as the rewrite list.
  ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
    : AsmRewrites(rewrites) {}
};
    130 
/// Three-way outcome of trying to match a single operand.
enum OperandMatchResultTy {
  MatchOperand_Success,  // operand matched successfully
  MatchOperand_NoMatch,  // operand did not match
  MatchOperand_ParseFail // operand matched but had errors
};
    136 
    137 enum class DiagnosticPredicateTy {
    138   Match,
    139   NearMatch,
    140   NoMatch,
    141 };
    142 
    143 // When an operand is parsed, the assembler will try to iterate through a set of
    144 // possible operand classes that the operand might match and call the
    145 // corresponding PredicateMethod to determine that.
    146 //
    147 // If there are two AsmOperands that would give a specific diagnostic if there
    148 // is no match, there is currently no mechanism to distinguish which operand is
    149 // a closer match. The DiagnosticPredicate distinguishes between 'completely
    150 // no match' and 'near match', so the assembler can decide whether to give a
    151 // specific diagnostic, or use 'InvalidOperand' and continue to find a
    152 // 'better matching' diagnostic.
    153 //
    154 // For example:
    155 //    opcode opnd0, onpd1, opnd2
    156 //
    157 // where:
    158 //    opnd2 could be an 'immediate of range [-8, 7]'
    159 //    opnd2 could be a  'register + shift/extend'.
    160 //
    161 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
    162 // little sense to give a diagnostic that the operand should be an immediate
    163 // in range [-8, 7].
    164 //
    165 // This is a light-weight alternative to the 'NearMissInfo' approach
    166 // below which collects *all* possible diagnostics. This alternative
    167 // is optional and fully backward compatible with existing
    168 // PredicateMethods that return a 'bool' (match or no match).
    169 struct DiagnosticPredicate {
    170   DiagnosticPredicateTy Type;
    171 
    172   explicit DiagnosticPredicate(bool Match)
    173       : Type(Match ? DiagnosticPredicateTy::Match
    174                    : DiagnosticPredicateTy::NearMatch) {}
    175   DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
    176   DiagnosticPredicate(const DiagnosticPredicate &) = default;
    177 
    178   operator bool() const { return Type == DiagnosticPredicateTy::Match; }
    179   bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
    180   bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
    181   bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }
    182 };
    183 
    184 // When matching of an assembly instruction fails, there may be multiple
    185 // encodings that are close to being a match. It's often ambiguous which one
    186 // the programmer intended to use, so we want to report an error which mentions
    187 // each of these "near-miss" encodings. This struct contains information about
    188 // one such encoding, and why it did not match the parsed instruction.
    189 class NearMissInfo {
    190 public:
    191   enum NearMissKind {
    192     NoNearMiss,
    193     NearMissOperand,
    194     NearMissFeature,
    195     NearMissPredicate,
    196     NearMissTooFewOperands,
    197   };
    198 
    199   // The encoding is valid for the parsed assembly string. This is only used
    200   // internally to the table-generated assembly matcher.
    201   static NearMissInfo getSuccess() { return NearMissInfo(); }
    202 
    203   // The instruction encoding is not valid because it requires some target
    204   // features that are not currently enabled. MissingFeatures has a bit set for
    205   // each feature that the encoding needs but which is not enabled.
    206   static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
    207     NearMissInfo Result;
    208     Result.Kind = NearMissFeature;
    209     Result.Features = MissingFeatures;
    210     return Result;
    211   }
    212 
    213   // The instruction encoding is not valid because the target-specific
    214   // predicate function returned an error code. FailureCode is the
    215   // target-specific error code returned by the predicate.
    216   static NearMissInfo getMissedPredicate(unsigned FailureCode) {
    217     NearMissInfo Result;
    218     Result.Kind = NearMissPredicate;
    219     Result.PredicateError = FailureCode;
    220     return Result;
    221   }
    222 
    223   // The instruction encoding is not valid because one (and only one) parsed
    224   // operand is not of the correct type. OperandError is the error code
    225   // relating to the operand class expected by the encoding. OperandClass is
    226   // the type of the expected operand. Opcode is the opcode of the encoding.
    227   // OperandIndex is the index into the parsed operand list.
    228   static NearMissInfo getMissedOperand(unsigned OperandError,
    229                                        unsigned OperandClass, unsigned Opcode,
    230                                        unsigned OperandIndex) {
    231     NearMissInfo Result;
    232     Result.Kind = NearMissOperand;
    233     Result.MissedOperand.Error = OperandError;
    234     Result.MissedOperand.Class = OperandClass;
    235     Result.MissedOperand.Opcode = Opcode;
    236     Result.MissedOperand.Index = OperandIndex;
    237     return Result;
    238   }
    239 
    240   // The instruction encoding is not valid because it expects more operands
    241   // than were parsed. OperandClass is the class of the expected operand that
    242   // was not provided. Opcode is the instruction encoding.
    243   static NearMissInfo getTooFewOperands(unsigned OperandClass,
    244                                         unsigned Opcode) {
    245     NearMissInfo Result;
    246     Result.Kind = NearMissTooFewOperands;
    247     Result.TooFewOperands.Class = OperandClass;
    248     Result.TooFewOperands.Opcode = Opcode;
    249     return Result;
    250   }
    251 
    252   operator bool() const { return Kind != NoNearMiss; }
    253 
    254   NearMissKind getKind() const { return Kind; }
    255 
    256   // Feature flags required by the instruction, that the current target does
    257   // not have.
    258   uint64_t getFeatures() const {
    259     assert(Kind == NearMissFeature);
    260     return Features;
    261   }
    262   // Error code returned by the target predicate when validating this
    263   // instruction encoding.
    264   unsigned getPredicateError() const {
    265     assert(Kind == NearMissPredicate);
    266     return PredicateError;
    267   }
    268   // MatchClassKind of the operand that we expected to see.
    269   unsigned getOperandClass() const {
    270     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    271     return MissedOperand.Class;
    272   }
    273   // Opcode of the encoding we were trying to match.
    274   unsigned getOpcode() const {
    275     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
    276     return MissedOperand.Opcode;
    277   }
    278   // Error code returned when validating the operand.
    279   unsigned getOperandError() const {
    280     assert(Kind == NearMissOperand);
    281     return MissedOperand.Error;
    282   }
    283   // Index of the actual operand we were trying to match in the list of parsed
    284   // operands.
    285   unsigned getOperandIndex() const {
    286     assert(Kind == NearMissOperand);
    287     return MissedOperand.Index;
    288   }
    289 
    290 private:
    291   NearMissKind Kind;
    292 
    293   // These two structs share a common prefix, so we can safely rely on the fact
    294   // that they overlap in the union.
    295   struct MissedOpInfo {
    296     unsigned Class;
    297     unsigned Opcode;
    298     unsigned Error;
    299     unsigned Index;
    300   };
    301 
    302   struct TooFewOperandsInfo {
    303     unsigned Class;
    304     unsigned Opcode;
    305   };
    306 
    307   union {
    308     uint64_t Features;
    309     unsigned PredicateError;
    310     MissedOpInfo MissedOperand;
    311     TooFewOperandsInfo TooFewOperands;
    312   };
    313 
    314   NearMissInfo() : Kind(NoNearMiss) {}
    315 };
    316 
    317 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
    318 class MCTargetAsmParser : public MCAsmParserExtension {
    319 public:
    320   enum MatchResultTy {
    321     Match_InvalidOperand,
    322     Match_InvalidTiedOperand,
    323     Match_MissingFeature,
    324     Match_MnemonicFail,
    325     Match_Success,
    326     Match_NearMisses,
    327     FIRST_TARGET_MATCH_RESULT_TY
    328   };
    329 
    330 protected: // Can only create subclasses.
    331   MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
    332                     const MCInstrInfo &MII);
    333 
    334   /// Create a copy of STI and return a non-const reference to it.
    335   MCSubtargetInfo &copySTI();
    336 
    337   /// AvailableFeatures - The current set of available features.
    338   uint64_t AvailableFeatures = 0;
    339 
    340   /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
    341   bool ParsingInlineAsm = false;
    342 
    343   /// SemaCallback - The Sema callback implementation.  Must be set when parsing
    344   /// ms-style inline assembly.
    345   MCAsmParserSemaCallback *SemaCallback;
    346 
    347   /// Set of options which affects instrumentation of inline assembly.
    348   MCTargetOptions MCOptions;
    349 
    350   /// Current STI.
    351   const MCSubtargetInfo *STI;
    352 
    353   const MCInstrInfo &MII;
    354 
    355 public:
    356   MCTargetAsmParser(const MCTargetAsmParser &) = delete;
    357   MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
    358 
    359   ~MCTargetAsmParser() override;
    360 
    361   const MCSubtargetInfo &getSTI() const;
    362 
    363   uint64_t getAvailableFeatures() const { return AvailableFeatures; }
    364   void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
    365 
    366   bool isParsingInlineAsm () { return ParsingInlineAsm; }
    367   void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
    368 
    369   MCTargetOptions getTargetOptions() const { return MCOptions; }
    370 
    371   void setSemaCallback(MCAsmParserSemaCallback *Callback) {
    372     SemaCallback = Callback;
    373   }
    374 
    375   // Target-specific parsing of expression.
    376   virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
    377     return getParser().parsePrimaryExpr(Res, EndLoc);
    378   }
    379 
    380   virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
    381                              SMLoc &EndLoc) = 0;
    382 
    383   /// Sets frame register corresponding to the current MachineFunction.
    384   virtual void SetFrameRegister(unsigned RegNo) {}
    385 
    386   /// ParseInstruction - Parse one assembly instruction.
    387   ///
    388   /// The parser is positioned following the instruction name. The target
    389   /// specific instruction parser should parse the entire instruction and
    390   /// construct the appropriate MCInst, or emit an error. On success, the entire
    391   /// line should be parsed up to and including the end-of-statement token. On
    392   /// failure, the parser is not required to read to the end of the line.
    393   //
    394   /// \param Name - The instruction name.
    395   /// \param NameLoc - The source location of the name.
    396   /// \param Operands [out] - The list of parsed operands, this returns
    397   ///        ownership of them to the caller.
    398   /// \return True on failure.
    399   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
    400                                 SMLoc NameLoc, OperandVector &Operands) = 0;
    401   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
    402                                 AsmToken Token, OperandVector &Operands) {
    403     return ParseInstruction(Info, Name, Token.getLoc(), Operands);
    404   }
    405 
    406   /// ParseDirective - Parse a target specific assembler directive
    407   ///
    408   /// The parser is positioned following the directive name.  The target
    409   /// specific directive parser should parse the entire directive doing or
    410   /// recording any target specific work, or return true and do nothing if the
    411   /// directive is not target specific. If the directive is specific for
    412   /// the target, the entire line is parsed up to and including the
    413   /// end-of-statement token and false is returned.
    414   ///
    415   /// \param DirectiveID - the identifier token of the directive.
    416   virtual bool ParseDirective(AsmToken DirectiveID) = 0;
    417 
    418   /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
    419   /// instruction as an actual MCInst and emit it to the specified MCStreamer.
    420   /// This returns false on success and returns true on failure to match.
    421   ///
    422   /// On failure, the target parser is responsible for emitting a diagnostic
    423   /// explaining the match failure.
    424   virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
    425                                        OperandVector &Operands, MCStreamer &Out,
    426                                        uint64_t &ErrorInfo,
    427                                        bool MatchingInlineAsm) = 0;
    428 
    429   /// Allows targets to let registers opt out of clobber lists.
    430   virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
    431 
    432   /// Allow a target to add special case operand matching for things that
    433   /// tblgen doesn't/can't handle effectively. For example, literal
    434   /// immediates on ARM. TableGen expects a token operand, but the parser
    435   /// will recognize them as immediates.
    436   virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
    437                                               unsigned Kind) {
    438     return Match_InvalidOperand;
    439   }
    440 
    441   /// Validate the instruction match against any complex target predicates
    442   /// before rendering any operands to it.
    443   virtual unsigned
    444   checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
    445     return Match_Success;
    446   }
    447 
    448   /// checkTargetMatchPredicate - Validate the instruction match against
    449   /// any complex target predicates not expressible via match classes.
    450   virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
    451     return Match_Success;
    452   }
    453 
    454   virtual void convertToMapAndConstraints(unsigned Kind,
    455                                           const OperandVector &Operands) = 0;
    456 
    457   /// Returns whether two registers are equal and is used by the tied-operands
    458   /// checks in the AsmMatcher. This method can be overridden allow e.g. a
    459   /// sub- or super-register as the tied operand.
    460   virtual bool regsEqual(const MCParsedAsmOperand &Op1,
    461                          const MCParsedAsmOperand &Op2) const {
    462     assert(Op1.isReg() && Op2.isReg() && "Operands not all regs");
    463     return Op1.getReg() == Op2.getReg();
    464   }
    465 
    466   // Return whether this parser uses assignment statements with equals tokens
    467   virtual bool equalIsAsmAssignment() { return true; };
    468   // Return whether this start of statement identifier is a label
    469   virtual bool isLabel(AsmToken &Token) { return true; };
    470   // Return whether this parser accept star as start of statement
    471   virtual bool starIsStartOfStatement() { return false; };
    472 
    473   virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
    474                                             MCSymbolRefExpr::VariantKind,
    475                                             MCContext &Ctx) {
    476     return nullptr;
    477   }
    478 
    479   virtual void onLabelParsed(MCSymbol *Symbol) {}
    480 
    481   /// Ensure that all previously parsed instructions have been emitted to the
    482   /// output streamer, if the target does not emit them immediately.
    483   virtual void flushPendingInstructions(MCStreamer &Out) {}
    484 
    485   virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
    486                                               AsmToken::TokenKind OperatorToken,
    487                                               MCContext &Ctx) {
    488     return nullptr;
    489   }
    490 };
    491 
    492 } // end namespace llvm
    493 
    494 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
    495