Home | History | Annotate | Download | only in MCParser
      1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
     11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
     12 
     13 #include "llvm/ADT/APInt.h"
     14 #include "llvm/ADT/StringRef.h"
     15 #include "llvm/Support/Compiler.h"
     16 #include "llvm/Support/DataTypes.h"
     17 #include "llvm/Support/SMLoc.h"
     18 
     19 namespace llvm {
     20 
     21 /// Target independent representation for an assembler token.
     22 class AsmToken {
     23 public:
     24   enum TokenKind {
     25     // Markers
     26     Eof, Error,
     27 
     28     // String values.
     29     Identifier,
     30     String,
     31 
     32     // Integer values.
     33     Integer,
     34     BigNum, // larger than 64 bits
     35 
     36     // Real values.
     37     Real,
     38 
     39     // No-value.
     40     EndOfStatement,
     41     Colon,
     42     Space,
     43     Plus, Minus, Tilde,
     44     Slash,    // '/'
     45     BackSlash, // '\'
     46     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
     47     Star, Dot, Comma, Dollar, Equal, EqualEqual,
     48 
     49     Pipe, PipePipe, Caret,
     50     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
     51     Less, LessEqual, LessLess, LessGreater,
     52     Greater, GreaterEqual, GreaterGreater, At
     53   };
     54 
     55 private:
     56   TokenKind Kind;
     57 
     58   /// A reference to the entire token contents; this is always a pointer into
     59   /// a memory buffer owned by the source manager.
     60   StringRef Str;
     61 
     62   APInt IntVal;
     63 
     64 public:
     65   AsmToken() {}
     66   AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
     67       : Kind(Kind), Str(Str), IntVal(IntVal) {}
     68   AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
     69       : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
     70 
     71   TokenKind getKind() const { return Kind; }
     72   bool is(TokenKind K) const { return Kind == K; }
     73   bool isNot(TokenKind K) const { return Kind != K; }
     74 
     75   SMLoc getLoc() const;
     76   SMLoc getEndLoc() const;
     77   SMRange getLocRange() const;
     78 
     79   /// Get the contents of a string token (without quotes).
     80   StringRef getStringContents() const {
     81     assert(Kind == String && "This token isn't a string!");
     82     return Str.slice(1, Str.size() - 1);
     83   }
     84 
     85   /// Get the identifier string for the current token, which should be an
     86   /// identifier or a string. This gets the portion of the string which should
     87   /// be used as the identifier, e.g., it does not include the quotes on
     88   /// strings.
     89   StringRef getIdentifier() const {
     90     if (Kind == Identifier)
     91       return getString();
     92     return getStringContents();
     93   }
     94 
     95   /// Get the string for the current token, this includes all characters (for
     96   /// example, the quotes on strings) in the token.
     97   ///
     98   /// The returned StringRef points into the source manager's memory buffer, and
     99   /// is safe to store across calls to Lex().
    100   StringRef getString() const { return Str; }
    101 
    102   // FIXME: Don't compute this in advance, it makes every token larger, and is
    103   // also not generally what we want (it is nicer for recovery etc. to lex 123br
    104   // as a single token, then diagnose as an invalid number).
    105   int64_t getIntVal() const {
    106     assert(Kind == Integer && "This token isn't an integer!");
    107     return IntVal.getZExtValue();
    108   }
    109 
    110   APInt getAPIntVal() const {
    111     assert((Kind == Integer || Kind == BigNum) &&
    112            "This token isn't an integer!");
    113     return IntVal;
    114   }
    115 };
    116 
    117 /// Generic assembler lexer interface, for use by target specific assembly
    118 /// lexers.
    119 class MCAsmLexer {
    120   /// The current token, stored in the base class for faster access.
    121   SmallVector<AsmToken, 1> CurTok;
    122 
    123   /// The location and description of the current error
    124   SMLoc ErrLoc;
    125   std::string Err;
    126 
    127   MCAsmLexer(const MCAsmLexer &) = delete;
    128   void operator=(const MCAsmLexer &) = delete;
    129 protected: // Can only create subclasses.
    130   const char *TokStart;
    131   bool SkipSpace;
    132   bool AllowAtInIdentifier;
    133 
    134   MCAsmLexer();
    135 
    136   virtual AsmToken LexToken() = 0;
    137 
    138   void SetError(SMLoc errLoc, const std::string &err) {
    139     ErrLoc = errLoc;
    140     Err = err;
    141   }
    142 
    143 public:
    144   virtual ~MCAsmLexer();
    145 
    146   /// Consume the next token from the input stream and return it.
    147   ///
    148   /// The lexer will continuosly return the end-of-file token once the end of
    149   /// the main input file has been reached.
    150   const AsmToken &Lex() {
    151     assert(!CurTok.empty());
    152     CurTok.erase(CurTok.begin());
    153     if (CurTok.empty())
    154       CurTok.emplace_back(LexToken());
    155     return CurTok.front();
    156   }
    157 
    158   void UnLex(AsmToken const &Token) {
    159     CurTok.insert(CurTok.begin(), Token);
    160   }
    161 
    162   virtual StringRef LexUntilEndOfStatement() = 0;
    163 
    164   /// Get the current source location.
    165   SMLoc getLoc() const;
    166 
    167   /// Get the current (last) lexed token.
    168   const AsmToken &getTok() const {
    169     return CurTok[0];
    170   }
    171 
    172   /// Look ahead at the next token to be lexed.
    173   const AsmToken peekTok(bool ShouldSkipSpace = true) {
    174     AsmToken Tok;
    175 
    176     MutableArrayRef<AsmToken> Buf(Tok);
    177     size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
    178 
    179     assert(ReadCount == 1);
    180     (void)ReadCount;
    181 
    182     return Tok;
    183   }
    184 
    185   /// Look ahead an arbitrary number of tokens.
    186   virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
    187                             bool ShouldSkipSpace = true) = 0;
    188 
    189   /// Get the current error location
    190   SMLoc getErrLoc() {
    191     return ErrLoc;
    192   }
    193 
    194   /// Get the current error string
    195   const std::string &getErr() {
    196     return Err;
    197   }
    198 
    199   /// Get the kind of current token.
    200   AsmToken::TokenKind getKind() const { return getTok().getKind(); }
    201 
    202   /// Check if the current token has kind \p K.
    203   bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
    204 
    205   /// Check if the current token has kind \p K.
    206   bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
    207 
    208   /// Set whether spaces should be ignored by the lexer
    209   void setSkipSpace(bool val) { SkipSpace = val; }
    210 
    211   bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
    212   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
    213 };
    214 
    215 } // End llvm namespace
    216 
    217 #endif
    218