Home | History | Annotate | Download | only in MCParser
      1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_MC_MCASMLEXER_H
     11 #define LLVM_MC_MCASMLEXER_H
     12 
     13 #include "llvm/ADT/StringRef.h"
     14 #include "llvm/Support/DataTypes.h"
     15 #include "llvm/Support/SMLoc.h"
     16 
     17 namespace llvm {
     18 class MCAsmLexer;
     19 class MCInst;
     20 
     21 /// AsmToken - Target independent representation for an assembler token.
     22 class AsmToken {
     23 public:
     24   enum TokenKind {
     25     // Markers
     26     Eof, Error,
     27 
     28     // String values.
     29     Identifier,
     30     String,
     31 
     32     // Integer values.
     33     Integer,
     34 
     35     // Real values.
     36     Real,
     37 
     38     // Register values (stored in IntVal).  Only used by MCTargetAsmLexer.
     39     Register,
     40 
     41     // No-value.
     42     EndOfStatement,
     43     Colon,
     44     Plus, Minus, Tilde,
     45     Slash,    // '/'
     46     BackSlash, // '\'
     47     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
     48     Star, Dot, Comma, Dollar, Equal, EqualEqual,
     49 
     50     Pipe, PipePipe, Caret,
     51     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
     52     Less, LessEqual, LessLess, LessGreater,
     53     Greater, GreaterEqual, GreaterGreater, At
     54   };
     55 
     56   TokenKind Kind;
     57 
     58   /// A reference to the entire token contents; this is always a pointer into
     59   /// a memory buffer owned by the source manager.
     60   StringRef Str;
     61 
     62   int64_t IntVal;
     63 
     64 public:
     65   AsmToken() {}
     66   AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
     67     : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
     68 
     69   TokenKind getKind() const { return Kind; }
     70   bool is(TokenKind K) const { return Kind == K; }
     71   bool isNot(TokenKind K) const { return Kind != K; }
     72 
     73   SMLoc getLoc() const;
     74   SMLoc getEndLoc() const;
     75 
     76   /// getStringContents - Get the contents of a string token (without quotes).
     77   StringRef getStringContents() const {
     78     assert(Kind == String && "This token isn't a string!");
     79     return Str.slice(1, Str.size() - 1);
     80   }
     81 
     82   /// getIdentifier - Get the identifier string for the current token, which
     83   /// should be an identifier or a string. This gets the portion of the string
     84   /// which should be used as the identifier, e.g., it does not include the
     85   /// quotes on strings.
     86   StringRef getIdentifier() const {
     87     if (Kind == Identifier)
     88       return getString();
     89     return getStringContents();
     90   }
     91 
     92   /// getString - Get the string for the current token, this includes all
     93   /// characters (for example, the quotes on strings) in the token.
     94   ///
     95   /// The returned StringRef points into the source manager's memory buffer, and
     96   /// is safe to store across calls to Lex().
     97   StringRef getString() const { return Str; }
     98 
     99   // FIXME: Don't compute this in advance, it makes every token larger, and is
    100   // also not generally what we want (it is nicer for recovery etc. to lex 123br
    101   // as a single token, then diagnose as an invalid number).
    102   int64_t getIntVal() const {
    103     assert(Kind == Integer && "This token isn't an integer!");
    104     return IntVal;
    105   }
    106 
    107   /// getRegVal - Get the register number for the current token, which should
    108   /// be a register.
    109   unsigned getRegVal() const {
    110     assert(Kind == Register && "This token isn't a register!");
    111     return static_cast<unsigned>(IntVal);
    112   }
    113 };
    114 
    115 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific
    116 /// assembly lexers.
    117 class MCAsmLexer {
    118   /// The current token, stored in the base class for faster access.
    119   AsmToken CurTok;
    120 
    121   /// The location and description of the current error
    122   SMLoc ErrLoc;
    123   std::string Err;
    124 
    125   MCAsmLexer(const MCAsmLexer &);   // DO NOT IMPLEMENT
    126   void operator=(const MCAsmLexer &);  // DO NOT IMPLEMENT
    127 protected: // Can only create subclasses.
    128   const char *TokStart;
    129 
    130   MCAsmLexer();
    131 
    132   virtual AsmToken LexToken() = 0;
    133 
    134   void SetError(const SMLoc &errLoc, const std::string &err) {
    135     ErrLoc = errLoc;
    136     Err = err;
    137   }
    138 
    139 public:
    140   virtual ~MCAsmLexer();
    141 
    142   /// Lex - Consume the next token from the input stream and return it.
    143   ///
    144   /// The lexer will continuosly return the end-of-file token once the end of
    145   /// the main input file has been reached.
    146   const AsmToken &Lex() {
    147     return CurTok = LexToken();
    148   }
    149 
    150   virtual StringRef LexUntilEndOfStatement() = 0;
    151 
    152   /// getLoc - Get the current source location.
    153   SMLoc getLoc() const;
    154 
    155   /// getTok - Get the current (last) lexed token.
    156   const AsmToken &getTok() {
    157     return CurTok;
    158   }
    159 
    160   /// getErrLoc - Get the current error location
    161   const SMLoc &getErrLoc() {
    162     return ErrLoc;
    163   }
    164 
    165   /// getErr - Get the current error string
    166   const std::string &getErr() {
    167     return Err;
    168   }
    169 
    170   /// getKind - Get the kind of current token.
    171   AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
    172 
    173   /// is - Check if the current token has kind \arg K.
    174   bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
    175 
    176   /// isNot - Check if the current token has kind \arg K.
    177   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
    178 };
    179 
    180 } // End llvm namespace
    181 
    182 #endif
    183