Home | History | Annotate | Download | only in MCParser
      1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_MC_MCASMLEXER_H
     11 #define LLVM_MC_MCASMLEXER_H
     12 
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <string>
     16 
     17 namespace llvm {
     18 
     19 /// AsmToken - Target independent representation for an assembler token.
     20 class AsmToken {
     21 public:
     22   enum TokenKind {
     23     // Markers
     24     Eof, Error,
     25 
     26     // String values.
     27     Identifier,
     28     String,
     29 
     30     // Integer values.
     31     Integer,
     32 
     33     // Real values.
     34     Real,
     35 
     36     // Register values (stored in IntVal).  Only used by MCTargetAsmLexer.
     37     Register,
     38 
     39     // No-value.
     40     EndOfStatement,
     41     Colon,
     42     Plus, Minus, Tilde,
     43     Slash,    // '/'
     44     BackSlash, // '\'
     45     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
     46     Star, Dot, Comma, Dollar, Equal, EqualEqual,
     47 
     48     Pipe, PipePipe, Caret,
     49     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
     50     Less, LessEqual, LessLess, LessGreater,
     51     Greater, GreaterEqual, GreaterGreater, At
     52   };
     53 
     54 private:
     55   TokenKind Kind;
     56 
     57   /// A reference to the entire token contents; this is always a pointer into
     58   /// a memory buffer owned by the source manager.
     59   StringRef Str;
     60 
     61   int64_t IntVal;
     62 
     63 public:
     64   AsmToken() {}
     65   AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
     66     : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
     67 
     68   TokenKind getKind() const { return Kind; }
     69   bool is(TokenKind K) const { return Kind == K; }
     70   bool isNot(TokenKind K) const { return Kind != K; }
     71 
     72   SMLoc getLoc() const;
     73   SMLoc getEndLoc() const;
     74 
     75   /// getStringContents - Get the contents of a string token (without quotes).
     76   StringRef getStringContents() const {
     77     assert(Kind == String && "This token isn't a string!");
     78     return Str.slice(1, Str.size() - 1);
     79   }
     80 
     81   /// getIdentifier - Get the identifier string for the current token, which
     82   /// should be an identifier or a string. This gets the portion of the string
     83   /// which should be used as the identifier, e.g., it does not include the
     84   /// quotes on strings.
     85   StringRef getIdentifier() const {
     86     if (Kind == Identifier)
     87       return getString();
     88     return getStringContents();
     89   }
     90 
     91   /// getString - Get the string for the current token, this includes all
     92   /// characters (for example, the quotes on strings) in the token.
     93   ///
     94   /// The returned StringRef points into the source manager's memory buffer, and
     95   /// is safe to store across calls to Lex().
     96   StringRef getString() const { return Str; }
     97 
     98   // FIXME: Don't compute this in advance, it makes every token larger, and is
     99   // also not generally what we want (it is nicer for recovery etc. to lex 123br
    100   // as a single token, then diagnose as an invalid number).
    101   int64_t getIntVal() const {
    102     assert(Kind == Integer && "This token isn't an integer!");
    103     return IntVal;
    104   }
    105 
    106   /// getRegVal - Get the register number for the current token, which should
    107   /// be a register.
    108   unsigned getRegVal() const {
    109     assert(Kind == Register && "This token isn't a register!");
    110     return static_cast<unsigned>(IntVal);
    111   }
    112 };
    113 
    114 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific
    115 /// assembly lexers.
    116 class MCAsmLexer {
    117   /// The current token, stored in the base class for faster access.
    118   AsmToken CurTok;
    119 
    120   /// The location and description of the current error
    121   SMLoc ErrLoc;
    122   std::string Err;
    123 
    124   MCAsmLexer(const MCAsmLexer &);   // DO NOT IMPLEMENT
    125   void operator=(const MCAsmLexer &);  // DO NOT IMPLEMENT
    126 protected: // Can only create subclasses.
    127   const char *TokStart;
    128 
    129   MCAsmLexer();
    130 
    131   virtual AsmToken LexToken() = 0;
    132 
    133   void SetError(const SMLoc &errLoc, const std::string &err) {
    134     ErrLoc = errLoc;
    135     Err = err;
    136   }
    137 
    138 public:
    139   virtual ~MCAsmLexer();
    140 
    141   /// Lex - Consume the next token from the input stream and return it.
    142   ///
    143   /// The lexer will continuosly return the end-of-file token once the end of
    144   /// the main input file has been reached.
    145   const AsmToken &Lex() {
    146     return CurTok = LexToken();
    147   }
    148 
    149   virtual StringRef LexUntilEndOfStatement() = 0;
    150 
    151   /// getLoc - Get the current source location.
    152   SMLoc getLoc() const;
    153 
    154   /// getTok - Get the current (last) lexed token.
    155   const AsmToken &getTok() {
    156     return CurTok;
    157   }
    158 
    159   /// getErrLoc - Get the current error location
    160   const SMLoc &getErrLoc() {
    161     return ErrLoc;
    162   }
    163 
    164   /// getErr - Get the current error string
    165   const std::string &getErr() {
    166     return Err;
    167   }
    168 
    169   /// getKind - Get the kind of current token.
    170   AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
    171 
    172   /// is - Check if the current token has kind \arg K.
    173   bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
    174 
    175   /// isNot - Check if the current token has kind \arg K.
    176   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
    177 };
    178 
    179 } // End llvm namespace
    180 
    181 #endif
    182