Home | History | Annotate | Download | only in MCParser
      1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
     11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
     12 
     13 #include "llvm/ADT/APInt.h"
     14 #include "llvm/ADT/StringRef.h"
     15 #include "llvm/Support/Compiler.h"
     16 #include "llvm/Support/DataTypes.h"
     17 #include "llvm/Support/SMLoc.h"
     18 
     19 namespace llvm {
     20 
     21 /// AsmToken - Target independent representation for an assembler token.
     22 class AsmToken {
     23 public:
     24   enum TokenKind {
     25     // Markers
     26     Eof, Error,
     27 
     28     // String values.
     29     Identifier,
     30     String,
     31 
     32     // Integer values.
     33     Integer,
     34     BigNum, // larger than 64 bits
     35 
     36     // Real values.
     37     Real,
     38 
     39     // No-value.
     40     EndOfStatement,
     41     Colon,
     42     Space,
     43     Plus, Minus, Tilde,
     44     Slash,    // '/'
     45     BackSlash, // '\'
     46     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
     47     Star, Dot, Comma, Dollar, Equal, EqualEqual,
     48 
     49     Pipe, PipePipe, Caret,
     50     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
     51     Less, LessEqual, LessLess, LessGreater,
     52     Greater, GreaterEqual, GreaterGreater, At
     53   };
     54 
     55 private:
     56   TokenKind Kind;
     57 
     58   /// A reference to the entire token contents; this is always a pointer into
     59   /// a memory buffer owned by the source manager.
     60   StringRef Str;
     61 
     62   APInt IntVal;
     63 
     64 public:
     65   AsmToken() {}
     66   AsmToken(TokenKind _Kind, StringRef _Str, APInt _IntVal)
     67     : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
     68   AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
     69     : Kind(_Kind), Str(_Str), IntVal(64, _IntVal, true) {}
     70 
     71   TokenKind getKind() const { return Kind; }
     72   bool is(TokenKind K) const { return Kind == K; }
     73   bool isNot(TokenKind K) const { return Kind != K; }
     74 
     75   SMLoc getLoc() const;
     76   SMLoc getEndLoc() const;
     77 
     78   /// getStringContents - Get the contents of a string token (without quotes).
     79   StringRef getStringContents() const {
     80     assert(Kind == String && "This token isn't a string!");
     81     return Str.slice(1, Str.size() - 1);
     82   }
     83 
     84   /// getIdentifier - Get the identifier string for the current token, which
     85   /// should be an identifier or a string. This gets the portion of the string
     86   /// which should be used as the identifier, e.g., it does not include the
     87   /// quotes on strings.
     88   StringRef getIdentifier() const {
     89     if (Kind == Identifier)
     90       return getString();
     91     return getStringContents();
     92   }
     93 
     94   /// getString - Get the string for the current token, this includes all
     95   /// characters (for example, the quotes on strings) in the token.
     96   ///
     97   /// The returned StringRef points into the source manager's memory buffer, and
     98   /// is safe to store across calls to Lex().
     99   StringRef getString() const { return Str; }
    100 
    101   // FIXME: Don't compute this in advance, it makes every token larger, and is
    102   // also not generally what we want (it is nicer for recovery etc. to lex 123br
    103   // as a single token, then diagnose as an invalid number).
    104   int64_t getIntVal() const {
    105     assert(Kind == Integer && "This token isn't an integer!");
    106     return IntVal.getZExtValue();
    107   }
    108 
    109   APInt getAPIntVal() const {
    110     assert((Kind == Integer || Kind == BigNum) &&
    111            "This token isn't an integer!");
    112     return IntVal;
    113   }
    114 };
    115 
    116 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific
    117 /// assembly lexers.
    118 class MCAsmLexer {
    119   /// The current token, stored in the base class for faster access.
    120   AsmToken CurTok;
    121 
    122   /// The location and description of the current error
    123   SMLoc ErrLoc;
    124   std::string Err;
    125 
    126   MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
    127   void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
    128 protected: // Can only create subclasses.
    129   const char *TokStart;
    130   bool SkipSpace;
    131   bool AllowAtInIdentifier;
    132 
    133   MCAsmLexer();
    134 
    135   virtual AsmToken LexToken() = 0;
    136 
    137   void SetError(const SMLoc &errLoc, const std::string &err) {
    138     ErrLoc = errLoc;
    139     Err = err;
    140   }
    141 
    142 public:
    143   virtual ~MCAsmLexer();
    144 
    145   /// Lex - Consume the next token from the input stream and return it.
    146   ///
    147   /// The lexer will continuosly return the end-of-file token once the end of
    148   /// the main input file has been reached.
    149   const AsmToken &Lex() {
    150     return CurTok = LexToken();
    151   }
    152 
    153   virtual StringRef LexUntilEndOfStatement() = 0;
    154 
    155   /// getLoc - Get the current source location.
    156   SMLoc getLoc() const;
    157 
    158   /// getTok - Get the current (last) lexed token.
    159   const AsmToken &getTok() {
    160     return CurTok;
    161   }
    162 
    163   /// peekTok - Look ahead at the next token to be lexed.
    164   virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0;
    165 
    166   /// getErrLoc - Get the current error location
    167   const SMLoc &getErrLoc() {
    168     return ErrLoc;
    169   }
    170 
    171   /// getErr - Get the current error string
    172   const std::string &getErr() {
    173     return Err;
    174   }
    175 
    176   /// getKind - Get the kind of current token.
    177   AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
    178 
    179   /// is - Check if the current token has kind \p K.
    180   bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
    181 
    182   /// isNot - Check if the current token has kind \p K.
    183   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
    184 
    185   /// setSkipSpace - Set whether spaces should be ignored by the lexer
    186   void setSkipSpace(bool val) { SkipSpace = val; }
    187 
    188   bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
    189   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
    190 };
    191 
    192 } // End llvm namespace
    193 
    194 #endif
    195