Home | History | Annotate | Download | only in MCParser
      1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
     11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
     12 
     13 #include "llvm/ADT/StringRef.h"
     14 #include "llvm/Support/Compiler.h"
     15 #include "llvm/Support/DataTypes.h"
     16 #include "llvm/Support/SMLoc.h"
     17 
     18 namespace llvm {
     19 
     20 /// AsmToken - Target independent representation for an assembler token.
     21 class AsmToken {
     22 public:
     23   enum TokenKind {
     24     // Markers
     25     Eof, Error,
     26 
     27     // String values.
     28     Identifier,
     29     String,
     30 
     31     // Integer values.
     32     Integer,
     33 
     34     // Real values.
     35     Real,
     36 
     37     // No-value.
     38     EndOfStatement,
     39     Colon,
     40     Space,
     41     Plus, Minus, Tilde,
     42     Slash,    // '/'
     43     BackSlash, // '\'
     44     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
     45     Star, Dot, Comma, Dollar, Equal, EqualEqual,
     46 
     47     Pipe, PipePipe, Caret,
     48     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
     49     Less, LessEqual, LessLess, LessGreater,
     50     Greater, GreaterEqual, GreaterGreater, At
     51   };
     52 
     53 private:
     54   TokenKind Kind;
     55 
     56   /// A reference to the entire token contents; this is always a pointer into
     57   /// a memory buffer owned by the source manager.
     58   StringRef Str;
     59 
     60   int64_t IntVal;
     61 
     62 public:
     63   AsmToken() {}
     64   AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
     65     : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
     66 
     67   TokenKind getKind() const { return Kind; }
     68   bool is(TokenKind K) const { return Kind == K; }
     69   bool isNot(TokenKind K) const { return Kind != K; }
     70 
     71   SMLoc getLoc() const;
     72   SMLoc getEndLoc() const;
     73 
     74   /// getStringContents - Get the contents of a string token (without quotes).
     75   StringRef getStringContents() const {
     76     assert(Kind == String && "This token isn't a string!");
     77     return Str.slice(1, Str.size() - 1);
     78   }
     79 
     80   /// getIdentifier - Get the identifier string for the current token, which
     81   /// should be an identifier or a string. This gets the portion of the string
     82   /// which should be used as the identifier, e.g., it does not include the
     83   /// quotes on strings.
     84   StringRef getIdentifier() const {
     85     if (Kind == Identifier)
     86       return getString();
     87     return getStringContents();
     88   }
     89 
     90   /// getString - Get the string for the current token, this includes all
     91   /// characters (for example, the quotes on strings) in the token.
     92   ///
     93   /// The returned StringRef points into the source manager's memory buffer, and
     94   /// is safe to store across calls to Lex().
     95   StringRef getString() const { return Str; }
     96 
     97   // FIXME: Don't compute this in advance, it makes every token larger, and is
     98   // also not generally what we want (it is nicer for recovery etc. to lex 123br
     99   // as a single token, then diagnose as an invalid number).
    100   int64_t getIntVal() const {
    101     assert(Kind == Integer && "This token isn't an integer!");
    102     return IntVal;
    103   }
    104 };
    105 
    106 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific
    107 /// assembly lexers.
    108 class MCAsmLexer {
    109   /// The current token, stored in the base class for faster access.
    110   AsmToken CurTok;
    111 
    112   /// The location and description of the current error
    113   SMLoc ErrLoc;
    114   std::string Err;
    115 
    116   MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
    117   void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
    118 protected: // Can only create subclasses.
    119   const char *TokStart;
    120   bool SkipSpace;
    121 
    122   MCAsmLexer();
    123 
    124   virtual AsmToken LexToken() = 0;
    125 
    126   void SetError(const SMLoc &errLoc, const std::string &err) {
    127     ErrLoc = errLoc;
    128     Err = err;
    129   }
    130 
    131 public:
    132   virtual ~MCAsmLexer();
    133 
    134   /// Lex - Consume the next token from the input stream and return it.
    135   ///
    136   /// The lexer will continuosly return the end-of-file token once the end of
    137   /// the main input file has been reached.
    138   const AsmToken &Lex() {
    139     return CurTok = LexToken();
    140   }
    141 
    142   virtual StringRef LexUntilEndOfStatement() = 0;
    143 
    144   /// getLoc - Get the current source location.
    145   SMLoc getLoc() const;
    146 
    147   /// getTok - Get the current (last) lexed token.
    148   const AsmToken &getTok() {
    149     return CurTok;
    150   }
    151 
    152   /// getErrLoc - Get the current error location
    153   const SMLoc &getErrLoc() {
    154     return ErrLoc;
    155   }
    156 
    157   /// getErr - Get the current error string
    158   const std::string &getErr() {
    159     return Err;
    160   }
    161 
    162   /// getKind - Get the kind of current token.
    163   AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
    164 
    165   /// is - Check if the current token has kind \p K.
    166   bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
    167 
    168   /// isNot - Check if the current token has kind \p K.
    169   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
    170 
    171   /// setSkipSpace - Set whether spaces should be ignored by the lexer
    172   void setSkipSpace(bool val) { SkipSpace = val; }
    173 };
    174 
    175 } // End llvm namespace
    176 
    177 #endif
    178