Home | History | Annotate | Download | only in MCParser
      1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
     11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
     12 
     13 #include "llvm/ADT/APInt.h"
     14 #include "llvm/ADT/StringRef.h"
     15 #include "llvm/Support/Compiler.h"
     16 #include "llvm/Support/DataTypes.h"
     17 #include "llvm/Support/SMLoc.h"
     18 
     19 namespace llvm {
     20 
     21 /// Target independent representation for an assembler token.
     22 class AsmToken {
     23 public:
     24   enum TokenKind {
     25     // Markers
     26     Eof, Error,
     27 
     28     // String values.
     29     Identifier,
     30     String,
     31 
     32     // Integer values.
     33     Integer,
     34     BigNum, // larger than 64 bits
     35 
     36     // Real values.
     37     Real,
     38 
     39     // No-value.
     40     EndOfStatement,
     41     Colon,
     42     Space,
     43     Plus, Minus, Tilde,
     44     Slash,    // '/'
     45     BackSlash, // '\'
     46     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
     47     Star, Dot, Comma, Dollar, Equal, EqualEqual,
     48 
     49     Pipe, PipePipe, Caret,
     50     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
     51     Less, LessEqual, LessLess, LessGreater,
     52     Greater, GreaterEqual, GreaterGreater, At
     53   };
     54 
     55 private:
     56   TokenKind Kind;
     57 
     58   /// A reference to the entire token contents; this is always a pointer into
     59   /// a memory buffer owned by the source manager.
     60   StringRef Str;
     61 
     62   APInt IntVal;
     63 
     64 public:
     65   AsmToken() {}
     66   AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
     67       : Kind(Kind), Str(Str), IntVal(IntVal) {}
     68   AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
     69       : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
     70 
     71   TokenKind getKind() const { return Kind; }
     72   bool is(TokenKind K) const { return Kind == K; }
     73   bool isNot(TokenKind K) const { return Kind != K; }
     74 
     75   SMLoc getLoc() const;
     76   SMLoc getEndLoc() const;
     77   SMRange getLocRange() const;
     78 
     79   /// Get the contents of a string token (without quotes).
     80   StringRef getStringContents() const {
     81     assert(Kind == String && "This token isn't a string!");
     82     return Str.slice(1, Str.size() - 1);
     83   }
     84 
     85   /// Get the identifier string for the current token, which should be an
     86   /// identifier or a string. This gets the portion of the string which should
     87   /// be used as the identifier, e.g., it does not include the quotes on
     88   /// strings.
     89   StringRef getIdentifier() const {
     90     if (Kind == Identifier)
     91       return getString();
     92     return getStringContents();
     93   }
     94 
     95   /// Get the string for the current token, this includes all characters (for
     96   /// example, the quotes on strings) in the token.
     97   ///
     98   /// The returned StringRef points into the source manager's memory buffer, and
     99   /// is safe to store across calls to Lex().
    100   StringRef getString() const { return Str; }
    101 
    102   // FIXME: Don't compute this in advance, it makes every token larger, and is
    103   // also not generally what we want (it is nicer for recovery etc. to lex 123br
    104   // as a single token, then diagnose as an invalid number).
    105   int64_t getIntVal() const {
    106     assert(Kind == Integer && "This token isn't an integer!");
    107     return IntVal.getZExtValue();
    108   }
    109 
    110   APInt getAPIntVal() const {
    111     assert((Kind == Integer || Kind == BigNum) &&
    112            "This token isn't an integer!");
    113     return IntVal;
    114   }
    115 };
    116 
    117 /// Generic assembler lexer interface, for use by target specific assembly
    118 /// lexers.
    119 class MCAsmLexer {
    120   /// The current token, stored in the base class for faster access.
    121   AsmToken CurTok;
    122 
    123   /// The location and description of the current error
    124   SMLoc ErrLoc;
    125   std::string Err;
    126 
    127   MCAsmLexer(const MCAsmLexer &) = delete;
    128   void operator=(const MCAsmLexer &) = delete;
    129 protected: // Can only create subclasses.
    130   const char *TokStart;
    131   bool SkipSpace;
    132   bool AllowAtInIdentifier;
    133 
    134   MCAsmLexer();
    135 
    136   virtual AsmToken LexToken() = 0;
    137 
    138   void SetError(const SMLoc &errLoc, const std::string &err) {
    139     ErrLoc = errLoc;
    140     Err = err;
    141   }
    142 
    143 public:
    144   virtual ~MCAsmLexer();
    145 
    146   /// Consume the next token from the input stream and return it.
    147   ///
    148   /// The lexer will continuosly return the end-of-file token once the end of
    149   /// the main input file has been reached.
    150   const AsmToken &Lex() {
    151     return CurTok = LexToken();
    152   }
    153 
    154   virtual StringRef LexUntilEndOfStatement() = 0;
    155 
    156   /// Get the current source location.
    157   SMLoc getLoc() const;
    158 
    159   /// Get the current (last) lexed token.
    160   const AsmToken &getTok() const {
    161     return CurTok;
    162   }
    163 
    164   /// Look ahead at the next token to be lexed.
    165   virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0;
    166 
    167   /// Get the current error location
    168   const SMLoc &getErrLoc() {
    169     return ErrLoc;
    170   }
    171 
    172   /// Get the current error string
    173   const std::string &getErr() {
    174     return Err;
    175   }
    176 
    177   /// Get the kind of current token.
    178   AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
    179 
    180   /// Check if the current token has kind \p K.
    181   bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
    182 
    183   /// Check if the current token has kind \p K.
    184   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
    185 
    186   /// Set whether spaces should be ignored by the lexer
    187   void setSkipSpace(bool val) { SkipSpace = val; }
    188 
    189   bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
    190   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
    191 };
    192 
    193 } // End llvm namespace
    194 
    195 #endif
    196