1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 11 #define LLVM_MC_MCPARSER_MCASMLEXER_H 12 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/Compiler.h" 15 #include "llvm/Support/DataTypes.h" 16 #include "llvm/Support/SMLoc.h" 17 18 namespace llvm { 19 20 /// AsmToken - Target independent representation for an assembler token. 21 class AsmToken { 22 public: 23 enum TokenKind { 24 // Markers 25 Eof, Error, 26 27 // String values. 28 Identifier, 29 String, 30 31 // Integer values. 32 Integer, 33 34 // Real values. 35 Real, 36 37 // No-value. 38 EndOfStatement, 39 Colon, 40 Space, 41 Plus, Minus, Tilde, 42 Slash, // '/' 43 BackSlash, // '\' 44 LParen, RParen, LBrac, RBrac, LCurly, RCurly, 45 Star, Dot, Comma, Dollar, Equal, EqualEqual, 46 47 Pipe, PipePipe, Caret, 48 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 49 Less, LessEqual, LessLess, LessGreater, 50 Greater, GreaterEqual, GreaterGreater, At 51 }; 52 53 private: 54 TokenKind Kind; 55 56 /// A reference to the entire token contents; this is always a pointer into 57 /// a memory buffer owned by the source manager. 58 StringRef Str; 59 60 int64_t IntVal; 61 62 public: 63 AsmToken() {} 64 AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0) 65 : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} 66 67 TokenKind getKind() const { return Kind; } 68 bool is(TokenKind K) const { return Kind == K; } 69 bool isNot(TokenKind K) const { return Kind != K; } 70 71 SMLoc getLoc() const; 72 SMLoc getEndLoc() const; 73 74 /// getStringContents - Get the contents of a string token (without quotes). 75 StringRef getStringContents() const { 76 assert(Kind == String && "This token isn't a string!"); 77 return Str.slice(1, Str.size() - 1); 78 } 79 80 /// getIdentifier - Get the identifier string for the current token, which 81 /// should be an identifier or a string. This gets the portion of the string 82 /// which should be used as the identifier, e.g., it does not include the 83 /// quotes on strings. 84 StringRef getIdentifier() const { 85 if (Kind == Identifier) 86 return getString(); 87 return getStringContents(); 88 } 89 90 /// getString - Get the string for the current token, this includes all 91 /// characters (for example, the quotes on strings) in the token. 92 /// 93 /// The returned StringRef points into the source manager's memory buffer, and 94 /// is safe to store across calls to Lex(). 95 StringRef getString() const { return Str; } 96 97 // FIXME: Don't compute this in advance, it makes every token larger, and is 98 // also not generally what we want (it is nicer for recovery etc. to lex 123br 99 // as a single token, then diagnose as an invalid number). 100 int64_t getIntVal() const { 101 assert(Kind == Integer && "This token isn't an integer!"); 102 return IntVal; 103 } 104 }; 105 106 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific 107 /// assembly lexers. 108 class MCAsmLexer { 109 /// The current token, stored in the base class for faster access. 110 AsmToken CurTok; 111 112 /// The location and description of the current error 113 SMLoc ErrLoc; 114 std::string Err; 115 116 MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 117 void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 118 protected: // Can only create subclasses. 119 const char *TokStart; 120 bool SkipSpace; 121 122 MCAsmLexer(); 123 124 virtual AsmToken LexToken() = 0; 125 126 void SetError(const SMLoc &errLoc, const std::string &err) { 127 ErrLoc = errLoc; 128 Err = err; 129 } 130 131 public: 132 virtual ~MCAsmLexer(); 133 134 /// Lex - Consume the next token from the input stream and return it. 135 /// 136 /// The lexer will continuosly return the end-of-file token once the end of 137 /// the main input file has been reached. 138 const AsmToken &Lex() { 139 return CurTok = LexToken(); 140 } 141 142 virtual StringRef LexUntilEndOfStatement() = 0; 143 144 /// getLoc - Get the current source location. 145 SMLoc getLoc() const; 146 147 /// getTok - Get the current (last) lexed token. 148 const AsmToken &getTok() { 149 return CurTok; 150 } 151 152 /// getErrLoc - Get the current error location 153 const SMLoc &getErrLoc() { 154 return ErrLoc; 155 } 156 157 /// getErr - Get the current error string 158 const std::string &getErr() { 159 return Err; 160 } 161 162 /// getKind - Get the kind of current token. 163 AsmToken::TokenKind getKind() const { return CurTok.getKind(); } 164 165 /// is - Check if the current token has kind \p K. 166 bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } 167 168 /// isNot - Check if the current token has kind \p K. 169 bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } 170 171 /// setSkipSpace - Set whether spaces should be ignored by the lexer 172 void setSkipSpace(bool val) { SkipSpace = val; } 173 }; 174 175 } // End llvm namespace 176 177 #endif 178