1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 11 #define LLVM_MC_MCPARSER_MCASMLEXER_H 12 13 #include "llvm/ADT/APInt.h" 14 #include "llvm/ADT/StringRef.h" 15 #include "llvm/Support/Compiler.h" 16 #include "llvm/Support/DataTypes.h" 17 #include "llvm/Support/SMLoc.h" 18 19 namespace llvm { 20 21 /// Target independent representation for an assembler token. 22 class AsmToken { 23 public: 24 enum TokenKind { 25 // Markers 26 Eof, Error, 27 28 // String values. 29 Identifier, 30 String, 31 32 // Integer values. 33 Integer, 34 BigNum, // larger than 64 bits 35 36 // Real values. 37 Real, 38 39 // No-value. 40 EndOfStatement, 41 Colon, 42 Space, 43 Plus, Minus, Tilde, 44 Slash, // '/' 45 BackSlash, // '\' 46 LParen, RParen, LBrac, RBrac, LCurly, RCurly, 47 Star, Dot, Comma, Dollar, Equal, EqualEqual, 48 49 Pipe, PipePipe, Caret, 50 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 51 Less, LessEqual, LessLess, LessGreater, 52 Greater, GreaterEqual, GreaterGreater, At 53 }; 54 55 private: 56 TokenKind Kind; 57 58 /// A reference to the entire token contents; this is always a pointer into 59 /// a memory buffer owned by the source manager. 60 StringRef Str; 61 62 APInt IntVal; 63 64 public: 65 AsmToken() {} 66 AsmToken(TokenKind Kind, StringRef Str, APInt IntVal) 67 : Kind(Kind), Str(Str), IntVal(IntVal) {} 68 AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0) 69 : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {} 70 71 TokenKind getKind() const { return Kind; } 72 bool is(TokenKind K) const { return Kind == K; } 73 bool isNot(TokenKind K) const { return Kind != K; } 74 75 SMLoc getLoc() const; 76 SMLoc getEndLoc() const; 77 SMRange getLocRange() const; 78 79 /// Get the contents of a string token (without quotes). 80 StringRef getStringContents() const { 81 assert(Kind == String && "This token isn't a string!"); 82 return Str.slice(1, Str.size() - 1); 83 } 84 85 /// Get the identifier string for the current token, which should be an 86 /// identifier or a string. This gets the portion of the string which should 87 /// be used as the identifier, e.g., it does not include the quotes on 88 /// strings. 89 StringRef getIdentifier() const { 90 if (Kind == Identifier) 91 return getString(); 92 return getStringContents(); 93 } 94 95 /// Get the string for the current token, this includes all characters (for 96 /// example, the quotes on strings) in the token. 97 /// 98 /// The returned StringRef points into the source manager's memory buffer, and 99 /// is safe to store across calls to Lex(). 100 StringRef getString() const { return Str; } 101 102 // FIXME: Don't compute this in advance, it makes every token larger, and is 103 // also not generally what we want (it is nicer for recovery etc. to lex 123br 104 // as a single token, then diagnose as an invalid number). 105 int64_t getIntVal() const { 106 assert(Kind == Integer && "This token isn't an integer!"); 107 return IntVal.getZExtValue(); 108 } 109 110 APInt getAPIntVal() const { 111 assert((Kind == Integer || Kind == BigNum) && 112 "This token isn't an integer!"); 113 return IntVal; 114 } 115 }; 116 117 /// Generic assembler lexer interface, for use by target specific assembly 118 /// lexers. 119 class MCAsmLexer { 120 /// The current token, stored in the base class for faster access. 121 SmallVector<AsmToken, 1> CurTok; 122 123 /// The location and description of the current error 124 SMLoc ErrLoc; 125 std::string Err; 126 127 MCAsmLexer(const MCAsmLexer &) = delete; 128 void operator=(const MCAsmLexer &) = delete; 129 protected: // Can only create subclasses. 130 const char *TokStart; 131 bool SkipSpace; 132 bool AllowAtInIdentifier; 133 134 MCAsmLexer(); 135 136 virtual AsmToken LexToken() = 0; 137 138 void SetError(SMLoc errLoc, const std::string &err) { 139 ErrLoc = errLoc; 140 Err = err; 141 } 142 143 public: 144 virtual ~MCAsmLexer(); 145 146 /// Consume the next token from the input stream and return it. 147 /// 148 /// The lexer will continuosly return the end-of-file token once the end of 149 /// the main input file has been reached. 150 const AsmToken &Lex() { 151 assert(!CurTok.empty()); 152 CurTok.erase(CurTok.begin()); 153 if (CurTok.empty()) 154 CurTok.emplace_back(LexToken()); 155 return CurTok.front(); 156 } 157 158 void UnLex(AsmToken const &Token) { 159 CurTok.insert(CurTok.begin(), Token); 160 } 161 162 virtual StringRef LexUntilEndOfStatement() = 0; 163 164 /// Get the current source location. 165 SMLoc getLoc() const; 166 167 /// Get the current (last) lexed token. 168 const AsmToken &getTok() const { 169 return CurTok[0]; 170 } 171 172 /// Look ahead at the next token to be lexed. 173 const AsmToken peekTok(bool ShouldSkipSpace = true) { 174 AsmToken Tok; 175 176 MutableArrayRef<AsmToken> Buf(Tok); 177 size_t ReadCount = peekTokens(Buf, ShouldSkipSpace); 178 179 assert(ReadCount == 1); 180 (void)ReadCount; 181 182 return Tok; 183 } 184 185 /// Look ahead an arbitrary number of tokens. 186 virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf, 187 bool ShouldSkipSpace = true) = 0; 188 189 /// Get the current error location 190 SMLoc getErrLoc() { 191 return ErrLoc; 192 } 193 194 /// Get the current error string 195 const std::string &getErr() { 196 return Err; 197 } 198 199 /// Get the kind of current token. 200 AsmToken::TokenKind getKind() const { return getTok().getKind(); } 201 202 /// Check if the current token has kind \p K. 203 bool is(AsmToken::TokenKind K) const { return getTok().is(K); } 204 205 /// Check if the current token has kind \p K. 206 bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); } 207 208 /// Set whether spaces should be ignored by the lexer 209 void setSkipSpace(bool val) { SkipSpace = val; } 210 211 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } 212 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } 213 }; 214 215 } // End llvm namespace 216 217 #endif 218