1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_MC_MCASMLEXER_H 11 #define LLVM_MC_MCASMLEXER_H 12 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/DataTypes.h" 15 #include "llvm/Support/SMLoc.h" 16 17 namespace llvm { 18 class MCAsmLexer; 19 class MCInst; 20 class Target; 21 22 /// AsmToken - Target independent representation for an assembler token. 23 class AsmToken { 24 public: 25 enum TokenKind { 26 // Markers 27 Eof, Error, 28 29 // String values. 30 Identifier, 31 String, 32 33 // Integer values. 34 Integer, 35 36 // Real values. 37 Real, 38 39 // Register values (stored in IntVal). Only used by TargetAsmLexer. 40 Register, 41 42 // No-value. 43 EndOfStatement, 44 Colon, 45 Plus, Minus, Tilde, 46 Slash, // '/' 47 BackSlash, // '\' 48 LParen, RParen, LBrac, RBrac, LCurly, RCurly, 49 Star, Dot, Comma, Dollar, Equal, EqualEqual, 50 51 Pipe, PipePipe, Caret, 52 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 53 Less, LessEqual, LessLess, LessGreater, 54 Greater, GreaterEqual, GreaterGreater, At 55 }; 56 57 TokenKind Kind; 58 59 /// A reference to the entire token contents; this is always a pointer into 60 /// a memory buffer owned by the source manager. 61 StringRef Str; 62 63 int64_t IntVal; 64 65 public: 66 AsmToken() {} 67 AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0) 68 : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} 69 70 TokenKind getKind() const { return Kind; } 71 bool is(TokenKind K) const { return Kind == K; } 72 bool isNot(TokenKind K) const { return Kind != K; } 73 74 SMLoc getLoc() const; 75 76 /// getStringContents - Get the contents of a string token (without quotes). 77 StringRef getStringContents() const { 78 assert(Kind == String && "This token isn't a string!"); 79 return Str.slice(1, Str.size() - 1); 80 } 81 82 /// getIdentifier - Get the identifier string for the current token, which 83 /// should be an identifier or a string. This gets the portion of the string 84 /// which should be used as the identifier, e.g., it does not include the 85 /// quotes on strings. 86 StringRef getIdentifier() const { 87 if (Kind == Identifier) 88 return getString(); 89 return getStringContents(); 90 } 91 92 /// getString - Get the string for the current token, this includes all 93 /// characters (for example, the quotes on strings) in the token. 94 /// 95 /// The returned StringRef points into the source manager's memory buffer, and 96 /// is safe to store across calls to Lex(). 97 StringRef getString() const { return Str; } 98 99 // FIXME: Don't compute this in advance, it makes every token larger, and is 100 // also not generally what we want (it is nicer for recovery etc. to lex 123br 101 // as a single token, then diagnose as an invalid number). 102 int64_t getIntVal() const { 103 assert(Kind == Integer && "This token isn't an integer!"); 104 return IntVal; 105 } 106 107 /// getRegVal - Get the register number for the current token, which should 108 /// be a register. 109 unsigned getRegVal() const { 110 assert(Kind == Register && "This token isn't a register!"); 111 return static_cast<unsigned>(IntVal); 112 } 113 }; 114 115 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific 116 /// assembly lexers. 117 class MCAsmLexer { 118 /// The current token, stored in the base class for faster access. 119 AsmToken CurTok; 120 121 /// The location and description of the current error 122 SMLoc ErrLoc; 123 std::string Err; 124 125 MCAsmLexer(const MCAsmLexer &); // DO NOT IMPLEMENT 126 void operator=(const MCAsmLexer &); // DO NOT IMPLEMENT 127 protected: // Can only create subclasses. 128 const char *TokStart; 129 130 MCAsmLexer(); 131 132 virtual AsmToken LexToken() = 0; 133 134 void SetError(const SMLoc &errLoc, const std::string &err) { 135 ErrLoc = errLoc; 136 Err = err; 137 } 138 139 public: 140 virtual ~MCAsmLexer(); 141 142 /// Lex - Consume the next token from the input stream and return it. 143 /// 144 /// The lexer will continuosly return the end-of-file token once the end of 145 /// the main input file has been reached. 146 const AsmToken &Lex() { 147 return CurTok = LexToken(); 148 } 149 150 virtual StringRef LexUntilEndOfStatement() = 0; 151 152 /// getLoc - Get the current source location. 153 SMLoc getLoc() const; 154 155 /// getTok - Get the current (last) lexed token. 156 const AsmToken &getTok() { 157 return CurTok; 158 } 159 160 /// getErrLoc - Get the current error location 161 const SMLoc &getErrLoc() { 162 return ErrLoc; 163 } 164 165 /// getErr - Get the current error string 166 const std::string &getErr() { 167 return Err; 168 } 169 170 /// getKind - Get the kind of current token. 171 AsmToken::TokenKind getKind() const { return CurTok.getKind(); } 172 173 /// is - Check if the current token has kind \arg K. 174 bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } 175 176 /// isNot - Check if the current token has kind \arg K. 177 bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } 178 }; 179 180 } // End llvm namespace 181 182 #endif 183