1 /** \file 2 * \brief Defines the interface for a common token. 3 * 4 * All token streams should provide their tokens using an instance 5 * of this common token. A custom pointer is provided, wher you may attach 6 * a further structure to enhance the common token if you feel the need 7 * to do so. The C runtime will assume that a token provides implementations 8 * of the interface functions, but all of them may be rplaced by your own 9 * implementation if you require it. 10 */ 11 #ifndef _ANTLR3_COMMON_TOKEN_HPP 12 #define _ANTLR3_COMMON_TOKEN_HPP 13 14 // [The "BSD licence"] 15 // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB 16 17 // 18 // All rights reserved. 19 // 20 // Redistribution and use in source and binary forms, with or without 21 // modification, are permitted provided that the following conditions 22 // are met: 23 // 1. Redistributions of source code must retain the above copyright 24 // notice, this list of conditions and the following disclaimer. 25 // 2. Redistributions in binary form must reproduce the above copyright 26 // notice, this list of conditions and the following disclaimer in the 27 // documentation and/or other materials provided with the distribution. 28 // 3. The name of the author may not be used to endorse or promote products 29 // derived from this software without specific prior written permission. 30 // 31 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 32 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 33 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 34 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 35 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 40 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 42 #include <stdlib.h> 43 44 #include "antlr3defs.hpp" 45 46 ANTLR_BEGIN_NAMESPACE() 47 48 /** The definition of an ANTLR3 common token structure, which all implementations 49 * of a token stream should provide, installing any further structures in the 50 * custom pointer element of this structure. 51 * 52 * \remark 53 * Token streams are in essence provided by lexers or other programs that serve 54 * as lexers. 55 */ 56 57 template<class ImplTraits> 58 class CommonToken : public ImplTraits::AllocPolicyType 59 { 60 public: 61 /* Base token types, which all lexer/parser tokens come after in sequence. 62 */ 63 enum TOKEN_TYPE 64 { 65 /** Indicator of an invalid token 66 */ 67 TOKEN_INVALID = 0 68 , EOR_TOKEN_TYPE 69 /** Imaginary token type to cause a traversal of child nodes in a tree parser 70 */ 71 , TOKEN_DOWN 72 /** Imaginary token type to signal the end of a stream of child nodes. 73 */ 74 , TOKEN_UP 75 /** First token that can be used by users/generated code 76 */ 77 , MIN_TOKEN_TYPE = TOKEN_UP + 1 78 79 /** End of file token 80 */ 81 , TOKEN_EOF = (ANTLR_CHARSTREAM_EOF & 0xFFFFFFFF) 82 }; 83 84 typedef typename ImplTraits::TokenIntStreamType TokenIntStreamType; 85 typedef typename ImplTraits::StringType StringType; 86 typedef typename ImplTraits::InputStreamType InputStreamType; 87 typedef typename ImplTraits::StreamDataType StreamDataType; 88 89 private: 90 /** The actual type of this token 91 */ 92 ANTLR_UINT32 m_type; 93 94 /** The virtual channel that this token exists in. 95 */ 96 ANTLR_UINT32 m_channel; 97 98 mutable StringType m_tokText; 99 100 /** The offset into the input stream that the line in which this 101 * token resides starts. 102 */ 103 const StreamDataType* m_lineStart; 104 105 /** The line number in the input stream where this token was derived from 106 */ 107 ANTLR_UINT32 m_line; 108 109 /** The character position in the line that this token was derived from 110 */ 111 ANTLR_INT32 m_charPositionInLine; 112 113 /** Pointer to the input stream that this token originated in. 114 */ 115 InputStreamType* m_input; 116 117 /** What the index of this token is, 0, 1, .., n-2, n-1 tokens 118 */ 119 ANTLR_MARKER m_index; 120 121 /** The character offset in the input stream where the text for this token 122 * starts. 123 */ 124 ANTLR_MARKER m_startIndex; 125 126 /** The character offset in the input stream where the text for this token 127 * stops. 128 */ 129 ANTLR_MARKER m_stopIndex; 130 131 public: 132 CommonToken(); 133 CommonToken(ANTLR_UINT32 type); 134 CommonToken(TOKEN_TYPE type); 135 CommonToken( const CommonToken& ctoken ); 136 137 CommonToken& operator=( const CommonToken& ctoken ); 138 bool operator==( const CommonToken& ctoken ) const; 139 bool operator<( const CommonToken& ctoken ) const; 140 141 InputStreamType* get_input() const; 142 ANTLR_MARKER get_index() const; 143 void set_index( ANTLR_MARKER index ); 144 void set_input( InputStreamType* input ); 145 146 /* ============================== 147 * API 148 */ 149 150 /** Function that returns the text pointer of a token, use 151 * toString() if you want a pANTLR3_STRING version of the token. 152 */ 153 StringType getText() const; 154 155 /** Pointer to a function that 'might' be able to set the text associated 156 * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually 157 * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have 158 * strings associated with them but just point into the current input stream. These 159 * tokens will implement this function with a function that errors out (probably 160 * drastically. 161 */ 162 void set_tokText( const StringType& text ); 163 164 /** Pointer to a function that 'might' be able to set the text associated 165 * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually 166 * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have 167 * strings associated with them but just point into the current input stream. These 168 * tokens will implement this function with a function that errors out (probably 169 * drastically. 170 */ 171 void setText(ANTLR_UINT8* text); 172 void setText(const char* text); 173 174 /** Pointer to a function that returns the token type of this token 175 */ 176 ANTLR_UINT32 get_type() const; 177 ANTLR_UINT32 getType() const; 178 179 /** Pointer to a function that sets the type of this token 180 */ 181 void set_type(ANTLR_UINT32 ttype); 182 183 /** Pointer to a function that gets the 'line' number where this token resides 184 */ 185 ANTLR_UINT32 get_line() const; 186 187 /** Pointer to a function that sets the 'line' number where this token reside 188 */ 189 void set_line(ANTLR_UINT32 line); 190 191 /** Pointer to a function that gets the offset in the line where this token exists 192 */ 193 ANTLR_INT32 get_charPositionInLine() const; 194 ANTLR_INT32 getCharPositionInLine() const; 195 196 /** Pointer to a function that sets the offset in the line where this token exists 197 */ 198 void set_charPositionInLine(ANTLR_INT32 pos); 199 200 /** Pointer to a function that gets the channel that this token was placed in (parsers 201 * can 'tune' to these channels. 202 */ 203 ANTLR_UINT32 get_channel() const; 204 205 /** Pointer to a function that sets the channel that this token should belong to 206 */ 207 void set_channel(ANTLR_UINT32 channel); 208 209 /** Pointer to a function that returns an index 0...n-1 of the token in the token 210 * input stream. 211 */ 212 ANTLR_MARKER get_tokenIndex() const; 213 214 /** Pointer to a function that can set the token index of this token in the token 215 * input stream. 216 */ 217 void set_tokenIndex(ANTLR_MARKER tokenIndex); 218 219 /** Pointer to a function that gets the start index in the input stream for this token. 220 */ 221 ANTLR_MARKER get_startIndex() const; 222 223 /** Pointer to a function that sets the start index in the input stream for this token. 224 */ 225 void set_startIndex(ANTLR_MARKER index); 226 227 /** Pointer to a function that gets the stop index in the input stream for this token. 228 */ 229 ANTLR_MARKER get_stopIndex() const; 230 231 /** Pointer to a function that sets the stop index in the input stream for this token. 232 */ 233 void set_stopIndex(ANTLR_MARKER index); 234 const StreamDataType* get_lineStart() const; 235 void set_lineStart( const StreamDataType* lineStart ); 236 237 /** Pointer to a function that returns this token as a text representation that can be 238 * printed with embedded control codes such as \n replaced with the printable sequence "\\n" 239 * This also yields a string structure that can be used more easily than the pointer to 240 * the input stream in certain situations. 241 */ 242 StringType toString() const; 243 244 }; 245 246 ANTLR_END_NAMESPACE() 247 248 #include "antlr3commontoken.inl" 249 250 #endif 251