1 /** \file 2 * \brief Defines the interface for a common token. 3 * 4 * All token streams should provide their tokens using an instance 5 * of this common token. A custom pointer is provided, wher you may attach 6 * a further structure to enhance the common token if you feel the need 7 * to do so. The C runtime will assume that a token provides implementations 8 * of the interface functions, but all of them may be rplaced by your own 9 * implementation if you require it. 10 */ 11 #ifndef _ANTLR3_COMMON_TOKEN_H 12 #define _ANTLR3_COMMON_TOKEN_H 13 14 // [The "BSD licence"] 15 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 16 // http://www.temporal-wave.com 17 // http://www.linkedin.com/in/jimidle 18 // 19 // All rights reserved. 20 // 21 // Redistribution and use in source and binary forms, with or without 22 // modification, are permitted provided that the following conditions 23 // are met: 24 // 1. Redistributions of source code must retain the above copyright 25 // notice, this list of conditions and the following disclaimer. 26 // 2. Redistributions in binary form must reproduce the above copyright 27 // notice, this list of conditions and the following disclaimer in the 28 // documentation and/or other materials provided with the distribution. 29 // 3. The name of the author may not be used to endorse or promote products 30 // derived from this software without specific prior written permission. 31 // 32 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 33 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 34 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 35 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 36 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 37 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 38 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 39 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 41 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 43 #include <antlr3defs.h> 44 45 /** How many tokens to allocate at once in the token factory 46 */ 47 #define ANTLR3_FACTORY_POOL_SIZE 1024 48 49 /* Base token types, which all lexer/parser tokens come after in sequence. 50 */ 51 52 /** Indicator of an invalid token 53 */ 54 #define ANTLR3_TOKEN_INVALID 0 55 56 #define ANTLR3_EOR_TOKEN_TYPE 1 57 58 /** Imaginary token type to cause a traversal of child nodes in a tree parser 59 */ 60 #define ANTLR3_TOKEN_DOWN 2 61 62 /** Imaginary token type to signal the end of a stream of child nodes. 63 */ 64 #define ANTLR3_TOKEN_UP 3 65 66 /** First token that can be used by users/generated code 67 */ 68 69 #define ANTLR3_MIN_TOKEN_TYPE ANTLR3_TOKEN_UP + 1 70 71 /** End of file token 72 */ 73 #define ANTLR3_TOKEN_EOF (ANTLR3_CHARSTREAM_EOF & 0xFFFFFFFF) 74 75 /** Default channel for a token 76 */ 77 #define ANTLR3_TOKEN_DEFAULT_CHANNEL 0 78 79 /** Reserved channel number for a HIDDEN token - a token that 80 * is hidden from the parser. 81 */ 82 #define HIDDEN 99 83 84 #ifdef __cplusplus 85 extern "C" { 86 #endif 87 88 // Indicates whether this token is carrying: 89 // 90 // State | Meaning 91 // ------+-------------------------------------- 92 // 0 | Nothing (neither rewrite text, nor setText) 93 // 1 | char * to user supplied rewrite text 94 // 2 | pANTLR3_STRING because of setText or similar action 95 // 96 #define ANTLR3_TEXT_NONE 0 97 #define ANTLR3_TEXT_CHARP 1 98 #define ANTLR3_TEXT_STRING 2 99 100 /** The definition of an ANTLR3 common token structure, which all implementations 101 * of a token stream should provide, installing any further structures in the 102 * custom pointer element of this structure. 103 * 104 * \remark 105 * Token streams are in essence provided by lexers or other programs that serve 106 * as lexers. 107 */ 108 typedef struct ANTLR3_COMMON_TOKEN_struct 109 { 110 /** The actual type of this token 111 */ 112 ANTLR3_UINT32 type; 113 114 /** Indicates that a token was produced from the token factory and therefore 115 * the the freeToken() method should not do anything itself because 116 * token factory is responsible for deleting it. 117 */ 118 ANTLR3_BOOLEAN factoryMade; 119 120 /// A string factory that we can use if we ever need the text of a token 121 /// and need to manufacture a pANTLR3_STRING 122 /// 123 pANTLR3_STRING_FACTORY strFactory; 124 125 /** The line number in the input stream where this token was derived from 126 */ 127 ANTLR3_UINT32 line; 128 129 /** The offset into the input stream that the line in which this 130 * token resides starts. 131 */ 132 void * lineStart; 133 134 /** The character position in the line that this token was derived from 135 */ 136 ANTLR3_INT32 charPosition; 137 138 /** The virtual channel that this token exists in. 139 */ 140 ANTLR3_UINT32 channel; 141 142 /** Pointer to the input stream that this token originated in. 143 */ 144 pANTLR3_INPUT_STREAM input; 145 146 /** What the index of this token is, 0, 1, .., n-2, n-1 tokens 147 */ 148 ANTLR3_MARKER index; 149 150 /** The character offset in the input stream where the text for this token 151 * starts. 152 */ 153 ANTLR3_MARKER start; 154 155 /** The character offset in the input stream where the text for this token 156 * stops. 157 */ 158 ANTLR3_MARKER stop; 159 160 /// Indicates whether this token is carrying: 161 /// 162 /// State | Meaning 163 /// ------+-------------------------------------- 164 /// 0 | Nothing (neither rewrite text, nor setText) 165 /// 1 | char * to user supplied rewrite text 166 /// 2 | pANTLR3_STRING because of setText or similar action 167 /// 168 /// Affects the union structure tokText below 169 /// (uses 32 bit so alignment is always good) 170 /// 171 ANTLR3_UINT32 textState; 172 173 union 174 { 175 /// Pointer that is used when the token just has a pointer to 176 /// a char *, such as when a rewrite of an imaginary token supplies 177 /// a string in the grammar. No sense in constructing a pANTLR3_STRING just 178 /// for that, as mostly the text will not be accessed - if it is, then 179 /// we will build a pANTLR3_STRING for it a that point. 180 /// 181 pANTLR3_UCHAR chars; 182 183 /// Some token types actually do carry around their associated text, hence 184 /// (*getText)() will return this pointer if it is not NULL 185 /// 186 pANTLR3_STRING text; 187 } 188 tokText; 189 190 /** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN 191 * as the standard structure for a token, a number of user programmable 192 * elements are allowed in a token. This is one of them. 193 */ 194 ANTLR3_UINT32 user1; 195 196 /** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN 197 * as the standard structure for a token, a number of user programmable 198 * elements are allowed in a token. This is one of them. 199 */ 200 ANTLR3_UINT32 user2; 201 202 /** Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN 203 * as the standard structure for a token, a number of user programmable 204 * elements are allowed in a token. This is one of them. 205 */ 206 ANTLR3_UINT32 user3; 207 208 /** Pointer to a custom element that the ANTLR3 programmer may define and install 209 */ 210 void * custom; 211 212 /** Pointer to a function that knows how to free the custom structure when the 213 * token is destroyed. 214 */ 215 void (*freeCustom)(void * custom); 216 217 /* ============================== 218 * API 219 */ 220 221 /** Pointer to function that returns the text pointer of a token, use 222 * toString() if you want a pANTLR3_STRING version of the token. 223 */ 224 pANTLR3_STRING (*getText)(struct ANTLR3_COMMON_TOKEN_struct * token); 225 226 /** Pointer to a function that 'might' be able to set the text associated 227 * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually 228 * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have 229 * strings associated with them but just point into the current input stream. These 230 * tokens will implement this function with a function that errors out (probably 231 * drastically. 232 */ 233 void (*setText)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_STRING text); 234 235 /** Pointer to a function that 'might' be able to set the text associated 236 * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually 237 * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have 238 * strings associated with them but just point into the current input stream. These 239 * tokens will implement this function with a function that errors out (probably 240 * drastically. 241 */ 242 void (*setText8)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_UINT8 text); 243 244 /** Pointer to a function that returns the token type of this token 245 */ 246 ANTLR3_UINT32 (*getType)(struct ANTLR3_COMMON_TOKEN_struct * token); 247 248 /** Pointer to a function that sets the type of this token 249 */ 250 void (*setType)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 ttype); 251 252 /** Pointer to a function that gets the 'line' number where this token resides 253 */ 254 ANTLR3_UINT32 (*getLine)(struct ANTLR3_COMMON_TOKEN_struct * token); 255 256 /** Pointer to a function that sets the 'line' number where this token reside 257 */ 258 void (*setLine)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 line); 259 260 /** Pointer to a function that gets the offset in the line where this token exists 261 */ 262 ANTLR3_INT32 (*getCharPositionInLine) (struct ANTLR3_COMMON_TOKEN_struct * token); 263 264 /** Pointer to a function that sets the offset in the line where this token exists 265 */ 266 void (*setCharPositionInLine) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_INT32 pos); 267 268 /** Pointer to a function that gets the channel that this token was placed in (parsers 269 * can 'tune' to these channels. 270 */ 271 ANTLR3_UINT32 (*getChannel) (struct ANTLR3_COMMON_TOKEN_struct * token); 272 273 /** Pointer to a function that sets the channel that this token should belong to 274 */ 275 void (*setChannel) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 channel); 276 277 /** Pointer to a function that returns an index 0...n-1 of the token in the token 278 * input stream. 279 */ 280 ANTLR3_MARKER (*getTokenIndex) (struct ANTLR3_COMMON_TOKEN_struct * token); 281 282 /** Pointer to a function that can set the token index of this token in the token 283 * input stream. 284 */ 285 void (*setTokenIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER); 286 287 /** Pointer to a function that gets the start index in the input stream for this token. 288 */ 289 ANTLR3_MARKER (*getStartIndex) (struct ANTLR3_COMMON_TOKEN_struct * token); 290 291 /** Pointer to a function that sets the start index in the input stream for this token. 292 */ 293 void (*setStartIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index); 294 295 /** Pointer to a function that gets the stop index in the input stream for this token. 296 */ 297 ANTLR3_MARKER (*getStopIndex) (struct ANTLR3_COMMON_TOKEN_struct * token); 298 299 /** Pointer to a function that sets the stop index in the input stream for this token. 300 */ 301 void (*setStopIndex) (struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index); 302 303 /** Pointer to a function that returns this token as a text representation that can be 304 * printed with embedded control codes such as \n replaced with the printable sequence "\\n" 305 * This also yields a string structure that can be used more easily than the pointer to 306 * the input stream in certain situations. 307 */ 308 pANTLR3_STRING (*toString) (struct ANTLR3_COMMON_TOKEN_struct * token); 309 } 310 ANTLR3_COMMON_TOKEN; 311 312 /** \brief ANTLR3 Token factory interface to create lots of tokens efficiently 313 * rather than creating and freeing lots of little bits of memory. 314 */ 315 typedef struct ANTLR3_TOKEN_FACTORY_struct 316 { 317 /** Pointers to the array of tokens that this factory has produced so far 318 */ 319 pANTLR3_COMMON_TOKEN *pools; 320 321 /** Current pool tokens we are allocating from 322 */ 323 ANTLR3_INT32 thisPool; 324 325 /** Maximum pool count we have available 326 */ 327 ANTLR3_INT32 maxPool; 328 329 /** The next token to throw out from the pool, will cause a new pool allocation 330 * if this exceeds the available tokenCount 331 */ 332 ANTLR3_UINT32 nextToken; 333 334 /** Trick to initialize tokens and their API quickly, we set up this token when the 335 * factory is created, then just copy the memory it uses into the new token. 336 */ 337 ANTLR3_COMMON_TOKEN unTruc; 338 339 /** Pointer to an input stream that is using this token factory (may be NULL) 340 * which will be assigned to the tokens automatically. 341 */ 342 pANTLR3_INPUT_STREAM input; 343 344 /** Pointer to a function that returns a new token 345 */ 346 pANTLR3_COMMON_TOKEN (*newToken) (struct ANTLR3_TOKEN_FACTORY_struct * factory); 347 348 /** Pointer to a function that resets the factory so you can reuse the pools it 349 * has laready allocated 350 */ 351 void (*reset) (struct ANTLR3_TOKEN_FACTORY_struct * factory); 352 353 /** Pointer to a function that changes teh curent inptu stream so that 354 * new tokens are created with reference to their originating text. 355 */ 356 void (*setInputStream) (struct ANTLR3_TOKEN_FACTORY_struct * factory, pANTLR3_INPUT_STREAM input); 357 /** Pointer to a function the destroys the factory 358 */ 359 void (*close) (struct ANTLR3_TOKEN_FACTORY_struct * factory); 360 } 361 ANTLR3_TOKEN_FACTORY; 362 363 #ifdef __cplusplus 364 } 365 #endif 366 367 #endif 368