1 /** \file 2 * Defines the interface for an ANTLR3 common token stream. Custom token streams should create 3 * one of these and then override any functions by installing their own pointers 4 * to implement the various functions. 5 */ 6 #ifndef _ANTLR3_TOKENSTREAM_H 7 #define _ANTLR3_TOKENSTREAM_H 8 9 // [The "BSD licence"] 10 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 11 // http://www.temporal-wave.com 12 // http://www.linkedin.com/in/jimidle 13 // 14 // All rights reserved. 15 // 16 // Redistribution and use in source and binary forms, with or without 17 // modification, are permitted provided that the following conditions 18 // are met: 19 // 1. Redistributions of source code must retain the above copyright 20 // notice, this list of conditions and the following disclaimer. 21 // 2. Redistributions in binary form must reproduce the above copyright 22 // notice, this list of conditions and the following disclaimer in the 23 // documentation and/or other materials provided with the distribution. 24 // 3. The name of the author may not be used to endorse or promote products 25 // derived from this software without specific prior written permission. 26 // 27 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 28 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 29 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 30 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 31 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 32 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 36 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 38 #include <antlr3defs.h> 39 #include <antlr3string.h> 40 #include <antlr3collections.h> 41 #include <antlr3input.h> 42 #include <antlr3commontoken.h> 43 #include <antlr3bitset.h> 44 #include <antlr3debugeventlistener.h> 45 46 #ifdef __cplusplus 47 extern "C" { 48 #endif 49 50 /** Definition of a token source, which has a pointer to a function that 51 * returns the next token (using a token factory if it is going to be 52 * efficient) and a pointer to an ANTLR3_INPUT_STREAM. This is slightly 53 * different to the Java interface because we have no way to implement 54 * multiple interfaces without defining them in the interface structure 55 * or casting (void *), which is too convoluted. 56 */ 57 typedef struct ANTLR3_TOKEN_SOURCE_struct 58 { 59 /** Pointer to a function that returns the next token in the stream. 60 */ 61 pANTLR3_COMMON_TOKEN (*nextToken)(struct ANTLR3_TOKEN_SOURCE_struct * tokenSource); 62 63 /** Whoever is providing tokens, needs to provide a string factory too 64 */ 65 pANTLR3_STRING_FACTORY strFactory; 66 67 /** A special pre-allocated token, which signifies End Of Tokens. Because this must 68 * be set up with the current input index and so on, we embed the structure and 69 * return the address of it. It is marked as factoryMade, so that it is never 70 * attempted to be freed. 71 */ 72 ANTLR3_COMMON_TOKEN eofToken; 73 74 /// A special pre-allocated token, which is returned by mTokens() if the 75 /// lexer rule said to just skip the generated token altogether. 76 /// Having this single token stops us wasting memory by have the token factory 77 /// actually create something that we are going to SKIP(); anyway. 78 /// 79 ANTLR3_COMMON_TOKEN skipToken; 80 81 /** Whatever is supplying the token source interface, needs a pointer to 82 * itself so that this pointer can be passed to it when the nextToken 83 * function is called. 84 */ 85 void * super; 86 87 /** When the token source is constructed, it is populated with the file 88 * name from whence the tokens were produced by the lexer. This pointer is a 89 * copy of the one supplied by the CharStream (and may be NULL) so should 90 * not be manipulated other than to copy or print it. 91 */ 92 pANTLR3_STRING fileName; 93 } 94 ANTLR3_TOKEN_SOURCE; 95 96 /** Definition of the ANTLR3 common token stream interface. 97 * \remark 98 * Much of the documentation for this interface is stolen from Ter's Java implementation. 99 */ 100 typedef struct ANTLR3_TOKEN_STREAM_struct 101 { 102 /** Pointer to the token source for this stream 103 */ 104 pANTLR3_TOKEN_SOURCE tokenSource; 105 106 /** Whatever is providing this interface needs a pointer to itself 107 * so that this can be passed back to it whenever the api functions 108 * are called. 109 */ 110 void * super; 111 112 /** All input streams implement the ANTLR3_INT_STREAM interface... 113 */ 114 pANTLR3_INT_STREAM istream; 115 116 /// Debugger interface, is this is a debugging token stream 117 /// 118 pANTLR3_DEBUG_EVENT_LISTENER debugger; 119 120 /// Indicates the initial stream state for dbgConsume() 121 /// 122 ANTLR3_BOOLEAN initialStreamState; 123 124 /** Get Token at current input pointer + i ahead where i=1 is next Token. 125 * i<0 indicates tokens in the past. So -1 is previous token and -2 is 126 * two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken. 127 * Return null for LT(0) and any index that results in an absolute address 128 * that is negative. 129 */ 130 pANTLR3_COMMON_TOKEN (*_LT) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, ANTLR3_INT32 k); 131 132 /** Get a token at an absolute index i; 0..n-1. This is really only 133 * needed for profiling and debugging and token stream rewriting. 134 * If you don't want to buffer up tokens, then this method makes no 135 * sense for you. Naturally you can't use the rewrite stream feature. 136 * I believe DebugTokenStream can easily be altered to not use 137 * this method, removing the dependency. 138 */ 139 pANTLR3_COMMON_TOKEN (*get) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, ANTLR3_UINT32 i); 140 141 /** Where is this stream pulling tokens from? This is not the name, but 142 * a pointer into an interface that contains a ANTLR3_TOKEN_SOURCE interface. 143 * The Token Source interface contains a pointer to the input stream and a pointer 144 * to a function that returns the next token. 145 */ 146 pANTLR3_TOKEN_SOURCE (*getTokenSource) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream); 147 148 /** Function that installs a token source for teh stream 149 */ 150 void (*setTokenSource) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, 151 pANTLR3_TOKEN_SOURCE tokenSource); 152 153 /** Return the text of all the tokens in the stream, as the old tramp in 154 * Leeds market used to say; "Get the lot!" 155 */ 156 pANTLR3_STRING (*toString) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream); 157 158 /** Return the text of all tokens from start to stop, inclusive. 159 * If the stream does not buffer all the tokens then it can just 160 * return an empty ANTLR3_STRING or NULL; Grammars should not access $ruleLabel.text in 161 * an action in that case. 162 */ 163 pANTLR3_STRING (*toStringSS) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop); 164 165 /** Because the user is not required to use a token with an index stored 166 * in it, we must provide a means for two token objects themselves to 167 * indicate the start/end location. Most often this will just delegate 168 * to the other toString(int,int). This is also parallel with 169 * the pTREENODE_STREAM->toString(Object,Object). 170 */ 171 pANTLR3_STRING (*toStringTT) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, pANTLR3_COMMON_TOKEN start, pANTLR3_COMMON_TOKEN stop); 172 173 174 /** Function that sets the token stream into debugging mode 175 */ 176 void (*setDebugListener) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, pANTLR3_DEBUG_EVENT_LISTENER debugger); 177 178 179 180 /** Function that knows how to free the memory for an ANTLR3_TOKEN_STREAM 181 */ 182 void (*free) (struct ANTLR3_TOKEN_STREAM_struct * tokenStream); 183 } 184 ANTLR3_TOKEN_STREAM; 185 186 /** Common token stream is an implementation of ANTLR_TOKEN_STREAM for the default 187 * parsers and recognizers. You may of course build your own implementation if 188 * you are so inclined. 189 */ 190 typedef struct ANTLR3_COMMON_TOKEN_STREAM_struct 191 { 192 /** The ANTLR3_TOKEN_STREAM interface implementation, which also includes 193 * the intstream implementation. We could duplicate the pANTLR_INT_STREAM 194 * in this interface and initialize it to a copy, but this could be confusing 195 * it just results in one more level of indirection and I think that with 196 * judicial use of 'const' later, the optimizer will do decent job. 197 */ 198 pANTLR3_TOKEN_STREAM tstream; 199 200 /** Whatever is supplying the COMMON_TOKEN_STREAM needs a pointer to itself 201 * so that this can be accessed by any of the API functions which it implements. 202 */ 203 void * super; 204 205 /** Records every single token pulled from the source indexed by the token index. 206 * There might be more efficient ways to do this, such as referencing directly in to 207 * the token factory pools, but for now this is convenient and the ANTLR3_LIST is not 208 * a huge overhead as it only stores pointers anyway, but allows for iterations and 209 * so on. 210 */ 211 pANTLR3_VECTOR tokens; 212 213 /** Override map of tokens. If a token type has an entry in here, then 214 * the pointer in the table points to an int, being the override channel number 215 * that should always be used for this token type. 216 */ 217 pANTLR3_LIST channelOverrides; 218 219 /** Discared set. If a token has an entry in this table, then it is thrown 220 * away (data pointer is always NULL). 221 */ 222 pANTLR3_LIST discardSet; 223 224 /* The channel number that this token stream is tuned to. For instance, whitespace 225 * is usually tuned to channel 99, which no token stream would normally tune to and 226 * so it is thrown away. 227 */ 228 ANTLR3_UINT32 channel; 229 230 /** If this flag is set to ANTLR3_TRUE, then tokens that the stream sees that are not 231 * in the channel that this stream is tuned to, are not tracked in the 232 * tokens table. When set to false, ALL tokens are added to the tracking. 233 */ 234 ANTLR3_BOOLEAN discardOffChannel; 235 236 /** The index into the tokens list of the current token (the next one that will be 237 * consumed. p = -1 indicates that the token list is empty. 238 */ 239 ANTLR3_INT32 p; 240 241 /** A simple filter mechanism whereby you can tell this token stream 242 * to force all tokens of type ttype to be on channel. For example, 243 * when interpreting, we cannot exec actions so we need to tell 244 * the stream to force all WS and NEWLINE to be a different, ignored 245 * channel. 246 */ 247 void (*setTokenTypeChannel) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, 248 ANTLR3_UINT32 ttype, ANTLR3_UINT32 channel); 249 250 /** Add a particular token type to the discard set. If a token is found to belong 251 * to this set, then it is skipped/thrown away 252 */ 253 void (*discardTokenType) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, ANTLR3_INT32 ttype); 254 255 /** Signal to discard off channel tokens from here on in. 256 */ 257 void (*discardOffChannelToks)(struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, ANTLR3_BOOLEAN discard); 258 259 /** Function that returns a pointer to the ANTLR3_LIST of all tokens 260 * in the stream (this causes the buffer to fill if we have not get any yet) 261 */ 262 pANTLR3_VECTOR (*getTokens) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream); 263 264 /** Function that returns all the tokens between a start and a stop index. 265 * TODO: This is a new list (Ack! Maybe this is a reason to have factories for LISTS and HASHTABLES etc :-( come back to this) 266 */ 267 pANTLR3_LIST (*getTokenRange) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop); 268 269 /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens 270 */ 271 pANTLR3_LIST (*getTokensSet) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, 272 ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_BITSET types); 273 274 /** Function that returns all the tokens indicated by being a member of the supplied List 275 */ 276 pANTLR3_LIST (*getTokensList) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, 277 ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_LIST list); 278 279 /** Function that returns all tokens of a certain type within a range. 280 */ 281 pANTLR3_LIST (*getTokensType) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, 282 ANTLR3_UINT32 start, ANTLR3_UINT32 stop, ANTLR3_UINT32 type); 283 284 /** Function that resets the token stream so that it can be reused, but 285 * but that does not free up any resources, such as the token factory 286 * the factory pool and so on. This prevents the need to keep freeing 287 * and reallocating the token pools if the thing you are building is 288 * a multi-shot dameon or somethign like that. It is much faster to 289 * just reuse all the vectors. 290 */ 291 void (*reset) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream); 292 293 /** Function that knows how to free an ANTLR3_COMMON_TOKEN_STREAM 294 */ 295 void (*free) (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream); 296 } 297 ANTLR3_COMMON_TOKEN_STREAM; 298 299 #ifdef __cplusplus 300 } 301 #endif 302 303 #endif 304