Home | History | Annotate | Download | only in include
      1 /** \file
      2  * Defines the interface for an ANTLR3 common token stream. Custom token streams should create
      3  * one of these and then override any functions by installing their own pointers
      4  * to implement the various functions.
      5  */
      6 #ifndef	_ANTLR3_TOKENSTREAM_H
      7 #define	_ANTLR3_TOKENSTREAM_H
      8 
      9 // [The "BSD licence"]
     10 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
     11 // http://www.temporal-wave.com
     12 // http://www.linkedin.com/in/jimidle
     13 //
     14 // All rights reserved.
     15 //
     16 // Redistribution and use in source and binary forms, with or without
     17 // modification, are permitted provided that the following conditions
     18 // are met:
     19 // 1. Redistributions of source code must retain the above copyright
     20 //    notice, this list of conditions and the following disclaimer.
     21 // 2. Redistributions in binary form must reproduce the above copyright
     22 //    notice, this list of conditions and the following disclaimer in the
     23 //    documentation and/or other materials provided with the distribution.
     24 // 3. The name of the author may not be used to endorse or promote products
     25 //    derived from this software without specific prior written permission.
     26 //
     27 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     28 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     29 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     30 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     31 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     32 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     33 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     34 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     35 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     36 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     37 
     38 #include    <antlr3defs.h>
     39 #include    <antlr3string.h>
     40 #include    <antlr3collections.h>
     41 #include    <antlr3input.h>
     42 #include    <antlr3commontoken.h>
     43 #include    <antlr3bitset.h>
     44 #include	<antlr3debugeventlistener.h>
     45 
     46 #ifdef __cplusplus
     47 extern "C" {
     48 #endif
     49 
     50 /** Definition of a token source, which has a pointer to a function that
     51  *  returns the next token (using a token factory if it is going to be
     52  *  efficient). NOTE(review): no ANTLR3_INPUT_STREAM pointer is declared here;
     53  *  presumably the input is reached through super - confirm. This differs from
     54  *  the Java interface because C cannot implement multiple interfaces without
     55  *  defining them in the interface structure or casting (void *), which is too convoluted.
     56  */
     57 typedef struct ANTLR3_TOKEN_SOURCE_struct
     58 {
     59     /** Pointer to a function that returns the next token in the stream.
     60      */
     61     pANTLR3_COMMON_TOKEN    (*nextToken)(struct ANTLR3_TOKEN_SOURCE_struct * tokenSource);
     62 
     63     /** Whoever is providing tokens needs to provide a string factory too.
     64      */
     65     pANTLR3_STRING_FACTORY  strFactory;
     66 
     67     /** A special pre-allocated token, which signifies End Of Tokens. Because this must
     68      *  be set up with the current input index and so on, we embed the structure and
     69      *  return the address of it. It is marked as factoryMade, so that no attempt is
     70      *  ever made to free it.
     71      */
     72     ANTLR3_COMMON_TOKEN	    eofToken;
     73 
     74 	/// A special pre-allocated token, which is returned by mTokens() if the
     75 	/// lexer rule said to just skip the generated token altogether.
     76 	/// Having this single token stops us wasting memory by having the token factory
     77 	/// actually create something that we are going to SKIP(); anyway.
     78 	///
     79 	ANTLR3_COMMON_TOKEN		skipToken;
     80 
     81     /** Whatever is supplying the token source interface needs a pointer to
     82      *  itself, so that this pointer can be passed to it when the nextToken
     83      *  function is called.
     84      */
     85     void		    * super;
     86 
     87     /** When the token source is constructed, it is populated with the name of
     88      *  the file from which the lexer produced the tokens. This pointer is a
     89      *  copy of the one supplied by the CharStream (and may be NULL), so it should
     90      *  not be manipulated other than to copy or print it.
     91      */
     92     pANTLR3_STRING	    fileName;
     93 }
     94     ANTLR3_TOKEN_SOURCE;
     95 
     96 /** Definition of the ANTLR3 token stream interface (ANTLR3_COMMON_TOKEN_STREAM is one implementation).
     97  * \remark
     98  * Much of the documentation for this interface is stolen from Ter's Java implementation.
     99  */
    100 typedef	struct ANTLR3_TOKEN_STREAM_struct
    101 {
    102     /** Pointer to the token source for this stream.
    103      */
    104     pANTLR3_TOKEN_SOURCE    tokenSource;
    105 
    106     /** Whatever is providing this interface needs a pointer to itself
    107      *  so that this can be passed back to it whenever the API functions
    108      *  are called.
    109      */
    110     void	      * super;
    111 
    112     /** All input streams implement the ANTLR3_INT_STREAM interface...
    113      */
    114     pANTLR3_INT_STREAM	    istream;
    115 
    116 	/// Debugger interface, if this is a debugging token stream
    117 	///
    118 	pANTLR3_DEBUG_EVENT_LISTENER		debugger;
    119 
    120 	/// Indicates the initial stream state for dbgConsume()
    121 	///
    122 	ANTLR3_BOOLEAN			initialStreamState;
    123 
    124     /** Get Token at current input pointer + k ahead where k=1 is next Token.
    125      *  k<0 indicates tokens in the past.  So -1 is previous token and -2 is
    126      *  two tokens ago. LT(0) is undefined.  For k>=n, return Token.EOFToken.
    127      *  Return null for LT(0) and any index that results in an absolute address
    128      *  that is negative.
    129      */
    130     pANTLR3_COMMON_TOKEN    (*_LT)		(struct ANTLR3_TOKEN_STREAM_struct * tokenStream, ANTLR3_INT32 k);
    131 
    132     /** Get a token at an absolute index i; 0..n-1.  This is really only
    133      *  needed for profiling and debugging and token stream rewriting.
    134      *  If you don't want to buffer up tokens, then this method makes no
    135      *  sense for you.  Naturally you can't use the rewrite stream feature.
    136      *  I believe DebugTokenStream can easily be altered to not use
    137      *  this method, removing the dependency.
    138      */
    139     pANTLR3_COMMON_TOKEN    (*get)		(struct ANTLR3_TOKEN_STREAM_struct * tokenStream, ANTLR3_UINT32 i);
    140 
    141     /** Where is this stream pulling tokens from?  This is not the name, but
    142      *  a pointer into an interface that contains an ANTLR3_TOKEN_SOURCE interface.
    143      *  The Token Source interface contains a pointer to the input stream and a pointer
    144      *  to a function that returns the next token.
    145      */
    146     pANTLR3_TOKEN_SOURCE    (*getTokenSource)	(struct ANTLR3_TOKEN_STREAM_struct * tokenStream);
    147 
    148     /** Function that installs a token source for the stream.
    149      */
    150     void		    (*setTokenSource)	(struct ANTLR3_TOKEN_STREAM_struct * tokenStream,
    151 						 pANTLR3_TOKEN_SOURCE		   tokenSource);
    152 
    153     /** Return the text of all the tokens in the stream, as the old tramp in
    154      *  Leeds market used to say; "Get the lot!"
    155      */
    156     pANTLR3_STRING	    (*toString)		(struct ANTLR3_TOKEN_STREAM_struct * tokenStream);
    157 
    158     /** Return the text of all tokens from start to stop, inclusive.
    159      *  If the stream does not buffer all the tokens then it can just
    160      *  return an empty ANTLR3_STRING or NULL;  Grammars should not access $ruleLabel.text in
    161      *  an action in that case.
    162      */
    163     pANTLR3_STRING	    (*toStringSS)	(struct ANTLR3_TOKEN_STREAM_struct * tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop);
    164 
    165     /** Because the user is not required to use a token with an index stored
    166      *  in it, we must provide a means for two token objects themselves to
    167      *  indicate the start/end location.  Most often this will just delegate
    168      *  to the index-based form, toStringSS().  This is also parallel with
    169      *  the pTREENODE_STREAM->toString(Object,Object).
    170      */
    171     pANTLR3_STRING	    (*toStringTT)	(struct ANTLR3_TOKEN_STREAM_struct * tokenStream, pANTLR3_COMMON_TOKEN start, pANTLR3_COMMON_TOKEN stop);
    172 
    173 
    174     /** Function that sets the token stream into debugging mode.
    175      */
    176     void		    (*setDebugListener)	    (struct ANTLR3_TOKEN_STREAM_struct * tokenStream, pANTLR3_DEBUG_EVENT_LISTENER debugger);
    177 
    178 
    179 
    180     /** Function that knows how to free the memory for an ANTLR3_TOKEN_STREAM.
    181      */
    182     void		    (*free)		(struct ANTLR3_TOKEN_STREAM_struct * tokenStream);
    183 }
    184     ANTLR3_TOKEN_STREAM;
    185 
    186 /** Common token stream is an implementation of ANTLR3_TOKEN_STREAM for the default
    187  *  parsers and recognizers. You may of course build your own implementation if
    188  *  you are so inclined.
    189  */
    190 typedef	struct	ANTLR3_COMMON_TOKEN_STREAM_struct
    191 {
    192     /** The ANTLR3_TOKEN_STREAM interface implementation, which also includes
    193      *  the intstream implementation. We could duplicate the pANTLR3_INT_STREAM
    194      *  in this interface and initialize it to a copy, but this could be confusing;
    195      *  as it stands it just results in one more level of indirection, and I think that
    196      *  with judicious use of 'const' later, the optimizer will do a decent job.
    197      */
    198     pANTLR3_TOKEN_STREAM    tstream;
    199 
    200     /** Whatever is supplying the COMMON_TOKEN_STREAM needs a pointer to itself
    201      *  so that this can be accessed by any of the API functions which it implements.
    202      */
    203     void		    * super;
    204 
    205     /** Records every single token pulled from the source indexed by the token index.
    206      *  There might be more efficient ways to do this, such as referencing directly in to
    207      *  the token factory pools, but for now this is convenient and the ANTLR3_VECTOR is not
    208      *  a huge overhead as it only stores pointers anyway, but allows for iterations and
    209      *  so on.
    210      */
    211     pANTLR3_VECTOR	    tokens;
    212 
    213     /** Override map of tokens. If a token type has an entry in here, then
    214      *  the pointer in the table points to an int, being the override channel number
    215      *  that should always be used for this token type.
    216      */
    217     pANTLR3_LIST	    channelOverrides;
    218 
    219     /** Discard set. If a token has an entry in this table, then it is thrown
    220      *  away (data pointer is always NULL).
    221      */
    222     pANTLR3_LIST	    discardSet;
    223 
    224     /** The channel number that this token stream is tuned to. For instance, whitespace
    225      * is usually tuned to channel 99, which no token stream would normally tune to and
    226      * so it is thrown away.
    227      */
    228     ANTLR3_UINT32	    channel;
    229 
    230     /** If this flag is set to ANTLR3_TRUE, then tokens that the stream sees that are not
    231      *  in the channel that this stream is tuned to, are not tracked in the
    232      *  tokens table. When set to false, ALL tokens are added to the tracking.
    233      */
    234     ANTLR3_BOOLEAN	    discardOffChannel;
    235 
    236     /** The index into the tokens list of the current token (the next one that will be
    237      *  consumed). p = -1 indicates that the token list is empty.
    238      */
    239     ANTLR3_INT32	    p;
    240 
    241     /** A simple filter mechanism whereby you can tell this token stream
    242      *  to force all tokens of type ttype to be on channel.  For example,
    243      *  when interpreting, we cannot exec actions so we need to tell
    244      *  the stream to force all WS and NEWLINE to be a different, ignored
    245      *  channel.
    246      */
    247     void		    (*setTokenTypeChannel)  (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream,
    248 							ANTLR3_UINT32 ttype, ANTLR3_UINT32 channel);
    249 
    250     /** Add a particular token type to the discard set. If a token is found to belong
    251      *  to this set, then it is skipped/thrown away. NOTE(review): ttype is ANTLR3_INT32
    252      *  here but ANTLR3_UINT32 in setTokenTypeChannel and getTokensType - confirm intent. */
    253     void		    (*discardTokenType)	    (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, ANTLR3_INT32 ttype);
    254 
    255     /** Signal to discard off channel tokens from here on in.
    256      */
    257     void		    (*discardOffChannelToks)(struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, ANTLR3_BOOLEAN discard);
    258 
    259     /** Function that returns a pointer to the ANTLR3_VECTOR of all tokens
    260      *  in the stream (this causes the buffer to fill if we have not got any yet).
    261      */
    262     pANTLR3_VECTOR	    (*getTokens)	    (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream);
    263 
    264     /** Function that returns all the tokens between a start and a stop index.
    265      *  TODO: This is a new list (Ack! Maybe this is a reason to have factories for LISTS and HASHTABLES etc :-( come back to this)
    266      */
    267     pANTLR3_LIST	    (*getTokenRange)	    (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop);
    268 
    269     /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens.
    270      */
    271     pANTLR3_LIST	    (*getTokensSet)	    (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream,
    272 							ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_BITSET types);
    273 
    274     /** Function that returns all the tokens indicated by being a member of the supplied List.
    275      */
    276     pANTLR3_LIST	    (*getTokensList)	    (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream,
    277 							ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_LIST list);
    278 
    279     /** Function that returns all tokens of a certain type within a range.
    280      */
    281     pANTLR3_LIST	    (*getTokensType)	    (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream,
    282 							ANTLR3_UINT32 start, ANTLR3_UINT32 stop, ANTLR3_UINT32 type);
    283 
    284     /** Function that resets the token stream so that it can be reused,
    285      *  but that does not free up any resources, such as the token factory,
    286      *  the factory pool and so on. This prevents the need to keep freeing
    287      *  and reallocating the token pools if the thing you are building is
    288      *  a multi-shot daemon or something like that. It is much faster to
    289      *  just reuse all the vectors.
    290      */
    291     void                    (*reset)            (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream);
    292 
    293     /** Function that knows how to free an ANTLR3_COMMON_TOKEN_STREAM.
    294      */
    295     void		    (*free)		    (struct ANTLR3_COMMON_TOKEN_STREAM_struct * tokenStream);
    296 }
    297     ANTLR3_COMMON_TOKEN_STREAM;
    298 
    299 #ifdef __cplusplus
    300 }
    301 #endif
    302 
    303 #endif
    304