Home | History | Annotate | Download | only in include
/** \file
 * \brief Defines the interface for a common token.
 *
 * All token streams should provide their tokens using an instance
 * of this common token. A custom pointer is provided, where you may attach
 * a further structure to enhance the common token if you feel the need
 * to do so. The C runtime will assume that a token provides implementations
 * of the interface functions, but all of them may be replaced by your own
 * implementation if you require it.
 */
     11 #ifndef	_ANTLR3_COMMON_TOKEN_H
     12 #define	_ANTLR3_COMMON_TOKEN_H
     13 
     14 // [The "BSD licence"]
     15 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
     16 // http://www.temporal-wave.com
     17 // http://www.linkedin.com/in/jimidle
     18 //
     19 // All rights reserved.
     20 //
     21 // Redistribution and use in source and binary forms, with or without
     22 // modification, are permitted provided that the following conditions
     23 // are met:
     24 // 1. Redistributions of source code must retain the above copyright
     25 //    notice, this list of conditions and the following disclaimer.
     26 // 2. Redistributions in binary form must reproduce the above copyright
     27 //    notice, this list of conditions and the following disclaimer in the
     28 //    documentation and/or other materials provided with the distribution.
     29 // 3. The name of the author may not be used to endorse or promote products
     30 //    derived from this software without specific prior written permission.
     31 //
     32 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     33 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     34 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     35 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     36 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     37 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     38 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     39 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     40 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     41 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     42 
     43 #include    <antlr3defs.h>
     44 
     45 /** How many tokens to allocate at once in the token factory
     46  */
     47 #define	ANTLR3_FACTORY_POOL_SIZE    1024
     48 
     49 /* Base token types, which all lexer/parser tokens come after in sequence.
     50  */
     51 
     52 /** Indicator of an invalid token
     53  */
     54 #define	ANTLR3_TOKEN_INVALID	0
     55 
     56 #define	ANTLR3_EOR_TOKEN_TYPE	1
     57 
     58 /** Imaginary token type to cause a traversal of child nodes in a tree parser
     59  */
     60 #define	ANTLR3_TOKEN_DOWN		2
     61 
     62 /** Imaginary token type to signal the end of a stream of child nodes.
     63  */
     64 #define	ANTLR3_TOKEN_UP		3
     65 
     66 /** First token that can be used by users/generated code
     67  */
     68 
     69 #define	ANTLR3_MIN_TOKEN_TYPE	ANTLR3_TOKEN_UP + 1
     70 
     71 /** End of file token
     72  */
     73 #define	ANTLR3_TOKEN_EOF	(ANTLR3_CHARSTREAM_EOF & 0xFFFFFFFF)
     74 
     75 /** Default channel for a token
     76  */
     77 #define	ANTLR3_TOKEN_DEFAULT_CHANNEL	0
     78 
     79 /** Reserved channel number for a HIDDEN token - a token that
     80  *  is hidden from the parser.
     81  */
     82 #define	HIDDEN				99
     83 
     84 #ifdef __cplusplus
     85 extern "C" {
     86 #endif
     87 
     88 // Indicates whether this token is carrying:
     89 //
     90 // State | Meaning
     91 // ------+--------------------------------------
     92 //     0 | Nothing (neither rewrite text, nor setText)
     93 //     1 | char * to user supplied rewrite text
     94 //     2 | pANTLR3_STRING because of setText or similar action
     95 //
     96 #define	ANTLR3_TEXT_NONE	0
     97 #define	ANTLR3_TEXT_CHARP	1
     98 #define	ANTLR3_TEXT_STRING	2
     99 
    100 /** The definition of an ANTLR3 common token structure, which all implementations
    101  * of a token stream should provide, installing any further structures in the
    102  * custom pointer element of this structure.
    103  *
    104  * \remark
    105  * Token streams are in essence provided by lexers or other programs that serve
    106  * as lexers.
    107  */
    108 typedef	struct ANTLR3_COMMON_TOKEN_struct
    109 {
    110     /** The actual type of this token
    111      */
    112     ANTLR3_UINT32   type;
    113 
    114     /** Indicates that a token was produced from the token factory and therefore
    115      *  the the freeToken() method should not do anything itself because
    116      *  token factory is responsible for deleting it.
    117      */
    118     ANTLR3_BOOLEAN  factoryMade;
    119 
    120 	/// A string factory that we can use if we ever need the text of a token
    121 	/// and need to manufacture a pANTLR3_STRING
    122 	///
    123 	pANTLR3_STRING_FACTORY	strFactory;
    124 
    125     /** The line number in the input stream where this token was derived from
    126      */
    127     ANTLR3_UINT32   line;
    128 
    129     /** The offset into the input stream that the line in which this
    130      *  token resides starts.
    131      */
    132     void	    * lineStart;
    133 
    134     /** The character position in the line that this token was derived from
    135      */
    136     ANTLR3_INT32    charPosition;
    137 
    138     /** The virtual channel that this token exists in.
    139      */
    140     ANTLR3_UINT32   channel;
    141 
    142     /** Pointer to the input stream that this token originated in.
    143      */
    144     pANTLR3_INPUT_STREAM    input;
    145 
    146     /** What the index of this token is, 0, 1, .., n-2, n-1 tokens
    147      */
    148     ANTLR3_MARKER   index;
    149 
    150     /** The character offset in the input stream where the text for this token
    151      *  starts.
    152      */
    153     ANTLR3_MARKER   start;
    154 
    155     /** The character offset in the input stream where the text for this token
    156      *  stops.
    157      */
    158     ANTLR3_MARKER   stop;
    159 
    160 	/// Indicates whether this token is carrying:
    161 	///
    162 	/// State | Meaning
    163 	/// ------+--------------------------------------
    164 	///     0 | Nothing (neither rewrite text, nor setText)
    165 	///     1 | char * to user supplied rewrite text
    166 	///     2 | pANTLR3_STRING because of setText or similar action
    167 	///
    168 	/// Affects the union structure tokText below
    169 	/// (uses 32 bit so alignment is always good)
    170 	///
    171 	ANTLR3_UINT32	textState;
    172 
    173 	union
    174 	{
    175 		/// Pointer that is used when the token just has a pointer to
    176 		/// a char *, such as when a rewrite of an imaginary token supplies
    177 		/// a string in the grammar. No sense in constructing a pANTLR3_STRING just
    178 		/// for that, as mostly the text will not be accessed - if it is, then
    179 		/// we will build a pANTLR3_STRING for it a that point.
    180 		///
    181 		pANTLR3_UCHAR	chars;
    182 
    183 		/// Some token types actually do carry around their associated text, hence
    184 		/// (*getText)() will return this pointer if it is not NULL
    185 		///
    186 		pANTLR3_STRING	text;
    187 	}
    188 		tokText;
    189 
    190     /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
    191      *   as the standard structure for a token, a number of user programmable
    192      *	 elements are allowed in a token. This is one of them.
    193      */
    194     ANTLR3_UINT32   user1;
    195 
    196     /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
    197      *   as the standard structure for a token, a number of user programmable
    198      *	 elements are allowed in a token. This is one of them.
    199      */
    200     ANTLR3_UINT32   user2;
    201 
    202     /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
    203      *   as the standard structure for a token, a number of user programmable
    204      *	 elements are allowed in a token. This is one of them.
    205      */
    206     ANTLR3_UINT32   user3;
    207 
    208     /** Pointer to a custom element that the ANTLR3 programmer may define and install
    209      */
    210     void    * custom;
    211 
    212     /** Pointer to a function that knows how to free the custom structure when the
    213      *  token is destroyed.
    214      */
    215     void    (*freeCustom)(void * custom);
    216 
    217     /* ==============================
    218      * API
    219      */
    220 
    221     /** Pointer to function that returns the text pointer of a token, use
    222      *  toString() if you want a pANTLR3_STRING version of the token.
    223      */
    224     pANTLR3_STRING  (*getText)(struct ANTLR3_COMMON_TOKEN_struct * token);
    225 
    226     /** Pointer to a function that 'might' be able to set the text associated
    227      *  with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
    228      *  do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have
    229      *  strings associated with them but just point into the current input stream. These
    230      *  tokens will implement this function with a function that errors out (probably
    231      *  drastically.
    232      */
    233     void	    (*setText)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_STRING text);
    234 
    235     /** Pointer to a function that 'might' be able to set the text associated
    236      *  with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
    237      *  do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have
    238      *  strings associated with them but just point into the current input stream. These
    239      *  tokens will implement this function with a function that errors out (probably
    240      *  drastically.
    241      */
    242     void	    (*setText8)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_UINT8 text);
    243 
    244     /** Pointer to a function that returns the token type of this token
    245      */
    246     ANTLR3_UINT32   (*getType)(struct ANTLR3_COMMON_TOKEN_struct * token);
    247 
    248     /** Pointer to a function that sets the type of this token
    249      */
    250     void	    (*setType)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 ttype);
    251 
    252     /** Pointer to a function that gets the 'line' number where this token resides
    253      */
    254     ANTLR3_UINT32   (*getLine)(struct ANTLR3_COMMON_TOKEN_struct * token);
    255 
    256     /** Pointer to a function that sets the 'line' number where this token reside
    257      */
    258     void	    (*setLine)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 line);
    259 
    260     /** Pointer to a function that gets the offset in the line where this token exists
    261      */
    262     ANTLR3_INT32    (*getCharPositionInLine)	(struct ANTLR3_COMMON_TOKEN_struct * token);
    263 
    264     /** Pointer to a function that sets the offset in the line where this token exists
    265      */
    266     void	    (*setCharPositionInLine)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_INT32 pos);
    267 
    268     /** Pointer to a function that gets the channel that this token was placed in (parsers
    269      *  can 'tune' to these channels.
    270      */
    271     ANTLR3_UINT32   (*getChannel)	(struct ANTLR3_COMMON_TOKEN_struct * token);
    272 
    273     /** Pointer to a function that sets the channel that this token should belong to
    274      */
    275     void	    (*setChannel)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 channel);
    276 
    277     /** Pointer to a function that returns an index 0...n-1 of the token in the token
    278      *  input stream.
    279      */
    280     ANTLR3_MARKER   (*getTokenIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);
    281 
    282     /** Pointer to a function that can set the token index of this token in the token
    283      *  input stream.
    284      */
    285     void			(*setTokenIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER);
    286 
    287     /** Pointer to a function that gets the start index in the input stream for this token.
    288      */
    289     ANTLR3_MARKER   (*getStartIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);
    290 
    291     /** Pointer to a function that sets the start index in the input stream for this token.
    292      */
    293     void			(*setStartIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);
    294 
    295     /** Pointer to a function that gets the stop index in the input stream for this token.
    296      */
    297     ANTLR3_MARKER   (*getStopIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);
    298 
    299     /** Pointer to a function that sets the stop index in the input stream for this token.
    300      */
    301     void			(*setStopIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);
    302 
    303     /** Pointer to a function that returns this token as a text representation that can be
    304      *  printed with embedded control codes such as \n replaced with the printable sequence "\\n"
    305      *  This also yields a string structure that can be used more easily than the pointer to
    306      *  the input stream in certain situations.
    307      */
    308     pANTLR3_STRING  (*toString)		(struct ANTLR3_COMMON_TOKEN_struct * token);
    309 }
    310     ANTLR3_COMMON_TOKEN;
    311 
    312 /** \brief ANTLR3 Token factory interface to create lots of tokens efficiently
    313  *  rather than creating and freeing lots of little bits of memory.
    314  */
    315 typedef	struct ANTLR3_TOKEN_FACTORY_struct
    316 {
    317     /** Pointers to the array of tokens that this factory has produced so far
    318      */
    319     pANTLR3_COMMON_TOKEN    *pools;
    320 
    321     /** Current pool tokens we are allocating from
    322      */
    323     ANTLR3_INT32	    thisPool;
    324 
    325     /** Maximum pool count we have available
    326      */
    327     ANTLR3_INT32            maxPool;
    328 
    329     /** The next token to throw out from the pool, will cause a new pool allocation
    330      *  if this exceeds the available tokenCount
    331      */
    332     ANTLR3_UINT32	    nextToken;
    333 
    334     /** Trick to initialize tokens and their API quickly, we set up this token when the
    335      *  factory is created, then just copy the memory it uses into the new token.
    336      */
    337     ANTLR3_COMMON_TOKEN	    unTruc;
    338 
    339     /** Pointer to an input stream that is using this token factory (may be NULL)
    340      *  which will be assigned to the tokens automatically.
    341      */
    342     pANTLR3_INPUT_STREAM    input;
    343 
    344     /** Pointer to a function that returns a new token
    345      */
    346     pANTLR3_COMMON_TOKEN    (*newToken)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);
    347 
    348     /** Pointer to a function that resets the factory so you can reuse the pools it
    349      *  has laready allocated
    350      */
    351     void                    (*reset)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);
    352 
    353     /** Pointer to a function that changes teh curent inptu stream so that
    354      *  new tokens are created with reference to their originating text.
    355      */
    356     void		    (*setInputStream)	(struct ANTLR3_TOKEN_FACTORY_struct * factory, pANTLR3_INPUT_STREAM input);
    357     /** Pointer to a function the destroys the factory
    358      */
    359     void		    (*close)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);
    360 }
    361     ANTLR3_TOKEN_FACTORY;
    362 
    363 #ifdef __cplusplus
    364 }
    365 #endif
    366 
    367 #endif
    368