Home | History | Annotate | Download | only in include
      1 /** \file
      2  * Simple string interface allows indiscriminate allocation of strings
      3  * such that they can be allocated all over the place and released in
      4  * one chunk via a string factory - saves lots of hassle in remembering what
      5  * strings were allocated where.
      6  */
      7 #ifndef	_ANTLR3_STRING_H
      8 #define	_ANTLR3_STRING_H
      9 
     10 // [The "BSD licence"]
     11 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
     12 // http://www.temporal-wave.com
     13 // http://www.linkedin.com/in/jimidle
     14 //
     15 // All rights reserved.
     16 //
     17 // Redistribution and use in source and binary forms, with or without
     18 // modification, are permitted provided that the following conditions
     19 // are met:
     20 // 1. Redistributions of source code must retain the above copyright
     21 //    notice, this list of conditions and the following disclaimer.
     22 // 2. Redistributions in binary form must reproduce the above copyright
     23 //    notice, this list of conditions and the following disclaimer in the
     24 //    documentation and/or other materials provided with the distribution.
     25 // 3. The name of the author may not be used to endorse or promote products
     26 //    derived from this software without specific prior written permission.
     27 //
     28 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     29 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     30 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     31 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     32 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     33 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     34 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     35 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     36 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     37 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     38 
     39 #include    <antlr3defs.h>
     40 #include    <antlr3collections.h>
     41 
     42 #ifdef __cplusplus
     43 extern "C" {
     44 #endif
     45 
     46 /** Base string class tracks the allocations and provides simple string
     47  *  tracking functions. Mostly you can work directly on the string for things
     48  *  that don't reallocate it, like strchr() etc. Perhaps someone will want to provide implementations for UTF8
     49  *  and so on.
     50  */
     51 typedef	struct ANTLR3_STRING_struct
     52 {
     53 
     54     /** The factory that created this string
     55      */
     56     pANTLR3_STRING_FACTORY	factory;
     57 
     58     /** Pointer to the current string value (starts at NULL unless
     59      *  the string allocator is told to create it with a pre-known size).
     60      */
     61     pANTLR3_UINT8		chars;
     62 
     63     /** Current length of the string up to, and not including, the trailing '\0'.
     64      *  Note that the actual allocation (->size)
     65      *  is always at least one byte more than this to accommodate the trailing '\0'
     66      */
     67     ANTLR3_UINT32		len;
     68 
     69     /** Current size of the string in bytes including the trailing '\0'
     70      */
     71     ANTLR3_UINT32		size;
     72 
     73     /** Index of string (allocation number) in case someone wants
     74      *  to explicitly release it.
     75      */
     76     ANTLR3_UINT32		index;
     77 
     78     /** Occasionally it is useful to know what the encoding of the string
     79      *  actually is, hence it is stored here as one of the ANTLR3_ENCODING_ values
     80      */
     81     ANTLR3_UINT8		encoding;
     82 
     83     /** Pointer to function that sets the string value to a specific string in the default encoding
     84      *  for this string. For instance, if this is 8 bit, then this function is the same as set8
     85      *  but if the encoding is UTF16, then the pointer is assumed to point to UTF16 characters, not
     86      *  8 bit.
     87      */
     88     pANTLR3_UINT8   (*set)	(struct ANTLR3_STRING_struct * string, const char * chars);
     89 
     90     /** Pointer to function that sets the string value from a specific 8 bit string, whatever the encoding
     91      *  of this string. For instance, if this is an 8 bit string, then this function is the same as set
     92      *  but if the encoding is UTF16, then the pointer is assumed to point to 8 bit characters that must
     93      *  be converted to UTF16 characters on the fly.
     94      */
     95     pANTLR3_UINT8   (*set8)	(struct ANTLR3_STRING_struct * string, const char * chars);
     96 
     97     /** Pointer to function that appends a raw char * type pointer in the default encoding
     98      *  for this string. For instance, if this is 8 bit, then this function is the same as append8
     99      *  but if the encoding is UTF16, then the pointer is assumed to point to UTF16 characters not
    100      *  8 bit.
    101      */
    102     pANTLR3_UINT8   (*append)	(struct ANTLR3_STRING_struct * string, const char * newbit);
    103 
    104     /** Pointer to function that appends a raw char * type pointer of 8 bit characters to this string,
    105      *  whatever its encoding. For instance, if this is a UTF16 string, then this function assumes the pointer
    106      *  points to 8 bit characters that must be converted on the fly.
    107      */
    108     pANTLR3_UINT8   (*append8)	(struct ANTLR3_STRING_struct * string, const char * newbit);
    109 
    110     /** Pointer to function that inserts the supplied string at the specified
    111      *  offset in the current string in the default encoding for this string. For instance, if this is an 8
    112      *  bit string, then this is the same as insert8, but if this is a UTF16 string, then the pointer
    113      *  must point to UTF16 characters.
    114      */
    115     pANTLR3_UINT8   (*insert)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, const char * newbit);
    116 
    117     /** Pointer to function that inserts the supplied 8 bit string at the specified
    118      *  offset in the current string, whatever its encoding. For instance, if this is a UTF16 string
    119      *  then the pointer is assumed to point at 8 bit characters that must be converted on the fly.
    120      */
    121     pANTLR3_UINT8   (*insert8)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, const char * newbit);
    122 
    123     /** Pointer to function that sets the string value to a copy of the supplied string (strings must be in the
    124      *  same encoding).
    125      */
    126     pANTLR3_UINT8   (*setS)	(struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * chars);
    127 
    128     /** Pointer to function that appends a copy of the characters contained in another string. Strings must be in the
    129      *  same encoding.
    130      */
    131     pANTLR3_UINT8   (*appendS)	(struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * newbit);
    132 
    133     /** Pointer to function that inserts a copy of the characters in the supplied string at the specified
    134      *  offset in the current string. Strings must be in the same encoding.
    135      */
    136     pANTLR3_UINT8   (*insertS)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, struct ANTLR3_STRING_struct * newbit);
    137 
    138     /** Pointer to function that inserts the supplied integer in string form at the specified
    139      *  offset in the current string.
    140      */
    141     pANTLR3_UINT8   (*inserti)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
    142 
    143     /** Pointer to function that adds a single character to the end of the string, in the encoding of the
    144      *  string - 8 bit, UTF16, utf-8 etc. Input is a single UTF32 (32 bits wide integer) character.
    145      */
    146     pANTLR3_UINT8   (*addc)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 c);
    147 
    148     /** Pointer to function that adds the stringified representation of an integer
    149      *  to the string.
    150      */
    151     pANTLR3_UINT8   (*addi)	(struct ANTLR3_STRING_struct * string, ANTLR3_INT32 i);
    152 
    153     /** Pointer to function that compares the text of a string to the supplied
    154      *  8 bit character string and returns a result a la strcmp(). NOTE(review): the declared return type is ANTLR3_UINT32 - confirm how a negative (less than) result is reported.
    155      */
    156     ANTLR3_UINT32   (*compare8)	(struct ANTLR3_STRING_struct * string, const char * compStr);
    157 
    158     /** Pointer to a function that compares the text of a string with the supplied character string
    159      *  (which is assumed to be in the same encoding as the string itself) and returns a result
    160      *  a la strcmp(). NOTE(review): the declared return type is ANTLR3_UINT32 - confirm how a negative (less than) result is reported.
    161      */
    162     ANTLR3_UINT32   (*compare)	(struct ANTLR3_STRING_struct * string, const char * compStr);
    163 
    164     /** Pointer to a function that compares the text of a string with the supplied string
    165      *  (which is assumed to be in the same encoding as the string itself) and returns a result
    166      *  a la strcmp(). NOTE(review): the declared return type is ANTLR3_UINT32 - confirm how a negative (less than) result is reported.
    167      */
    168     ANTLR3_UINT32   (*compareS)	(struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * compStr);
    169 
    170     /** Pointer to a function that returns the character indexed at the supplied
    171      *  offset as a 32 bit character.
    172      */
    173     ANTLR3_UCHAR    (*charAt)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 offset);
    174 
    175     /** Pointer to a function that returns a substring of the supplied string a la .subString(s,e)
    176      *  in the Java language.
    177      */
    178     struct ANTLR3_STRING_struct *
    179 					(*subString)    (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
    180 
    181     /** Pointer to a function that returns the integer representation of any numeric characters
    182      *  at the beginning of the string
    183      */
    184     ANTLR3_INT32	(*toInt32)	    (struct ANTLR3_STRING_struct * string);
    185 
    186     /** Pointer to a function that yields an 8 bit string regardless of the encoding of the supplied
    187      *  string. This is useful when you want to use the text of a token in some way that requires an 8 bit
    188      *  value, such as the key for a hashtable. The function is required to produce a usable string even
    189      *  if the text given as input has characters that do not fit in 8 bit space; it will replace them
    190      *  with some arbitrary character such as '?'
    191      */
    192     struct ANTLR3_STRING_struct *
    193 					(*to8)	    (struct ANTLR3_STRING_struct * string);
    194 
    195 	/// Pointer to a function that yields a UTF8 encoded string of the current string,
    196 	/// regardless of the current encoding of the string. Because there is currently no UTF8
    197 	/// handling in the string class, it therefore creates a string that is useful only for read only
    198 	/// applications as it will not contain methods that deal with UTF8 at the moment.
    199 	///
    200 	struct ANTLR3_STRING_struct *
    201 					(*toUTF8)	(struct ANTLR3_STRING_struct * string);
    202 
    203 }
    204     ANTLR3_STRING;
    205 
    206 /** Definition of the string factory interface, which creates and tracks
    207  *  strings for you of various shapes and sizes.
    208  */
    209 typedef struct	ANTLR3_STRING_FACTORY_struct
    210 {
    211     /** List of all the strings that have been allocated by the factory
    212      */
    213     pANTLR3_VECTOR    strings;
    214 
    215     /** Index of next string that we allocate
    216      */
    217     ANTLR3_UINT32   index;
    218 
    219     /** Pointer to function that manufactures an empty string
    220      */
    221     pANTLR3_STRING  (*newRaw)	(struct ANTLR3_STRING_FACTORY_struct * factory);
    222 
    223     /** Pointer to function that manufactures a raw string with no text in it but space for size
    224      *  characters.
    225      */
    226     pANTLR3_STRING  (*newSize)	(struct ANTLR3_STRING_FACTORY_struct * factory, ANTLR3_UINT32 size);
    227 
    228     /** Pointer to function that manufactures a string from a given pointer and length. The pointer is assumed
    229      *  to point to characters in the same encoding as the string type, hence if this is a UTF16 string the
    230      *  pointer should point to UTF16 characters.
    231      */
    232     pANTLR3_STRING  (*newPtr)	(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
    233 
    234     /** Pointer to function that manufactures a string from a given pointer and length. The pointer is assumed to
    235      *  point at 8 bit characters which must be converted on the fly to the encoding of the actual string.
    236      */
    237     pANTLR3_STRING  (*newPtr8)	(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
    238 
    239     /** Pointer to function that manufactures a string from a given pointer and works out the length. The pointer is
    240      *  assumed to point to characters in the same encoding as the string itself, i.e. UTF16 if a UTF16
    241      *  string and so on.
    242      */
    243     pANTLR3_STRING  (*newStr)	(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string);
    244 
    245     /** Pointer to function that manufactures a string from a given pointer, with no explicit length supplied
    246      *  (compare newPtr8). The pointer should point to 8 bit characters regardless of the actual encoding of
    247      *  the string. The 8 bit characters will be converted to the actual string encoding on the fly.
    248      */
    249     pANTLR3_STRING  (*newStr8)	(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string);
    250 
    251     /** Pointer to function that deletes the string altogether
    252      */
    253     void	    (*destroy)	(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_STRING string);
    254 
    255     /** Pointer to function that returns a copy of the string in printable form without any control
    256      *  characters in it.
    257      */
    258     pANTLR3_STRING  (*printable)(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_STRING string);
    259 
    260     /** Pointer to function that closes the factory
    261      */
    262     void	    (*close)	(struct ANTLR3_STRING_FACTORY_struct * factory);
    263 
    264 }
    265     ANTLR3_STRING_FACTORY;
    266 
    267 #ifdef __cplusplus
    268 }
    269 #endif
    270 
    271 #endif
    272 
    273