1 #ifndef ANTLR3COLLECTIONS_H 2 #define ANTLR3COLLECTIONS_H 3 4 // [The "BSD licence"] 5 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 6 // http://www.temporal-wave.com 7 // http://www.linkedin.com/in/jimidle 8 // 9 // All rights reserved. 10 // 11 // Redistribution and use in source and binary forms, with or without 12 // modification, are permitted provided that the following conditions 13 // are met: 14 // 1. Redistributions of source code must retain the above copyright 15 // notice, this list of conditions and the following disclaimer. 16 // 2. Redistributions in binary form must reproduce the above copyright 17 // notice, this list of conditions and the following disclaimer in the 18 // documentation and/or other materials provided with the distribution. 19 // 3. The name of the author may not be used to endorse or promote products 20 // derived from this software without specific prior written permission. 21 // 22 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 33 #include <antlr3defs.h> 34 #include <antlr3bitset.h> 35 36 #define ANTLR3_HASH_TYPE_INT 0 /**< Indicates the hashed file has integer keys */ 37 #define ANTLR3_HASH_TYPE_STR 1 /**< Indicates the hashed file has numeric keys */ 38 39 #ifdef __cplusplus 40 extern "C" { 41 #endif 42 43 typedef struct ANTLR3_HASH_KEY_struct 44 { 45 ANTLR3_UINT8 type; /**< One of ##ANTLR3_HASH_TYPE_INT or ##ANTLR3_HASH_TYPE_STR */ 46 47 union 48 { 49 pANTLR3_UINT8 sKey; /**< Used if type is ANTLR3_HASH_TYPE_STR */ 50 ANTLR3_INTKEY iKey; /**< used if type is ANTLR3_HASH_TYPE_INT */ 51 } 52 key; 53 54 } ANTLR3_HASH_KEY, *pANTLR3_HASH_KEY; 55 56 /** Internal structure representing an element in a hash bucket. 57 * Stores the original key so that duplicate keys can be rejected 58 * if necessary, and contains function can be supported. If the hash key 59 * could be unique I would have invented the perfect compression algorithm ;-) 60 */ 61 typedef struct ANTLR3_HASH_ENTRY_struct 62 { 63 /** Key that created this particular entry 64 */ 65 ANTLR3_HASH_KEY keybase; 66 67 /** Pointer to the data for this particular entry 68 */ 69 void * data; 70 71 /** Pointer to routine that knows how to release the memory 72 * structure pointed at by data. If this is NULL then we assume 73 * that the data pointer does not need to be freed when the entry 74 * is deleted from the table. 75 */ 76 void (ANTLR3_CDECL *free)(void * data); 77 78 /** Pointer to the next entry in this bucket if there 79 * is one. Sometimes different keys will hash to the same bucket (especially 80 * if the number of buckets is small). We could implement dual hashing algorithms 81 * to minimize this, but that seems over the top for what this is needed for. 82 */ 83 struct ANTLR3_HASH_ENTRY_struct * nextEntry; 84 } 85 ANTLR3_HASH_ENTRY; 86 87 /** Internal structure of a hash table bucket, which tracks 88 * all keys that hash to the same bucket. 89 */ 90 typedef struct ANTLR3_HASH_BUCKET_struct 91 { 92 /** Pointer to the first entry in the bucket (if any, it 93 * may be NULL). Duplicate entries are chained from 94 * here. 95 */ 96 pANTLR3_HASH_ENTRY entries; 97 98 } 99 ANTLR3_HASH_BUCKET; 100 101 /** Structure that tracks a hash table 102 */ 103 typedef struct ANTLR3_HASH_TABLE_struct 104 { 105 /** Indicates whether the table allows duplicate keys 106 */ 107 int allowDups; 108 109 /** Number of buckets available in this table 110 */ 111 ANTLR3_UINT32 modulo; 112 113 /** Points to the memory where the array of buckets 114 * starts. 115 */ 116 pANTLR3_HASH_BUCKET buckets; 117 118 /** How many elements currently exist in the table. 119 */ 120 ANTLR3_UINT32 count; 121 122 /** Whether the hash table should strdup the keys it is given or not. 123 */ 124 ANTLR3_BOOLEAN doStrdup; 125 126 /** Pointer to function to completely delete this table 127 */ 128 void (*free) (struct ANTLR3_HASH_TABLE_struct * table); 129 130 /* String keyed hashtable functions */ 131 void (*del) (struct ANTLR3_HASH_TABLE_struct * table, void * key); 132 pANTLR3_HASH_ENTRY (*remove) (struct ANTLR3_HASH_TABLE_struct * table, void * key); 133 void * (*get) (struct ANTLR3_HASH_TABLE_struct * table, void * key); 134 ANTLR3_INT32 (*put) (struct ANTLR3_HASH_TABLE_struct * table, void * key, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 135 136 /* Integer based hash functions */ 137 void (*delI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key); 138 pANTLR3_HASH_ENTRY (*removeI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key); 139 void * (*getI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key); 140 ANTLR3_INT32 (*putI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 141 142 ANTLR3_UINT32 (*size) (struct ANTLR3_HASH_TABLE_struct * table); 143 } 144 ANTLR3_HASH_TABLE; 145 146 147 /** Internal structure representing an enumeration of a table. 148 * This is returned by antlr3Enumeration() 149 * Allows the programmer to traverse the table in hash order without 150 * knowing what is in the actual table. 151 * 152 * Note that it is up to the caller to ensure that the table 153 * structure does not change in the hash bucket that is currently being 154 * enumerated as this structure just tracks the next pointers in the 155 * bucket series. 156 */ 157 typedef struct ANTLR3_HASH_ENUM_struct 158 { 159 /* Pointer to the table we are enumerating 160 */ 161 pANTLR3_HASH_TABLE table; 162 163 /* Bucket we are currently enumerating (if NULL then we are done) 164 */ 165 ANTLR3_UINT32 bucket; 166 167 /* Next entry to return, if NULL, then move to next bucket if any 168 */ 169 pANTLR3_HASH_ENTRY entry; 170 171 /* Interface 172 */ 173 int (*next) (struct ANTLR3_HASH_ENUM_struct * en, pANTLR3_HASH_KEY *key, void ** data); 174 void (*free) (struct ANTLR3_HASH_ENUM_struct * table); 175 } 176 ANTLR3_HASH_ENUM; 177 178 /** Structure that represents a LIST collection 179 */ 180 typedef struct ANTLR3_LIST_struct 181 { 182 /** Hash table that is storing the list elements 183 */ 184 pANTLR3_HASH_TABLE table; 185 186 void (*free) (struct ANTLR3_LIST_struct * list); 187 void (*del) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key); 188 void * (*get) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key); 189 void * (*remove) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key); 190 ANTLR3_INT32 (*add) (struct ANTLR3_LIST_struct * list, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 191 ANTLR3_INT32 (*put) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 192 ANTLR3_UINT32 (*size) (struct ANTLR3_LIST_struct * list); 193 194 } 195 ANTLR3_LIST; 196 197 /** Structure that represents a Stack collection 198 */ 199 typedef struct ANTLR3_STACK_struct 200 { 201 /** List that supports the stack structure 202 */ 203 pANTLR3_VECTOR vector; 204 205 /** Used for quick access to the top of the stack 206 */ 207 void * top; 208 void (*free) (struct ANTLR3_STACK_struct * stack); 209 void * (*pop) (struct ANTLR3_STACK_struct * stack); 210 void * (*get) (struct ANTLR3_STACK_struct * stack, ANTLR3_INTKEY key); 211 ANTLR3_BOOLEAN (*push) (struct ANTLR3_STACK_struct * stack, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 212 ANTLR3_UINT32 (*size) (struct ANTLR3_STACK_struct * stack); 213 void * (*peek) (struct ANTLR3_STACK_struct * stack); 214 215 } 216 ANTLR3_STACK; 217 218 /* Structure that represents a vector element 219 */ 220 typedef struct ANTLR3_VECTOR_ELEMENT_struct 221 { 222 void * element; 223 void (ANTLR3_CDECL *freeptr)(void *); 224 } 225 ANTLR3_VECTOR_ELEMENT, *pANTLR3_VECTOR_ELEMENT; 226 227 #define ANTLR3_VECTOR_INTERNAL_SIZE 16 228 /* Structure that represents a vector collection. A vector is a simple list 229 * that contains a pointer to the element and a pointer to a function that 230 * that can free the element if it is removed. It auto resizes but does not 231 * use hash techniques as it is referenced by a simple numeric index. It is not a 232 * sparse list, so if any element is deleted, then the ones following are moved 233 * down in memory and the count is adjusted. 234 */ 235 typedef struct ANTLR3_VECTOR_struct 236 { 237 /** Array of pointers to vector elements 238 */ 239 pANTLR3_VECTOR_ELEMENT elements; 240 241 /** Number of entries currently in the list; 242 */ 243 ANTLR3_UINT32 count; 244 245 /** Many times, a vector holds just a few nodes in an AST and it 246 * is too much overhead to malloc the space for elements so 247 * at the expense of a few bytes of memory, we hold the first 248 * few elements internally. It means we must copy them when 249 * we grow beyond this initial size, but that is less overhead than 250 * the malloc/free callas we would otherwise require. 251 */ 252 ANTLR3_VECTOR_ELEMENT internal[ANTLR3_VECTOR_INTERNAL_SIZE]; 253 254 /** Indicates if the structure was made by a factory, in which 255 * case only the factory can free the memory for the actual vector, 256 * though the vector free function is called and will recurse through its 257 * entries calling any free pointers for each entry. 258 */ 259 ANTLR3_BOOLEAN factoryMade; 260 261 /** Total number of entries in elements at any point in time 262 */ 263 ANTLR3_UINT32 elementsSize; 264 265 void (ANTLR3_CDECL *free) (struct ANTLR3_VECTOR_struct * vector); 266 void (*del) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry); 267 void * (*get) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry); 268 void * (*remove) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry); 269 void (*clear) (struct ANTLR3_VECTOR_struct * vector); 270 ANTLR3_BOOLEAN (*swap) (struct ANTLR3_VECTOR_struct *, ANTLR3_UINT32 entry1, ANTLR3_UINT32 entry2); 271 ANTLR3_UINT32 (*add) (struct ANTLR3_VECTOR_struct * vector, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 272 ANTLR3_UINT32 (*set) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry, void * element, void (ANTLR3_CDECL *freeptr)(void *), ANTLR3_BOOLEAN freeExisting); 273 ANTLR3_UINT32 (*size) (struct ANTLR3_VECTOR_struct * vector); 274 } 275 ANTLR3_VECTOR; 276 277 /** Default vector pool size if otherwise unspecified 278 */ 279 #define ANTLR3_FACTORY_VPOOL_SIZE 256 280 281 /** Structure that tracks vectors in a vector and auto deletes the vectors 282 * in the vector factory when closed. 283 */ 284 typedef struct ANTLR3_VECTOR_FACTORY_struct 285 { 286 287 /** List of all vector pools allocated so far 288 */ 289 pANTLR3_VECTOR *pools; 290 291 /** Count of the vector pools allocated so far (current active pool) 292 */ 293 ANTLR3_INT32 thisPool; 294 295 /** The next vector available in the pool 296 */ 297 ANTLR3_UINT32 nextVector; 298 299 /** Trick to quickly initialize a new vector via memcpy and not a function call 300 */ 301 ANTLR3_VECTOR unTruc; 302 303 /** Consumers from the factory can release a factory produced vector 304 * back to the factory so that it may be reused (and thus conserve memory) 305 * by another caller. The available vectors are stored here. Note that 306 * the only vectors avaible in the free chain are produced by this factory, so they 307 * need not be explicitly freed when the factory is closed. 308 */ 309 pANTLR3_STACK freeStack; 310 311 /** Function to close the vector factory 312 */ 313 void (*close) (struct ANTLR3_VECTOR_FACTORY_struct * factory); 314 315 /** Function to supply a new vector 316 */ 317 pANTLR3_VECTOR (*newVector) (struct ANTLR3_VECTOR_FACTORY_struct * factory); 318 319 /// Function to return a vector to the factory for reuse 320 /// 321 void (*returnVector) (struct ANTLR3_VECTOR_FACTORY_struct * factory, pANTLR3_VECTOR vector); 322 323 } 324 ANTLR3_VECTOR_FACTORY; 325 326 327 /* -------------- TRIE Interfaces ---------------- */ 328 329 330 /** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE 331 */ 332 typedef struct ANTLR3_TRIE_ENTRY_struct 333 { 334 ANTLR3_UINT32 type; 335 void (ANTLR3_CDECL *freeptr)(void *); 336 union 337 { 338 ANTLR3_INTKEY intVal; 339 void * ptr; 340 } data; 341 342 struct ANTLR3_TRIE_ENTRY_struct * next; /* Allows duplicate entries for same key in insertion order */ 343 } 344 ANTLR3_TRIE_ENTRY, * pANTLR3_TRIE_ENTRY; 345 346 347 /** Structure that defines an element/node in an ANTLR3_INT_TRIE 348 */ 349 typedef struct ANTLR3_INT_TRIE_NODE_struct 350 { 351 ANTLR3_UINT32 bitNum; /**< This is the left/right bit index for traversal along the nodes */ 352 ANTLR3_INTKEY key; /**< This is the actual key that the entry represents if it is a terminal node */ 353 pANTLR3_TRIE_ENTRY buckets; /**< This is the data bucket(s) that the key indexes, which may be NULL */ 354 struct ANTLR3_INT_TRIE_NODE_struct * leftN; /**< Pointer to the left node from here when sKey & bitNum = 0 */ 355 struct ANTLR3_INT_TRIE_NODE_struct * rightN; /**< Pointer to the right node from here when sKey & bitNum, = 1 */ 356 } 357 ANTLR3_INT_TRIE_NODE, * pANTLR3_INT_TRIE_NODE; 358 359 /** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation, 360 * as you might expect, the key is turned into a "string" by looking at bit(key, depth) 361 * of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63) 362 * and potentially a huge trie. This is the algorithm for a Patricia Trie. 363 * Note also that this trie [can] accept multiple entries for the same key and is 364 * therefore a kind of elastic bucket patricia trie. 365 * 366 * If you find this code useful, please feel free to 'steal' it for any purpose 367 * as covered by the BSD license under which ANTLR is issued. You can cut the code 368 * but as the ANTLR library is only about 50K (Windows Vista), you might find it 369 * easier to just link the library. Please keep all comments and licenses and so on 370 * in any version of this you create of course. 371 * 372 * Jim Idle. 373 * 374 */ 375 typedef struct ANTLR3_INT_TRIE_struct 376 { 377 pANTLR3_INT_TRIE_NODE root; /* Root node of this integer trie */ 378 pANTLR3_INT_TRIE_NODE current; /* Used to traverse the TRIE with the next() method */ 379 ANTLR3_UINT32 count; /* Current entry count */ 380 ANTLR3_BOOLEAN allowDups; /* Whether this trie accepts duplicate keys */ 381 382 383 pANTLR3_TRIE_ENTRY (*get) (struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key); 384 ANTLR3_BOOLEAN (*del) (struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key); 385 ANTLR3_BOOLEAN (*add) (struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key, ANTLR3_UINT32 type, ANTLR3_INTKEY intVal, void * data, void (ANTLR3_CDECL *freeptr)(void *)); 386 void (*free) (struct ANTLR3_INT_TRIE_struct * trie); 387 388 } 389 ANTLR3_INT_TRIE; 390 391 /** 392 * A topological sort system that given a set of dependencies of a node m on node n, 393 * can sort them in dependency order. This is a generally useful utility object 394 * that does not care what the things are it is sorting. Generally the set 395 * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR. 396 * I have provided a sort method that given ANTLR3_VECTOR as an input will sort 397 * the vector entries in place, as well as a sort method that just returns an 398 * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but 399 * some set of your own device. 400 * 401 * Of the two main algorithms that could be used, I chose to use the depth first 402 * search for unvisited nodes as a) This runs in linear time, and b) it is what 403 * we used in the ANTLR Tool to perform a topological sort of the input grammar files 404 * based on their dependencies. 405 */ 406 typedef struct ANTLR3_TOPO_struct 407 { 408 /** 409 * A vector of vectors of edges, built by calling the addEdge method() 410 * to indicate that node number n depends on node number m. Each entry in the vector 411 * contains a bitset, which has a bit index set for each node upon which the 412 * entry node depends. 413 */ 414 pANTLR3_BITSET * edges; 415 416 /** 417 * A vector used to build up the sorted output order. Note that 418 * as the vector contains UINT32 then the maximum node index is 419 * 'limited' to 2^32, as nodes should be zero based. 420 */ 421 pANTLR3_UINT32 sorted; 422 423 /** 424 * A vector used to detect cycles in the edge dependecies. It is used 425 * as a stack and each time we descend a node to one of its edges we 426 * add the node into this stack. If we find a node that we have already 427 * visited in the stack, then it means there wasa cycle such as 9->8->1->9 428 * as the only way a node can be on the stack is if we are currently 429 * descnding from it as we remove it from the stack as we exit from 430 * descending its dependencies 431 */ 432 pANTLR3_UINT32 cycle; 433 434 /** 435 * A flag that indicates the algorithm found a cycle in the edges 436 * such as 9->8->1->9 437 * If this flag is set after you have called one of the sort routines 438 * then the detected cycle will be contained in the cycle array and 439 * cycleLimit will point to the one after the last entry in the cycle. 440 */ 441 ANTLR3_BOOLEAN hasCycle; 442 443 /** 444 * A watermark used to accumulate potential cycles in the cycle array. 445 * This should be zero when we are done. Check hasCycle after calling one 446 * of the sort methods and if it is ANTLR3_TRUE then you can find the cycle 447 * in cycle[0]...cycle[cycleMark-1] 448 */ 449 ANTLR3_UINT32 cycleMark; 450 451 /** 452 * One more than the largest node index that is contained in edges/sorted. 453 */ 454 ANTLR3_UINT32 limit; 455 456 /** 457 * The set of visited nodes as determined by a set entry in 458 * the bitmap. 459 */ 460 pANTLR3_BITSET visited; 461 462 /** 463 * A method that adds an edge from one node to another. An edge 464 * of n -> m indicates that node n is dependent on node m. Note that 465 * while building these edges, it is perfectly OK to add nodes out of 466 * sequence. So, if you have edges: 467 * 468 * 3 -> 0 469 * 2 -> 1 470 * 1 -> 3 471 * 472 * The you can add them in that order and so add node 3 before nodes 2 and 1 473 * 474 */ 475 void (*addEdge) (struct ANTLR3_TOPO_struct * topo, ANTLR3_UINT32 edge, ANTLR3_UINT32 dependency); 476 477 478 /** 479 * A method that returns a pointer to an array of sorted node indexes. 480 * The array is sorted in topological sorted order. Note that the array 481 * is only as large as the largest node index you created an edge for. This means 482 * that if you had an input of 32 nodes, but that largest node with an edge 483 * was 16, then the returned array will be the sorted order of the first 16 484 * nodes and the last 16 nodes of your array are basically fine as they are 485 * as they had no dependencies and do not need any particular sort order. 486 * 487 * NB: If the structure that contains the array is freed, then the sorted 488 * array will be freed too so you should use the value of limit to 489 * make a long term copy of this array if you do not want to keep the topo 490 * structure around as well. 491 */ 492 pANTLR3_UINT32 (*sortToArray) (struct ANTLR3_TOPO_struct * topo); 493 494 /** 495 * A method that sorts the supplied ANTLR3_VECTOR in place based 496 * on the previously supplied edge data. 497 */ 498 void (*sortVector) (struct ANTLR3_TOPO_struct * topo, pANTLR3_VECTOR v); 499 500 /** 501 * A method to free this structure and any associated memory. 502 */ 503 void (*free) (struct ANTLR3_TOPO_struct * topo); 504 } 505 ANTLR3_TOPO; 506 507 #ifdef __cplusplus 508 } 509 #endif 510 511 #endif 512 513 514