1 /// 2 /// \file 3 /// Contains the C implementation of ANTLR3 bitsets as adapted from Terence Parr's 4 /// Java implementation. 5 /// 6 7 // [The "BSD licence"] 8 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 9 // http://www.temporal-wave.com 10 // http://www.linkedin.com/in/jimidle 11 // 12 // All rights reserved. 13 // 14 // Redistribution and use in source and binary forms, with or without 15 // modification, are permitted provided that the following conditions 16 // are met: 17 // 1. Redistributions of source code must retain the above copyright 18 // notice, this list of conditions and the following disclaimer. 19 // 2. Redistributions in binary form must reproduce the above copyright 20 // notice, this list of conditions and the following disclaimer in the 21 // documentation and/or other materials provided with the distribution. 22 // 3. The name of the author may not be used to endorse or promote products 23 // derived from this software without specific prior written permission. 24 // 25 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 30 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 31 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 32 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 33 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 34 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 36 #include <antlr3bitset.h> 37 38 // External interface 39 // 40 41 static pANTLR3_BITSET antlr3BitsetClone (pANTLR3_BITSET inSet); 42 static pANTLR3_BITSET antlr3BitsetOR (pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2); 43 static void antlr3BitsetORInPlace (pANTLR3_BITSET bitset, pANTLR3_BITSET bitset2); 44 static ANTLR3_UINT32 antlr3BitsetSize (pANTLR3_BITSET bitset); 45 static void antlr3BitsetAdd (pANTLR3_BITSET bitset, ANTLR3_INT32 bit); 46 static ANTLR3_BOOLEAN antlr3BitsetEquals (pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2); 47 static ANTLR3_BOOLEAN antlr3BitsetMember (pANTLR3_BITSET bitset, ANTLR3_UINT32 bit); 48 static ANTLR3_UINT32 antlr3BitsetNumBits (pANTLR3_BITSET bitset); 49 static void antlr3BitsetRemove (pANTLR3_BITSET bitset, ANTLR3_UINT32 bit); 50 static ANTLR3_BOOLEAN antlr3BitsetIsNil (pANTLR3_BITSET bitset); 51 static pANTLR3_INT32 antlr3BitsetToIntList (pANTLR3_BITSET bitset); 52 53 // Local functions 54 // 55 static void growToInclude (pANTLR3_BITSET bitset, ANTLR3_INT32 bit); 56 static void grow (pANTLR3_BITSET bitset, ANTLR3_INT32 newSize); 57 static ANTLR3_UINT64 bitMask (ANTLR3_UINT32 bitNumber); 58 static ANTLR3_UINT32 numWordsToHold (ANTLR3_UINT32 bit); 59 static ANTLR3_UINT32 wordNumber (ANTLR3_UINT32 bit); 60 static void antlr3BitsetFree (pANTLR3_BITSET bitset); 61 62 static void 63 antlr3BitsetFree(pANTLR3_BITSET bitset) 64 { 65 if (bitset->blist.bits != NULL) 66 { 67 ANTLR3_FREE(bitset->blist.bits); 68 bitset->blist.bits = NULL; 69 } 70 ANTLR3_FREE(bitset); 71 72 return; 73 } 74 75 ANTLR3_API pANTLR3_BITSET 76 antlr3BitsetNew(ANTLR3_UINT32 numBits) 77 { 78 pANTLR3_BITSET bitset; 79 80 ANTLR3_UINT32 numelements; 81 82 // Allocate memory for the bitset structure itself 83 // 84 bitset = (pANTLR3_BITSET) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BITSET)); 85 86 if (bitset == NULL) 87 { 88 return NULL; 89 } 90 91 // Avoid memory thrashing at the up front expense of a few bytes 92 // 93 if (numBits < (8 * ANTLR3_BITSET_BITS)) 94 { 95 numBits = 8 * ANTLR3_BITSET_BITS; 96 } 97 98 // No we need to allocate the memory for the number of bits asked for 99 // in multiples of ANTLR3_UINT64. 100 // 101 numelements = ((numBits -1) >> ANTLR3_BITSET_LOG_BITS) + 1; 102 103 bitset->blist.bits = (pANTLR3_BITWORD) ANTLR3_MALLOC((size_t)(numelements * sizeof(ANTLR3_BITWORD))); 104 memset(bitset->blist.bits, 0, (size_t)(numelements * sizeof(ANTLR3_BITWORD))); 105 bitset->blist.length = numelements; 106 107 if (bitset->blist.bits == NULL) 108 { 109 ANTLR3_FREE(bitset); 110 return NULL; 111 } 112 113 antlr3BitsetSetAPI(bitset); 114 115 116 // All seems good 117 // 118 return bitset; 119 } 120 121 ANTLR3_API void 122 antlr3BitsetSetAPI(pANTLR3_BITSET bitset) 123 { 124 bitset->clone = antlr3BitsetClone; 125 bitset->bor = antlr3BitsetOR; 126 bitset->borInPlace = antlr3BitsetORInPlace; 127 bitset->size = antlr3BitsetSize; 128 bitset->add = antlr3BitsetAdd; 129 bitset->grow = grow; 130 bitset->equals = antlr3BitsetEquals; 131 bitset->isMember = antlr3BitsetMember; 132 bitset->numBits = antlr3BitsetNumBits; 133 bitset->remove = antlr3BitsetRemove; 134 bitset->isNilNode = antlr3BitsetIsNil; 135 bitset->toIntList = antlr3BitsetToIntList; 136 137 bitset->free = antlr3BitsetFree; 138 } 139 140 ANTLR3_API pANTLR3_BITSET 141 antlr3BitsetCopy(pANTLR3_BITSET_LIST blist) 142 { 143 pANTLR3_BITSET bitset; 144 int numElements; 145 146 // Allocate memory for the bitset structure itself 147 // 148 bitset = (pANTLR3_BITSET) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BITSET)); 149 150 if (bitset == NULL) 151 { 152 return NULL; 153 } 154 155 numElements = blist->length; 156 157 // Avoid memory thrashing at the expense of a few more bytes 158 // 159 if (numElements < 8) 160 { 161 numElements = 8; 162 } 163 164 // Install the length in ANTLR3_UINT64 units 165 // 166 bitset->blist.length = numElements; 167 168 bitset->blist.bits = (pANTLR3_BITWORD)ANTLR3_MALLOC((size_t)(numElements * sizeof(ANTLR3_BITWORD))); 169 170 if (bitset->blist.bits == NULL) 171 { 172 ANTLR3_FREE(bitset); 173 return NULL; 174 } 175 176 ANTLR3_MEMCPY(bitset->blist.bits, blist->bits, (ANTLR3_UINT64)(numElements * sizeof(ANTLR3_BITWORD))); 177 178 // All seems good 179 // 180 return bitset; 181 } 182 183 static pANTLR3_BITSET 184 antlr3BitsetClone(pANTLR3_BITSET inSet) 185 { 186 pANTLR3_BITSET bitset; 187 188 // Allocate memory for the bitset structure itself 189 // 190 bitset = antlr3BitsetNew(ANTLR3_BITSET_BITS * inSet->blist.length); 191 192 if (bitset == NULL) 193 { 194 return NULL; 195 } 196 197 // Install the actual bits in the source set 198 // 199 ANTLR3_MEMCPY(bitset->blist.bits, inSet->blist.bits, (ANTLR3_UINT64)(inSet->blist.length * sizeof(ANTLR3_BITWORD))); 200 201 // All seems good 202 // 203 return bitset; 204 } 205 206 207 ANTLR3_API pANTLR3_BITSET 208 antlr3BitsetList(pANTLR3_HASH_TABLE list) 209 { 210 pANTLR3_BITSET bitSet; 211 pANTLR3_HASH_ENUM en; 212 pANTLR3_HASH_KEY key; 213 ANTLR3_UINT64 bit; 214 215 // We have no idea what exactly is in the list 216 // so create a default bitset and then just add stuff 217 // as we enumerate. 218 // 219 bitSet = antlr3BitsetNew(0); 220 221 en = antlr3EnumNew(list); 222 223 while (en->next(en, &key, (void **)(&bit)) == ANTLR3_SUCCESS) 224 { 225 bitSet->add(bitSet, (ANTLR3_UINT32)bit); 226 } 227 en->free(en); 228 229 return NULL; 230 } 231 232 /// 233 /// \brief 234 /// Creates a new bitset with at least one 64 bit bset of bits, but as 235 /// many 64 bit sets as are required. 236 /// 237 /// \param[in] bset 238 /// A variable number of bits to add to the set, ending in -1 (impossible bit). 239 /// 240 /// \returns 241 /// A new bit set with all of the specified bitmaps in it and the API 242 /// initialized. 243 /// 244 /// Call as: 245 /// - pANTLR3_BITSET = antlrBitsetLoad(bset, bset11, ..., -1); 246 /// - pANTLR3_BITSET = antlrBitsetOf(-1); Create empty bitset 247 /// 248 /// \remarks 249 /// Stdargs function - must supply -1 as last paremeter, which is NOT 250 /// added to the set. 251 /// 252 /// 253 ANTLR3_API pANTLR3_BITSET 254 antlr3BitsetLoad(pANTLR3_BITSET_LIST inBits) 255 { 256 pANTLR3_BITSET bitset; 257 ANTLR3_UINT32 count; 258 259 // Allocate memory for the bitset structure itself 260 // the input parameter is the bit number (0 based) 261 // to include in the bitset, so we need at at least 262 // bit + 1 bits. If any arguments indicate a 263 // a bit higher than the default number of bits (0 means default size) 264 // then Add() will take care 265 // of it. 266 // 267 bitset = antlr3BitsetNew(0); 268 269 if (bitset == NULL) 270 { 271 return NULL; 272 } 273 274 if (inBits != NULL) 275 { 276 // Now we can add the element bits into the set 277 // 278 count=0; 279 while (count < inBits->length) 280 { 281 if (bitset->blist.length <= count) 282 { 283 bitset->grow(bitset, count+1); 284 } 285 286 bitset->blist.bits[count] = *((inBits->bits)+count); 287 count++; 288 } 289 } 290 291 // return the new bitset 292 // 293 return bitset; 294 } 295 296 /// 297 /// \brief 298 /// Creates a new bitset with at least one element, but as 299 /// many elements are required. 300 /// 301 /// \param[in] bit 302 /// A variable number of bits to add to the set, ending in -1 (impossible bit). 303 /// 304 /// \returns 305 /// A new bit set with all of the specified elements added into it. 306 /// 307 /// Call as: 308 /// - pANTLR3_BITSET = antlrBitsetOf(n, n1, n2, -1); 309 /// - pANTLR3_BITSET = antlrBitsetOf(-1); Create empty bitset 310 /// 311 /// \remarks 312 /// Stdargs function - must supply -1 as last paremeter, which is NOT 313 /// added to the set. 314 /// 315 /// 316 ANTLR3_API pANTLR3_BITSET 317 antlr3BitsetOf(ANTLR3_INT32 bit, ...) 318 { 319 pANTLR3_BITSET bitset; 320 321 va_list ap; 322 323 // Allocate memory for the bitset structure itself 324 // the input parameter is the bit number (0 based) 325 // to include in the bitset, so we need at at least 326 // bit + 1 bits. If any arguments indicate a 327 // a bit higher than the default number of bits (0 menas default size) 328 // then Add() will take care 329 // of it. 330 // 331 bitset = antlr3BitsetNew(0); 332 333 if (bitset == NULL) 334 { 335 return NULL; 336 } 337 338 // Now we can add the element bits into the set 339 // 340 va_start(ap, bit); 341 while (bit != -1) 342 { 343 antlr3BitsetAdd(bitset, bit); 344 bit = va_arg(ap, ANTLR3_UINT32); 345 } 346 va_end(ap); 347 348 // return the new bitset 349 // 350 return bitset; 351 } 352 353 static pANTLR3_BITSET 354 antlr3BitsetOR(pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2) 355 { 356 pANTLR3_BITSET bitset; 357 358 if (bitset1 == NULL) 359 { 360 return antlr3BitsetClone(bitset2); 361 } 362 363 if (bitset2 == NULL) 364 { 365 return antlr3BitsetClone(bitset1); 366 } 367 368 // Allocate memory for the newly ordered bitset structure itself. 369 // 370 bitset = antlr3BitsetClone(bitset1); 371 372 antlr3BitsetORInPlace(bitset, bitset2); 373 374 return bitset; 375 376 } 377 378 static void 379 antlr3BitsetAdd(pANTLR3_BITSET bitset, ANTLR3_INT32 bit) 380 { 381 ANTLR3_UINT32 word; 382 383 word = wordNumber(bit); 384 385 if (word >= bitset->blist.length) 386 { 387 growToInclude(bitset, bit); 388 } 389 390 bitset->blist.bits[word] |= bitMask(bit); 391 392 } 393 394 static void 395 grow(pANTLR3_BITSET bitset, ANTLR3_INT32 newSize) 396 { 397 pANTLR3_BITWORD newBits; 398 399 // Space for newly sized bitset - TODO: come back to this and use realloc?, it may 400 // be more efficient... 401 // 402 newBits = (pANTLR3_BITWORD) ANTLR3_CALLOC(1, (size_t)(newSize * sizeof(ANTLR3_BITWORD))); 403 if (bitset->blist.bits != NULL) 404 { 405 // Copy existing bits 406 // 407 ANTLR3_MEMCPY((void *)newBits, (const void *)bitset->blist.bits, (size_t)(bitset->blist.length * sizeof(ANTLR3_BITWORD))); 408 409 // Out with the old bits... de de de derrr 410 // 411 ANTLR3_FREE(bitset->blist.bits); 412 } 413 414 // In with the new bits... keerrrang. 415 // 416 bitset->blist.bits = newBits; 417 bitset->blist.length = newSize; 418 } 419 420 static void 421 growToInclude(pANTLR3_BITSET bitset, ANTLR3_INT32 bit) 422 { 423 ANTLR3_UINT32 bl; 424 ANTLR3_UINT32 nw; 425 426 bl = (bitset->blist.length << 1); 427 nw = numWordsToHold(bit); 428 429 if (bl > nw) 430 { 431 bitset->grow(bitset, bl); 432 } 433 else 434 { 435 bitset->grow(bitset, nw); 436 } 437 } 438 439 static void 440 antlr3BitsetORInPlace(pANTLR3_BITSET bitset, pANTLR3_BITSET bitset2) 441 { 442 ANTLR3_UINT32 minimum; 443 ANTLR3_UINT32 i; 444 445 if (bitset2 == NULL) 446 { 447 return; 448 } 449 450 451 // First make sure that the target bitset is big enough 452 // for the new bits to be ored in. 453 // 454 if (bitset->blist.length < bitset2->blist.length) 455 { 456 growToInclude(bitset, (bitset2->blist.length * sizeof(ANTLR3_BITWORD))); 457 } 458 459 // Or the miniimum number of bits after any resizing went on 460 // 461 if (bitset->blist.length < bitset2->blist.length) 462 { 463 minimum = bitset->blist.length; 464 } 465 else 466 { 467 minimum = bitset2->blist.length; 468 } 469 470 for (i = minimum; i > 0; i--) 471 { 472 bitset->blist.bits[i-1] |= bitset2->blist.bits[i-1]; 473 } 474 } 475 476 static ANTLR3_UINT64 477 bitMask(ANTLR3_UINT32 bitNumber) 478 { 479 return ((ANTLR3_UINT64)1) << (bitNumber & (ANTLR3_BITSET_MOD_MASK)); 480 } 481 482 static ANTLR3_UINT32 483 antlr3BitsetSize(pANTLR3_BITSET bitset) 484 { 485 ANTLR3_UINT32 degree; 486 ANTLR3_INT32 i; 487 ANTLR3_INT8 bit; 488 489 // TODO: Come back to this, it may be faster to & with 0x01 490 // then shift right a copy of the 4 bits, than shift left a constant of 1. 491 // But then again, the optimizer might just work this out 492 // anyway. 493 // 494 degree = 0; 495 for (i = bitset->blist.length - 1; i>= 0; i--) 496 { 497 if (bitset->blist.bits[i] != 0) 498 { 499 for (bit = ANTLR3_BITSET_BITS - 1; bit >= 0; bit--) 500 { 501 if ((bitset->blist.bits[i] & (((ANTLR3_BITWORD)1) << bit)) != 0) 502 { 503 degree++; 504 } 505 } 506 } 507 } 508 return degree; 509 } 510 511 static ANTLR3_BOOLEAN 512 antlr3BitsetEquals(pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2) 513 { 514 ANTLR3_INT32 minimum; 515 ANTLR3_INT32 i; 516 517 if (bitset1 == NULL || bitset2 == NULL) 518 { 519 return ANTLR3_FALSE; 520 } 521 522 // Work out the minimum comparison set 523 // 524 if (bitset1->blist.length < bitset2->blist.length) 525 { 526 minimum = bitset1->blist.length; 527 } 528 else 529 { 530 minimum = bitset2->blist.length; 531 } 532 533 // Make sure explict in common bits are equal 534 // 535 for (i = minimum - 1; i >=0 ; i--) 536 { 537 if (bitset1->blist.bits[i] != bitset2->blist.bits[i]) 538 { 539 return ANTLR3_FALSE; 540 } 541 } 542 543 // Now make sure the bits of the larger set are all turned 544 // off. 545 // 546 if (bitset1->blist.length > (ANTLR3_UINT32)minimum) 547 { 548 for (i = minimum ; (ANTLR3_UINT32)i < bitset1->blist.length; i++) 549 { 550 if (bitset1->blist.bits[i] != 0) 551 { 552 return ANTLR3_FALSE; 553 } 554 } 555 } 556 else if (bitset2->blist.length > (ANTLR3_UINT32)minimum) 557 { 558 for (i = minimum; (ANTLR3_UINT32)i < bitset2->blist.length; i++) 559 { 560 if (bitset2->blist.bits[i] != 0) 561 { 562 return ANTLR3_FALSE; 563 } 564 } 565 } 566 567 return ANTLR3_TRUE; 568 } 569 570 static ANTLR3_BOOLEAN 571 antlr3BitsetMember(pANTLR3_BITSET bitset, ANTLR3_UINT32 bit) 572 { 573 ANTLR3_UINT32 wordNo; 574 575 wordNo = wordNumber(bit); 576 577 if (wordNo >= bitset->blist.length) 578 { 579 return ANTLR3_FALSE; 580 } 581 582 if ((bitset->blist.bits[wordNo] & bitMask(bit)) == 0) 583 { 584 return ANTLR3_FALSE; 585 } 586 else 587 { 588 return ANTLR3_TRUE; 589 } 590 } 591 592 static void 593 antlr3BitsetRemove(pANTLR3_BITSET bitset, ANTLR3_UINT32 bit) 594 { 595 ANTLR3_UINT32 wordNo; 596 597 wordNo = wordNumber(bit); 598 599 if (wordNo < bitset->blist.length) 600 { 601 bitset->blist.bits[wordNo] &= ~(bitMask(bit)); 602 } 603 } 604 static ANTLR3_BOOLEAN 605 antlr3BitsetIsNil(pANTLR3_BITSET bitset) 606 { 607 ANTLR3_INT32 i; 608 609 for (i = bitset->blist.length -1; i>= 0; i--) 610 { 611 if (bitset->blist.bits[i] != 0) 612 { 613 return ANTLR3_FALSE; 614 } 615 } 616 617 return ANTLR3_TRUE; 618 } 619 620 static ANTLR3_UINT32 621 numWordsToHold(ANTLR3_UINT32 bit) 622 { 623 return (bit >> ANTLR3_BITSET_LOG_BITS) + 1; 624 } 625 626 static ANTLR3_UINT32 627 wordNumber(ANTLR3_UINT32 bit) 628 { 629 return bit >> ANTLR3_BITSET_LOG_BITS; 630 } 631 632 static ANTLR3_UINT32 633 antlr3BitsetNumBits(pANTLR3_BITSET bitset) 634 { 635 return bitset->blist.length << ANTLR3_BITSET_LOG_BITS; 636 } 637 638 /** Produce an integer list of all the bits that are turned on 639 * in this bitset. Used for error processing in the main as the bitset 640 * reresents a number of integer tokens which we use for follow sets 641 * and so on. 642 * 643 * The first entry is the number of elements following in the list. 644 */ 645 static pANTLR3_INT32 646 antlr3BitsetToIntList (pANTLR3_BITSET bitset) 647 { 648 ANTLR3_UINT32 numInts; // How many integers we will need 649 ANTLR3_UINT32 numBits; // How many bits are in the set 650 ANTLR3_UINT32 i; 651 ANTLR3_UINT32 index; 652 653 pANTLR3_INT32 intList; 654 655 numInts = bitset->size(bitset) + 1; 656 numBits = bitset->numBits(bitset); 657 658 intList = (pANTLR3_INT32)ANTLR3_MALLOC(numInts * sizeof(ANTLR3_INT32)); 659 660 if (intList == NULL) 661 { 662 return NULL; // Out of memory 663 } 664 665 intList[0] = numInts; 666 667 // Enumerate the bits that are turned on 668 // 669 for (i = 0, index = 1; i<numBits; i++) 670 { 671 if (bitset->isMember(bitset, i) == ANTLR3_TRUE) 672 { 673 intList[index++] = i; 674 } 675 } 676 677 // Result set 678 // 679 return intList; 680 } 681 682