1 /* dfa - DFA construction routines */ 2 3 /*- 4 * Copyright (c) 1990 The Regents of the University of California. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Vern Paxson. 9 * 10 * The United States Government has rights in this work pursuant 11 * to contract no. DE-AC03-76SF00098 between the United States 12 * Department of Energy and the University of California. 13 * 14 * Redistribution and use in source and binary forms with or without 15 * modification are permitted provided that: (1) source distributions retain 16 * this entire copyright notice and comment, and (2) distributions including 17 * binaries display the following acknowledgement: ``This product includes 18 * software developed by the University of California, Berkeley and its 19 * contributors'' in the documentation or other materials provided with the 20 * distribution and in all advertising materials mentioning features or use 21 * of this software. Neither the name of the University nor the names of 22 * its contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 25 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 26 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 27 */ 28 29 /* $Header: /home/daffy/u0/vern/flex/RCS/dfa.c,v 2.26 95/04/20 13:53:14 vern Exp $ */ 30 31 #include "flexdef.h" 32 33 34 /* declare functions that have forward references */ 35 36 void dump_associated_rules PROTO((FILE*, int)); 37 void dump_transitions PROTO((FILE*, int[])); 38 void sympartition PROTO((int[], int, int[], int[])); 39 int symfollowset PROTO((int[], int, int, int[])); 40 41 42 /* check_for_backing_up - check a DFA state for backing up 43 * 44 * synopsis 45 * void check_for_backing_up( int ds, int state[numecs] ); 46 * 47 * ds is the number of the state to check and state[] is its out-transitions, 48 * indexed by equivalence class. 49 */ 50 51 void check_for_backing_up( ds, state ) 52 int ds; 53 int state[]; 54 { 55 if ( (reject && ! dfaacc[ds].dfaacc_set) || 56 (! reject && ! dfaacc[ds].dfaacc_state) ) 57 { /* state is non-accepting */ 58 ++num_backing_up; 59 60 if ( backing_up_report ) 61 { 62 fprintf( backing_up_file, 63 _( "State #%d is non-accepting -\n" ), ds ); 64 65 /* identify the state */ 66 dump_associated_rules( backing_up_file, ds ); 67 68 /* Now identify it further using the out- and 69 * jam-transitions. 70 */ 71 dump_transitions( backing_up_file, state ); 72 73 putc( '\n', backing_up_file ); 74 } 75 } 76 } 77 78 79 /* check_trailing_context - check to see if NFA state set constitutes 80 * "dangerous" trailing context 81 * 82 * synopsis 83 * void check_trailing_context( int nfa_states[num_states+1], int num_states, 84 * int accset[nacc+1], int nacc ); 85 * 86 * NOTES 87 * Trailing context is "dangerous" if both the head and the trailing 88 * part are of variable size \and/ there's a DFA state which contains 89 * both an accepting state for the head part of the rule and NFA states 90 * which occur after the beginning of the trailing context. 91 * 92 * When such a rule is matched, it's impossible to tell if having been 93 * in the DFA state indicates the beginning of the trailing context or 94 * further-along scanning of the pattern. In these cases, a warning 95 * message is issued. 96 * 97 * nfa_states[1 .. num_states] is the list of NFA states in the DFA. 98 * accset[1 .. nacc] is the list of accepting numbers for the DFA state. 99 */ 100 101 void check_trailing_context( nfa_states, num_states, accset, nacc ) 102 int *nfa_states, num_states; 103 int *accset; 104 int nacc; 105 { 106 register int i, j; 107 108 for ( i = 1; i <= num_states; ++i ) 109 { 110 int ns = nfa_states[i]; 111 register int type = state_type[ns]; 112 register int ar = assoc_rule[ns]; 113 114 if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) 115 { /* do nothing */ 116 } 117 118 else if ( type == STATE_TRAILING_CONTEXT ) 119 { 120 /* Potential trouble. Scan set of accepting numbers 121 * for the one marking the end of the "head". We 122 * assume that this looping will be fairly cheap 123 * since it's rare that an accepting number set 124 * is large. 125 */ 126 for ( j = 1; j <= nacc; ++j ) 127 if ( accset[j] & YY_TRAILING_HEAD_MASK ) 128 { 129 line_warning( 130 _( "dangerous trailing context" ), 131 rule_linenum[ar] ); 132 return; 133 } 134 } 135 } 136 } 137 138 139 /* dump_associated_rules - list the rules associated with a DFA state 140 * 141 * Goes through the set of NFA states associated with the DFA and 142 * extracts the first MAX_ASSOC_RULES unique rules, sorts them, 143 * and writes a report to the given file. 144 */ 145 146 void dump_associated_rules( file, ds ) 147 FILE *file; 148 int ds; 149 { 150 register int i, j; 151 register int num_associated_rules = 0; 152 int rule_set[MAX_ASSOC_RULES + 1]; 153 int *dset = dss[ds]; 154 int size = dfasiz[ds]; 155 156 for ( i = 1; i <= size; ++i ) 157 { 158 register int rule_num = rule_linenum[assoc_rule[dset[i]]]; 159 160 for ( j = 1; j <= num_associated_rules; ++j ) 161 if ( rule_num == rule_set[j] ) 162 break; 163 164 if ( j > num_associated_rules ) 165 { /* new rule */ 166 if ( num_associated_rules < MAX_ASSOC_RULES ) 167 rule_set[++num_associated_rules] = rule_num; 168 } 169 } 170 171 bubble( rule_set, num_associated_rules ); 172 173 fprintf( file, _( " associated rule line numbers:" ) ); 174 175 for ( i = 1; i <= num_associated_rules; ++i ) 176 { 177 if ( i % 8 == 1 ) 178 putc( '\n', file ); 179 180 fprintf( file, "\t%d", rule_set[i] ); 181 } 182 183 putc( '\n', file ); 184 } 185 186 187 /* dump_transitions - list the transitions associated with a DFA state 188 * 189 * synopsis 190 * dump_transitions( FILE *file, int state[numecs] ); 191 * 192 * Goes through the set of out-transitions and lists them in human-readable 193 * form (i.e., not as equivalence classes); also lists jam transitions 194 * (i.e., all those which are not out-transitions, plus EOF). The dump 195 * is done to the given file. 196 */ 197 198 void dump_transitions( file, state ) 199 FILE *file; 200 int state[]; 201 { 202 register int i, ec; 203 int out_char_set[CSIZE]; 204 205 for ( i = 0; i < csize; ++i ) 206 { 207 ec = ABS( ecgroup[i] ); 208 out_char_set[i] = state[ec]; 209 } 210 211 fprintf( file, _( " out-transitions: " ) ); 212 213 list_character_set( file, out_char_set ); 214 215 /* now invert the members of the set to get the jam transitions */ 216 for ( i = 0; i < csize; ++i ) 217 out_char_set[i] = ! out_char_set[i]; 218 219 fprintf( file, _( "\n jam-transitions: EOF " ) ); 220 221 list_character_set( file, out_char_set ); 222 223 putc( '\n', file ); 224 } 225 226 227 /* epsclosure - construct the epsilon closure of a set of ndfa states 228 * 229 * synopsis 230 * int *epsclosure( int t[num_states], int *numstates_addr, 231 * int accset[num_rules+1], int *nacc_addr, 232 * int *hashval_addr ); 233 * 234 * NOTES 235 * The epsilon closure is the set of all states reachable by an arbitrary 236 * number of epsilon transitions, which themselves do not have epsilon 237 * transitions going out, unioned with the set of states which have non-null 238 * accepting numbers. t is an array of size numstates of nfa state numbers. 239 * Upon return, t holds the epsilon closure and *numstates_addr is updated. 240 * accset holds a list of the accepting numbers, and the size of accset is 241 * given by *nacc_addr. t may be subjected to reallocation if it is not 242 * large enough to hold the epsilon closure. 243 * 244 * hashval is the hash value for the dfa corresponding to the state set. 245 */ 246 247 int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) 248 int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; 249 { 250 register int stkpos, ns, tsp; 251 int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; 252 int stkend, nstate; 253 static int did_stk_init = false, *stk; 254 255 #define MARK_STATE(state) \ 256 trans1[state] = trans1[state] - MARKER_DIFFERENCE; 257 258 #define IS_MARKED(state) (trans1[state] < 0) 259 260 #define UNMARK_STATE(state) \ 261 trans1[state] = trans1[state] + MARKER_DIFFERENCE; 262 263 #define CHECK_ACCEPT(state) \ 264 { \ 265 nfaccnum = accptnum[state]; \ 266 if ( nfaccnum != NIL ) \ 267 accset[++nacc] = nfaccnum; \ 268 } 269 270 #define DO_REALLOCATION \ 271 { \ 272 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ 273 ++num_reallocs; \ 274 t = reallocate_integer_array( t, current_max_dfa_size ); \ 275 stk = reallocate_integer_array( stk, current_max_dfa_size ); \ 276 } \ 277 278 #define PUT_ON_STACK(state) \ 279 { \ 280 if ( ++stkend >= current_max_dfa_size ) \ 281 DO_REALLOCATION \ 282 stk[stkend] = state; \ 283 MARK_STATE(state) \ 284 } 285 286 #define ADD_STATE(state) \ 287 { \ 288 if ( ++numstates >= current_max_dfa_size ) \ 289 DO_REALLOCATION \ 290 t[numstates] = state; \ 291 hashval += state; \ 292 } 293 294 #define STACK_STATE(state) \ 295 { \ 296 PUT_ON_STACK(state) \ 297 CHECK_ACCEPT(state) \ 298 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ 299 ADD_STATE(state) \ 300 } 301 302 303 if ( ! did_stk_init ) 304 { 305 stk = allocate_integer_array( current_max_dfa_size ); 306 did_stk_init = true; 307 } 308 309 nacc = stkend = hashval = 0; 310 311 for ( nstate = 1; nstate <= numstates; ++nstate ) 312 { 313 ns = t[nstate]; 314 315 /* The state could be marked if we've already pushed it onto 316 * the stack. 317 */ 318 if ( ! IS_MARKED(ns) ) 319 { 320 PUT_ON_STACK(ns) 321 CHECK_ACCEPT(ns) 322 hashval += ns; 323 } 324 } 325 326 for ( stkpos = 1; stkpos <= stkend; ++stkpos ) 327 { 328 ns = stk[stkpos]; 329 transsym = transchar[ns]; 330 331 if ( transsym == SYM_EPSILON ) 332 { 333 tsp = trans1[ns] + MARKER_DIFFERENCE; 334 335 if ( tsp != NO_TRANSITION ) 336 { 337 if ( ! IS_MARKED(tsp) ) 338 STACK_STATE(tsp) 339 340 tsp = trans2[ns]; 341 342 if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) ) 343 STACK_STATE(tsp) 344 } 345 } 346 } 347 348 /* Clear out "visit" markers. */ 349 350 for ( stkpos = 1; stkpos <= stkend; ++stkpos ) 351 { 352 if ( IS_MARKED(stk[stkpos]) ) 353 UNMARK_STATE(stk[stkpos]) 354 else 355 flexfatal( 356 _( "consistency check failed in epsclosure()" ) ); 357 } 358 359 *ns_addr = numstates; 360 *hv_addr = hashval; 361 *nacc_addr = nacc; 362 363 return t; 364 } 365 366 367 /* increase_max_dfas - increase the maximum number of DFAs */ 368 369 void increase_max_dfas() 370 { 371 current_max_dfas += MAX_DFAS_INCREMENT; 372 373 ++num_reallocs; 374 375 base = reallocate_integer_array( base, current_max_dfas ); 376 def = reallocate_integer_array( def, current_max_dfas ); 377 dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); 378 accsiz = reallocate_integer_array( accsiz, current_max_dfas ); 379 dhash = reallocate_integer_array( dhash, current_max_dfas ); 380 dss = reallocate_int_ptr_array( dss, current_max_dfas ); 381 dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); 382 383 if ( nultrans ) 384 nultrans = 385 reallocate_integer_array( nultrans, current_max_dfas ); 386 } 387 388 389 /* ntod - convert an ndfa to a dfa 390 * 391 * Creates the dfa corresponding to the ndfa we've constructed. The 392 * dfa starts out in state #1. 393 */ 394 395 void ntod() 396 { 397 int *accset, ds, nacc, newds; 398 int sym, hashval, numstates, dsize; 399 int num_full_table_rows; /* used only for -f */ 400 int *nset, *dset; 401 int targptr, totaltrans, i, comstate, comfreq, targ; 402 int symlist[CSIZE + 1]; 403 int num_start_states; 404 int todo_head, todo_next; 405 406 /* Note that the following are indexed by *equivalence classes* 407 * and not by characters. Since equivalence classes are indexed 408 * beginning with 1, even if the scanner accepts NUL's, this 409 * means that (since every character is potentially in its own 410 * equivalence class) these arrays must have room for indices 411 * from 1 to CSIZE, so their size must be CSIZE + 1. 412 */ 413 int duplist[CSIZE + 1], state[CSIZE + 1]; 414 int targfreq[CSIZE + 1], targstate[CSIZE + 1]; 415 416 accset = allocate_integer_array( num_rules + 1 ); 417 nset = allocate_integer_array( current_max_dfa_size ); 418 419 /* The "todo" queue is represented by the head, which is the DFA 420 * state currently being processed, and the "next", which is the 421 * next DFA state number available (not in use). We depend on the 422 * fact that snstods() returns DFA's \in increasing order/, and thus 423 * need only know the bounds of the dfas to be processed. 424 */ 425 todo_head = todo_next = 0; 426 427 for ( i = 0; i <= csize; ++i ) 428 { 429 duplist[i] = NIL; 430 symlist[i] = false; 431 } 432 433 for ( i = 0; i <= num_rules; ++i ) 434 accset[i] = NIL; 435 436 if ( trace ) 437 { 438 dumpnfa( scset[1] ); 439 fputs( _( "\n\nDFA Dump:\n\n" ), stderr ); 440 } 441 442 inittbl(); 443 444 /* Check to see whether we should build a separate table for 445 * transitions on NUL characters. We don't do this for full-speed 446 * (-F) scanners, since for them we don't have a simple state 447 * number lying around with which to index the table. We also 448 * don't bother doing it for scanners unless (1) NUL is in its own 449 * equivalence class (indicated by a positive value of 450 * ecgroup[NUL]), (2) NUL's equivalence class is the last 451 * equivalence class, and (3) the number of equivalence classes is 452 * the same as the number of characters. This latter case comes 453 * about when useecs is false or when it's true but every character 454 * still manages to land in its own class (unlikely, but it's 455 * cheap to check for). If all these things are true then the 456 * character code needed to represent NUL's equivalence class for 457 * indexing the tables is going to take one more bit than the 458 * number of characters, and therefore we won't be assured of 459 * being able to fit it into a YY_CHAR variable. This rules out 460 * storing the transitions in a compressed table, since the code 461 * for interpreting them uses a YY_CHAR variable (perhaps it 462 * should just use an integer, though; this is worth pondering ... 463 * ###). 464 * 465 * Finally, for full tables, we want the number of entries in the 466 * table to be a power of two so the array references go fast (it 467 * will just take a shift to compute the major index). If 468 * encoding NUL's transitions in the table will spoil this, we 469 * give it its own table (note that this will be the case if we're 470 * not using equivalence classes). 471 */ 472 473 /* Note that the test for ecgroup[0] == numecs below accomplishes 474 * both (1) and (2) above 475 */ 476 if ( ! fullspd && ecgroup[0] == numecs ) 477 { 478 /* NUL is alone in its equivalence class, which is the 479 * last one. 480 */ 481 int use_NUL_table = (numecs == csize); 482 483 if ( fulltbl && ! use_NUL_table ) 484 { 485 /* We still may want to use the table if numecs 486 * is a power of 2. 487 */ 488 int power_of_two; 489 490 for ( power_of_two = 1; power_of_two <= csize; 491 power_of_two *= 2 ) 492 if ( numecs == power_of_two ) 493 { 494 use_NUL_table = true; 495 break; 496 } 497 } 498 499 if ( use_NUL_table ) 500 nultrans = allocate_integer_array( current_max_dfas ); 501 502 /* From now on, nultrans != nil indicates that we're 503 * saving null transitions for later, separate encoding. 504 */ 505 } 506 507 508 if ( fullspd ) 509 { 510 for ( i = 0; i <= numecs; ++i ) 511 state[i] = 0; 512 513 place_state( state, 0, 0 ); 514 dfaacc[0].dfaacc_state = 0; 515 } 516 517 else if ( fulltbl ) 518 { 519 if ( nultrans ) 520 /* We won't be including NUL's transitions in the 521 * table, so build it for entries from 0 .. numecs - 1. 522 */ 523 num_full_table_rows = numecs; 524 525 else 526 /* Take into account the fact that we'll be including 527 * the NUL entries in the transition table. Build it 528 * from 0 .. numecs. 529 */ 530 num_full_table_rows = numecs + 1; 531 532 /* Unless -Ca, declare it "short" because it's a real 533 * long-shot that that won't be large enough. 534 */ 535 out_str_dec( "static yyconst %s yy_nxt[][%d] =\n {\n", 536 /* '}' so vi doesn't get too confused */ 537 long_align ? "long" : "short", num_full_table_rows ); 538 539 outn( " {" ); 540 541 /* Generate 0 entries for state #0. */ 542 for ( i = 0; i < num_full_table_rows; ++i ) 543 mk2data( 0 ); 544 545 dataflush(); 546 outn( " },\n" ); 547 } 548 549 /* Create the first states. */ 550 551 num_start_states = lastsc * 2; 552 553 for ( i = 1; i <= num_start_states; ++i ) 554 { 555 numstates = 1; 556 557 /* For each start condition, make one state for the case when 558 * we're at the beginning of the line (the '^' operator) and 559 * one for the case when we're not. 560 */ 561 if ( i % 2 == 1 ) 562 nset[numstates] = scset[(i / 2) + 1]; 563 else 564 nset[numstates] = 565 mkbranch( scbol[i / 2], scset[i / 2] ); 566 567 nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); 568 569 if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) 570 { 571 numas += nacc; 572 totnst += numstates; 573 ++todo_next; 574 575 if ( variable_trailing_context_rules && nacc > 0 ) 576 check_trailing_context( nset, numstates, 577 accset, nacc ); 578 } 579 } 580 581 if ( ! fullspd ) 582 { 583 if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) 584 flexfatal( 585 _( "could not create unique end-of-buffer state" ) ); 586 587 ++numas; 588 ++num_start_states; 589 ++todo_next; 590 } 591 592 while ( todo_head < todo_next ) 593 { 594 targptr = 0; 595 totaltrans = 0; 596 597 for ( i = 1; i <= numecs; ++i ) 598 state[i] = 0; 599 600 ds = ++todo_head; 601 602 dset = dss[ds]; 603 dsize = dfasiz[ds]; 604 605 if ( trace ) 606 fprintf( stderr, _( "state # %d:\n" ), ds ); 607 608 sympartition( dset, dsize, symlist, duplist ); 609 610 for ( sym = 1; sym <= numecs; ++sym ) 611 { 612 if ( symlist[sym] ) 613 { 614 symlist[sym] = 0; 615 616 if ( duplist[sym] == NIL ) 617 { 618 /* Symbol has unique out-transitions. */ 619 numstates = symfollowset( dset, dsize, 620 sym, nset ); 621 nset = epsclosure( nset, &numstates, 622 accset, &nacc, &hashval ); 623 624 if ( snstods( nset, numstates, accset, 625 nacc, hashval, &newds ) ) 626 { 627 totnst = totnst + numstates; 628 ++todo_next; 629 numas += nacc; 630 631 if ( 632 variable_trailing_context_rules && 633 nacc > 0 ) 634 check_trailing_context( 635 nset, numstates, 636 accset, nacc ); 637 } 638 639 state[sym] = newds; 640 641 if ( trace ) 642 fprintf( stderr, "\t%d\t%d\n", 643 sym, newds ); 644 645 targfreq[++targptr] = 1; 646 targstate[targptr] = newds; 647 ++numuniq; 648 } 649 650 else 651 { 652 /* sym's equivalence class has the same 653 * transitions as duplist(sym)'s 654 * equivalence class. 655 */ 656 targ = state[duplist[sym]]; 657 state[sym] = targ; 658 659 if ( trace ) 660 fprintf( stderr, "\t%d\t%d\n", 661 sym, targ ); 662 663 /* Update frequency count for 664 * destination state. 665 */ 666 667 i = 0; 668 while ( targstate[++i] != targ ) 669 ; 670 671 ++targfreq[i]; 672 ++numdup; 673 } 674 675 ++totaltrans; 676 duplist[sym] = NIL; 677 } 678 } 679 680 if ( caseins && ! useecs ) 681 { 682 register int j; 683 684 for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) 685 { 686 if ( state[i] == 0 && state[j] != 0 ) 687 /* We're adding a transition. */ 688 ++totaltrans; 689 690 else if ( state[i] != 0 && state[j] == 0 ) 691 /* We're taking away a transition. */ 692 --totaltrans; 693 694 state[i] = state[j]; 695 } 696 } 697 698 numsnpairs += totaltrans; 699 700 if ( ds > num_start_states ) 701 check_for_backing_up( ds, state ); 702 703 if ( nultrans ) 704 { 705 nultrans[ds] = state[NUL_ec]; 706 state[NUL_ec] = 0; /* remove transition */ 707 } 708 709 if ( fulltbl ) 710 { 711 outn( " {" ); 712 713 /* Supply array's 0-element. */ 714 if ( ds == end_of_buffer_state ) 715 mk2data( -end_of_buffer_state ); 716 else 717 mk2data( end_of_buffer_state ); 718 719 for ( i = 1; i < num_full_table_rows; ++i ) 720 /* Jams are marked by negative of state 721 * number. 722 */ 723 mk2data( state[i] ? state[i] : -ds ); 724 725 dataflush(); 726 outn( " },\n" ); 727 } 728 729 else if ( fullspd ) 730 place_state( state, ds, totaltrans ); 731 732 else if ( ds == end_of_buffer_state ) 733 /* Special case this state to make sure it does what 734 * it's supposed to, i.e., jam on end-of-buffer. 735 */ 736 stack1( ds, 0, 0, JAMSTATE ); 737 738 else /* normal, compressed state */ 739 { 740 /* Determine which destination state is the most 741 * common, and how many transitions to it there are. 742 */ 743 744 comfreq = 0; 745 comstate = 0; 746 747 for ( i = 1; i <= targptr; ++i ) 748 if ( targfreq[i] > comfreq ) 749 { 750 comfreq = targfreq[i]; 751 comstate = targstate[i]; 752 } 753 754 bldtbl( state, ds, totaltrans, comstate, comfreq ); 755 } 756 } 757 758 if ( fulltbl ) 759 dataend(); 760 761 else if ( ! fullspd ) 762 { 763 cmptmps(); /* create compressed template entries */ 764 765 /* Create tables for all the states with only one 766 * out-transition. 767 */ 768 while ( onesp > 0 ) 769 { 770 mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], 771 onedef[onesp] ); 772 --onesp; 773 } 774 775 mkdeftbl(); 776 } 777 778 flex_free( (void *) accset ); 779 flex_free( (void *) nset ); 780 } 781 782 783 /* snstods - converts a set of ndfa states into a dfa state 784 * 785 * synopsis 786 * is_new_state = snstods( int sns[numstates], int numstates, 787 * int accset[num_rules+1], int nacc, 788 * int hashval, int *newds_addr ); 789 * 790 * On return, the dfa state number is in newds. 791 */ 792 793 int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) 794 int sns[], numstates, accset[], nacc, hashval, *newds_addr; 795 { 796 int didsort = 0; 797 register int i, j; 798 int newds, *oldsns; 799 800 for ( i = 1; i <= lastdfa; ++i ) 801 if ( hashval == dhash[i] ) 802 { 803 if ( numstates == dfasiz[i] ) 804 { 805 oldsns = dss[i]; 806 807 if ( ! didsort ) 808 { 809 /* We sort the states in sns so we 810 * can compare it to oldsns quickly. 811 * We use bubble because there probably 812 * aren't very many states. 813 */ 814 bubble( sns, numstates ); 815 didsort = 1; 816 } 817 818 for ( j = 1; j <= numstates; ++j ) 819 if ( sns[j] != oldsns[j] ) 820 break; 821 822 if ( j > numstates ) 823 { 824 ++dfaeql; 825 *newds_addr = i; 826 return 0; 827 } 828 829 ++hshcol; 830 } 831 832 else 833 ++hshsave; 834 } 835 836 /* Make a new dfa. */ 837 838 if ( ++lastdfa >= current_max_dfas ) 839 increase_max_dfas(); 840 841 newds = lastdfa; 842 843 dss[newds] = allocate_integer_array( numstates + 1 ); 844 845 /* If we haven't already sorted the states in sns, we do so now, 846 * so that future comparisons with it can be made quickly. 847 */ 848 849 if ( ! didsort ) 850 bubble( sns, numstates ); 851 852 for ( i = 1; i <= numstates; ++i ) 853 dss[newds][i] = sns[i]; 854 855 dfasiz[newds] = numstates; 856 dhash[newds] = hashval; 857 858 if ( nacc == 0 ) 859 { 860 if ( reject ) 861 dfaacc[newds].dfaacc_set = (int *) 0; 862 else 863 dfaacc[newds].dfaacc_state = 0; 864 865 accsiz[newds] = 0; 866 } 867 868 else if ( reject ) 869 { 870 /* We sort the accepting set in increasing order so the 871 * disambiguating rule that the first rule listed is considered 872 * match in the event of ties will work. We use a bubble 873 * sort since the list is probably quite small. 874 */ 875 876 bubble( accset, nacc ); 877 878 dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 ); 879 880 /* Save the accepting set for later */ 881 for ( i = 1; i <= nacc; ++i ) 882 { 883 dfaacc[newds].dfaacc_set[i] = accset[i]; 884 885 if ( accset[i] <= num_rules ) 886 /* Who knows, perhaps a REJECT can yield 887 * this rule. 888 */ 889 rule_useful[accset[i]] = true; 890 } 891 892 accsiz[newds] = nacc; 893 } 894 895 else 896 { 897 /* Find lowest numbered rule so the disambiguating rule 898 * will work. 899 */ 900 j = num_rules + 1; 901 902 for ( i = 1; i <= nacc; ++i ) 903 if ( accset[i] < j ) 904 j = accset[i]; 905 906 dfaacc[newds].dfaacc_state = j; 907 908 if ( j <= num_rules ) 909 rule_useful[j] = true; 910 } 911 912 *newds_addr = newds; 913 914 return 1; 915 } 916 917 918 /* symfollowset - follow the symbol transitions one step 919 * 920 * synopsis 921 * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, 922 * int transsym, int nset[current_max_dfa_size] ); 923 */ 924 925 int symfollowset( ds, dsize, transsym, nset ) 926 int ds[], dsize, transsym, nset[]; 927 { 928 int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; 929 930 numstates = 0; 931 932 for ( i = 1; i <= dsize; ++i ) 933 { /* for each nfa state ns in the state set of ds */ 934 ns = ds[i]; 935 sym = transchar[ns]; 936 tsp = trans1[ns]; 937 938 if ( sym < 0 ) 939 { /* it's a character class */ 940 sym = -sym; 941 ccllist = cclmap[sym]; 942 lenccl = ccllen[sym]; 943 944 if ( cclng[sym] ) 945 { 946 for ( j = 0; j < lenccl; ++j ) 947 { 948 /* Loop through negated character 949 * class. 950 */ 951 ch = ccltbl[ccllist + j]; 952 953 if ( ch == 0 ) 954 ch = NUL_ec; 955 956 if ( ch > transsym ) 957 /* Transsym isn't in negated 958 * ccl. 959 */ 960 break; 961 962 else if ( ch == transsym ) 963 /* next 2 */ goto bottom; 964 } 965 966 /* Didn't find transsym in ccl. */ 967 nset[++numstates] = tsp; 968 } 969 970 else 971 for ( j = 0; j < lenccl; ++j ) 972 { 973 ch = ccltbl[ccllist + j]; 974 975 if ( ch == 0 ) 976 ch = NUL_ec; 977 978 if ( ch > transsym ) 979 break; 980 else if ( ch == transsym ) 981 { 982 nset[++numstates] = tsp; 983 break; 984 } 985 } 986 } 987 988 else if ( sym >= 'A' && sym <= 'Z' && caseins ) 989 flexfatal( 990 _( "consistency check failed in symfollowset" ) ); 991 992 else if ( sym == SYM_EPSILON ) 993 { /* do nothing */ 994 } 995 996 else if ( ABS( ecgroup[sym] ) == transsym ) 997 nset[++numstates] = tsp; 998 999 bottom: ; 1000 } 1001 1002 return numstates; 1003 } 1004 1005 1006 /* sympartition - partition characters with same out-transitions 1007 * 1008 * synopsis 1009 * sympartition( int ds[current_max_dfa_size], int numstates, 1010 * int symlist[numecs], int duplist[numecs] ); 1011 */ 1012 1013 void sympartition( ds, numstates, symlist, duplist ) 1014 int ds[], numstates; 1015 int symlist[], duplist[]; 1016 { 1017 int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; 1018 1019 /* Partitioning is done by creating equivalence classes for those 1020 * characters which have out-transitions from the given state. Thus 1021 * we are really creating equivalence classes of equivalence classes. 1022 */ 1023 1024 for ( i = 1; i <= numecs; ++i ) 1025 { /* initialize equivalence class list */ 1026 duplist[i] = i - 1; 1027 dupfwd[i] = i + 1; 1028 } 1029 1030 duplist[1] = NIL; 1031 dupfwd[numecs] = NIL; 1032 1033 for ( i = 1; i <= numstates; ++i ) 1034 { 1035 ns = ds[i]; 1036 tch = transchar[ns]; 1037 1038 if ( tch != SYM_EPSILON ) 1039 { 1040 if ( tch < -lastccl || tch >= csize ) 1041 { 1042 flexfatal( 1043 _( "bad transition character detected in sympartition()" ) ); 1044 } 1045 1046 if ( tch >= 0 ) 1047 { /* character transition */ 1048 int ec = ecgroup[tch]; 1049 1050 mkechar( ec, dupfwd, duplist ); 1051 symlist[ec] = 1; 1052 } 1053 1054 else 1055 { /* character class */ 1056 tch = -tch; 1057 1058 lenccl = ccllen[tch]; 1059 cclp = cclmap[tch]; 1060 mkeccl( ccltbl + cclp, lenccl, dupfwd, 1061 duplist, numecs, NUL_ec ); 1062 1063 if ( cclng[tch] ) 1064 { 1065 j = 0; 1066 1067 for ( k = 0; k < lenccl; ++k ) 1068 { 1069 ich = ccltbl[cclp + k]; 1070 1071 if ( ich == 0 ) 1072 ich = NUL_ec; 1073 1074 for ( ++j; j < ich; ++j ) 1075 symlist[j] = 1; 1076 } 1077 1078 for ( ++j; j <= numecs; ++j ) 1079 symlist[j] = 1; 1080 } 1081 1082 else 1083 for ( k = 0; k < lenccl; ++k ) 1084 { 1085 ich = ccltbl[cclp + k]; 1086 1087 if ( ich == 0 ) 1088 ich = NUL_ec; 1089 1090 symlist[ich] = 1; 1091 } 1092 } 1093 } 1094 } 1095 } 1096