1 /* 2 * misc.c 3 * 4 * Manage tokens, regular expressions. 5 * Print methods for debugging 6 * Compute follow lists onto tail ends of rules. 7 * 8 * The following functions are visible: 9 * 10 * int addTname(char *); Add token name 11 * int addTexpr(char *); Add token expression 12 * int Tnum(char *); Get number of expr/token 13 * void Tklink(char *, char *); Link a name with an expression 14 * int hasAction(expr); Does expr already have action assigned? 15 * void setHasAction(expr); Indicate that expr now has an action 16 * Entry *newEntry(char *,int); Create new table entry with certain size 17 * void list_add(ListNode **list, char *e) 18 * void list_free(ListNode **list, int freeData); *** MR10 *** 19 * void list_apply(ListNode *list, void (*f)()) 20 * void lexclass(char *m); switch to new/old lexical class 21 * void lexmode(int i); switch to old lexical class i 22 * 23 * SOFTWARE RIGHTS 24 * 25 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool 26 * Set (PCCTS) -- PCCTS is in the public domain. An individual or 27 * company may do whatever they wish with source code distributed with 28 * PCCTS or the code generated by PCCTS, including the incorporation of 29 * PCCTS, or its output, into commerical software. 30 * 31 * We encourage users to develop software with PCCTS. However, we do ask 32 * that credit is given to us for developing PCCTS. By "credit", 33 * we mean that if you incorporate our source code into one of your 34 * programs (commercial product, research project, or otherwise) that you 35 * acknowledge this fact somewhere in the documentation, research report, 36 * etc... If you like PCCTS and have developed a nice tool with the 37 * output, please mention that you developed it using PCCTS. In 38 * addition, we ask that this header remain intact in our source code. 39 * As long as these guidelines are kept, we expect to continue enhancing 40 * this system and expect to make other tools available as they are 41 * completed. 42 * 43 * ANTLR 1.33 44 * Terence Parr 45 * Parr Research Corporation 46 * with Purdue University and AHPCRC, University of Minnesota 47 * 1989-2001 48 */ 49 50 #include <stdio.h> 51 #include "pcctscfg.h" 52 #include "set.h" 53 #include "syn.h" 54 #include "hash.h" 55 #include "generic.h" 56 #include "dlgdef.h" 57 #include <ctype.h> 58 59 static int tsize=TSChunk; /* size of token str arrays */ 60 61 static void 62 #ifdef __USE_PROTOS 63 RemapForcedTokensInSyntaxDiagram(Node *); 64 #else 65 RemapForcedTokensInSyntaxDiagram(); 66 #endif 67 68 /* T o k e n M a n i p u l a t i o n */ 69 70 /* 71 * add token 't' to the TokenStr/Expr array. Make more room if necessary. 72 * 't' is either an expression or a token name. 73 * 74 * There is only one TokenStr array, but multiple ExprStr's. Therefore, 75 * for each lex class (element of lclass) we must extend the ExprStr array. 76 * ExprStr's and TokenStr are always all the same size. 77 * 78 * Also, there is a Texpr hash table for each automaton. 79 */ 80 static void 81 #ifdef __USE_PROTOS 82 Ttrack( char *t ) 83 #else 84 Ttrack( t ) 85 char *t; 86 #endif 87 { 88 if ( TokenNum >= tsize ) /* terminal table overflow? */ 89 { 90 char **p; 91 int i, more, j; 92 93 more = TSChunk * (1 + ((TokenNum-tsize) / TSChunk)); 94 tsize += more; 95 TokenStr = (char **) realloc((char *)TokenStr, tsize*sizeof(char *)); 96 require(TokenStr != NULL, "Ttrack: can't extend TokenStr"); 97 for (i=0; i<NumLexClasses; i++) 98 { 99 lclass[i].exprs = (char **) 100 realloc((char *)lclass[i].exprs, tsize*sizeof(char *)); 101 require(lclass[i].exprs != NULL, "Ttrack: can't extend ExprStr"); 102 for (p= &lclass[i].exprs[tsize-more],j=1; j<=more; j++) *p++ = NULL; 103 } 104 for (p= &TokenStr[tsize-more],i=1; i<=more; i++) *p++ = NULL; 105 lexmode( CurrentLexClass ); /* reset ExprStr in case table moved */ 106 } 107 /* note: we use the actual ExprStr/TokenStr array 108 * here as TokenInd doesn't exist yet 109 */ 110 if ( *t == '"' ) ExprStr[TokenNum] = t; 111 else TokenStr[TokenNum] = t; 112 } 113 114 static Expr * 115 #ifdef __USE_PROTOS 116 newExpr( char *e ) 117 #else 118 newExpr( e ) 119 char *e; 120 #endif 121 { 122 Expr *p = (Expr *) calloc(1, sizeof(Expr)); 123 require(p!=NULL, "newExpr: cannot alloc Expr node"); 124 125 p->expr = e; 126 p->lclass = CurrentLexClass; 127 return p; 128 } 129 130 /* switch to lexical class/mode m. This amounts to creating a new 131 * lex mode if one does not already exist and making ExprStr point 132 * to the correct char string array. We must also switch Texpr tables. 133 * 134 * BTW, we need multiple ExprStr arrays because more than one automaton 135 * may have the same label for a token, but with different expressions. 136 * We need to track an expr for each automaton. If we disallowed this 137 * feature, only one ExprStr would be required. 138 */ 139 void 140 #ifdef __USE_PROTOS 141 lexclass( char *m ) 142 #else 143 lexclass( m ) 144 char *m; 145 #endif 146 { 147 int i; 148 TermEntry *p; 149 static char EOFSTR[] = "\"@\""; 150 151 if ( hash_get(Tname, m) != NULL ) 152 { 153 warn(eMsg1("lexclass name conflicts with token/errclass label '%s'",m)); 154 } 155 /* does m already exist? */ 156 i = LexClassIndex(m); 157 if ( i != -1 ) {lexmode(i); return;} 158 /* must make new one */ 159 NumLexClasses++; 160 CurrentLexClass = NumLexClasses-1; 161 require(NumLexClasses<=MaxLexClasses, "number of allowable lexclasses exceeded\nIncrease MaxLexClasses in generic.h and recompile all C files"); 162 lclass[CurrentLexClass].classnum = m; 163 lclass[CurrentLexClass].exprs = (char **) calloc(tsize, sizeof(char *)); 164 require(lclass[CurrentLexClass].exprs!=NULL, 165 "lexclass: cannot allocate ExprStr"); 166 lclass[CurrentLexClass].htable = newHashTable(); 167 ExprStr = lclass[CurrentLexClass].exprs; 168 Texpr = lclass[CurrentLexClass].htable; 169 /* define EOF for each automaton */ 170 p = newTermEntry( EOFSTR ); 171 p->token = EofToken; /* couldn't have remapped tokens yet, use EofToken */ 172 hash_add(Texpr, EOFSTR, (Entry *)p); 173 list_add(&ExprOrder, (void *)newExpr(EOFSTR)); 174 /* note: we use the actual ExprStr array 175 * here as TokenInd doesn't exist yet 176 */ 177 ExprStr[EofToken] = EOFSTR; 178 } 179 180 void 181 #ifdef __USE_PROTOS 182 lexmode( int i ) 183 #else 184 lexmode( i ) 185 int i; 186 #endif 187 { 188 require(i<NumLexClasses, "lexmode: invalid mode"); 189 ExprStr = lclass[i].exprs; 190 Texpr = lclass[i].htable; 191 CurrentLexClass = i; 192 } 193 194 /* return index into lclass array of lexical class. return -1 if nonexistent */ 195 int 196 #ifdef __USE_PROTOS 197 LexClassIndex( char *cl ) 198 #else 199 LexClassIndex( cl ) 200 char *cl; 201 #endif 202 { 203 int i; 204 205 for (i=0; i<NumLexClasses; i++) 206 { 207 if ( strcmp(lclass[i].classnum, cl) == 0 ) return i; 208 } 209 return -1; 210 } 211 212 int 213 #ifdef __USE_PROTOS 214 hasAction( char *expr ) 215 #else 216 hasAction( expr ) 217 char *expr; 218 #endif 219 { 220 TermEntry *p; 221 require(expr!=NULL, "hasAction: invalid expr"); 222 223 p = (TermEntry *) hash_get(Texpr, expr); 224 require(p!=NULL, eMsg1("hasAction: expr '%s' doesn't exist",expr)); 225 return (p->action!=NULL); 226 } 227 228 void 229 #ifdef __USE_PROTOS 230 setHasAction( char *expr, char *action ) 231 #else 232 setHasAction( expr, action ) 233 char *expr; 234 char *action; 235 #endif 236 { 237 TermEntry *p; 238 require(expr!=NULL, "setHasAction: invalid expr"); 239 240 p = (TermEntry *) hash_get(Texpr, expr); 241 require(p!=NULL, eMsg1("setHasAction: expr '%s' doesn't exist",expr)); 242 p->action = action; 243 } 244 245 ForcedToken * 246 #ifdef __USE_PROTOS 247 newForcedToken(char *token, int tnum) 248 #else 249 newForcedToken(token, tnum) 250 char *token; 251 int tnum; 252 #endif 253 { 254 ForcedToken *ft = (ForcedToken *) calloc(1, sizeof(ForcedToken)); 255 require(ft!=NULL, "out of memory"); 256 ft->token = token; 257 ft->tnum = tnum; 258 return ft; 259 } 260 261 /* 262 * Make a token indirection array that remaps token numbers and then walk 263 * the appropriate symbol tables and SynDiag to change token numbers 264 */ 265 void 266 #ifdef __USE_PROTOS 267 RemapForcedTokens(void) 268 #else 269 RemapForcedTokens() 270 #endif 271 { 272 ListNode *p; 273 ForcedToken *q; 274 int max_token_number=0; /* MR9 23-Sep-97 Removed "unsigned" */ 275 int i; 276 277 if ( ForcedTokens == NULL ) return; 278 279 /* find max token num */ 280 for (p = ForcedTokens->next; p!=NULL; p=p->next) 281 { 282 q = (ForcedToken *) p->elem; 283 if ( q->tnum > max_token_number ) max_token_number = q->tnum; 284 } 285 fprintf(stderr, "max token number is %d\n", max_token_number); 286 287 /* make token indirection array */ 288 TokenInd = (int *) calloc(max_token_number+1, sizeof(int)); 289 LastTokenCounted = TokenNum; 290 TokenNum = max_token_number+1; 291 require(TokenInd!=NULL, "RemapForcedTokens: cannot allocate TokenInd"); 292 293 /* fill token indirection array and change token id htable ; swap token indices */ 294 for (i=1; i<TokenNum; i++) TokenInd[i] = i; 295 for (p = ForcedTokens->next; p!=NULL; p=p->next) 296 { 297 TermEntry *te; 298 int old_pos, t; 299 300 q = (ForcedToken *) p->elem; 301 fprintf(stderr, "%s forced to %d\n", q->token, q->tnum); 302 te = (TermEntry *) hash_get(Tname, q->token); 303 require(te!=NULL, "RemapForcedTokens: token not in hash table"); 304 old_pos = te->token; 305 fprintf(stderr, "Before: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]); 306 fprintf(stderr, "Before: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]); 307 q = (ForcedToken *) p->elem; 308 t = TokenInd[old_pos]; 309 TokenInd[old_pos] = q->tnum; 310 TokenInd[q->tnum] = t; 311 te->token = q->tnum; /* update token type id symbol table */ 312 fprintf(stderr, "After: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]); 313 fprintf(stderr, "After: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]); 314 315 /* Change the token number in the sym tab entry for the exprs 316 * at the old position of the token id and the target position 317 */ 318 /* update expr at target (if any) of forced token id */ 319 if ( q->tnum < TokenNum ) /* is it a valid position? */ 320 { 321 for (i=0; i<NumLexClasses; i++) 322 { 323 if ( lclass[i].exprs[q->tnum]!=NULL ) 324 { 325 /* update the symbol table for this expr */ 326 TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[q->tnum]); 327 require(e!=NULL, "RemapForcedTokens: expr not in hash table"); 328 e->token = old_pos; 329 fprintf(stderr, "found expr '%s' at target %d in lclass[%d]; changed to %d\n", 330 lclass[i].exprs[q->tnum], q->tnum, i, old_pos); 331 } 332 } 333 } 334 /* update expr at old position (if any) of forced token id */ 335 for (i=0; i<NumLexClasses; i++) 336 { 337 if ( lclass[i].exprs[old_pos]!=NULL ) 338 { 339 /* update the symbol table for this expr */ 340 TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[old_pos]); 341 require(e!=NULL, "RemapForcedTokens: expr not in hash table"); 342 e->token = q->tnum; 343 fprintf(stderr, "found expr '%s' for id %s in lclass[%d]; changed to %d\n", 344 lclass[i].exprs[old_pos], q->token, i, q->tnum); 345 } 346 } 347 } 348 349 /* Update SynDiag */ 350 RemapForcedTokensInSyntaxDiagram((Node *)SynDiag); 351 } 352 353 static void 354 #ifdef __USE_PROTOS 355 RemapForcedTokensInSyntaxDiagram(Node *p) 356 #else 357 RemapForcedTokensInSyntaxDiagram(p) 358 Node *p; 359 #endif 360 { 361 Junction *j = (Junction *) p; 362 RuleRefNode *r = (RuleRefNode *) p; 363 TokNode *t = (TokNode *)p; 364 365 if ( p==NULL ) return; 366 require(p->ntype>=1 && p->ntype<=NumNodeTypes, "Remap...: invalid diagram node"); 367 switch ( p->ntype ) 368 { 369 case nJunction : 370 if ( j->visited ) return; 371 if ( j->jtype == EndRule ) return; 372 j->visited = TRUE; 373 RemapForcedTokensInSyntaxDiagram( j->p1 ); 374 RemapForcedTokensInSyntaxDiagram( j->p2 ); 375 j->visited = FALSE; 376 return; 377 case nRuleRef : 378 RemapForcedTokensInSyntaxDiagram( r->next ); 379 return; 380 case nToken : 381 if ( t->remapped ) return; /* we've been here before */ 382 t->remapped = 1; 383 fprintf(stderr, "remapping %d to %d\n", t->token, TokenInd[t->token]); 384 t->token = TokenInd[t->token]; 385 RemapForcedTokensInSyntaxDiagram( t->next ); 386 return; 387 case nAction : 388 RemapForcedTokensInSyntaxDiagram( ((ActionNode *)p)->next ); 389 return; 390 default : 391 fatal_internal("invalid node type"); 392 } 393 } 394 395 /* 396 * Add a token name. Return the token number associated with it. If it already 397 * exists, then return the token number assigned to it. 398 * 399 * Track the order in which tokens are found so that the DLG output maintains 400 * that order. It also lets us map token numbers to strings. 401 */ 402 int 403 #ifdef __USE_PROTOS 404 addTname( char *token ) 405 #else 406 addTname( token ) 407 char *token; 408 #endif 409 { 410 TermEntry *p; 411 require(token!=NULL, "addTname: invalid token name"); 412 413 if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token; 414 p = newTermEntry( token ); 415 Ttrack( p->str ); 416 p->token = TokenNum++; 417 hash_add(Tname, token, (Entry *)p); 418 return p->token; 419 } 420 421 /* This is the same as addTname except we force the TokenNum to be tnum. 422 * We don't have to use the Forced token stuff as no tokens will have 423 * been defined with #tokens when this is called. This is only called 424 * when a #tokdefs meta-op is used. 425 */ 426 int 427 #ifdef __USE_PROTOS 428 addForcedTname( char *token, int tnum ) 429 #else 430 addForcedTname( token, tnum ) 431 char *token; 432 int tnum; 433 #endif 434 { 435 TermEntry *p; 436 require(token!=NULL, "addTname: invalid token name"); 437 438 if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token; 439 p = newTermEntry( token ); 440 Ttrack( p->str ); 441 p->token = tnum; 442 hash_add(Tname, token, (Entry *)p); 443 return p->token; 444 } 445 446 /* 447 * Add a token expr. Return the token number associated with it. If it already 448 * exists, then return the token number assigned to it. 449 */ 450 int 451 #ifdef __USE_PROTOS 452 addTexpr( char *expr ) 453 #else 454 addTexpr( expr ) 455 char *expr; 456 #endif 457 { 458 TermEntry *p; 459 require(expr!=NULL, "addTexpr: invalid regular expression"); 460 461 if ( (p=(TermEntry *)hash_get(Texpr, expr)) != NULL ) return p->token; 462 p = newTermEntry( expr ); 463 Ttrack( p->str ); 464 /* track the order in which they occur */ 465 list_add(&ExprOrder, (void *)newExpr(p->str)); 466 p->token = TokenNum++; 467 hash_add(Texpr, expr, (Entry *)p); 468 return p->token; 469 } 470 471 /* return the token number of 'term'. Return 0 if no 'term' exists */ 472 int 473 #ifdef __USE_PROTOS 474 Tnum( char *term ) 475 #else 476 Tnum( term ) 477 char *term; 478 #endif 479 { 480 TermEntry *p; 481 require(term!=NULL, "Tnum: invalid terminal"); 482 483 if ( *term=='"' ) p = (TermEntry *) hash_get(Texpr, term); 484 else p = (TermEntry *) hash_get(Tname, term); 485 if ( p == NULL ) return 0; 486 else return p->token; 487 } 488 489 /* associate a Name with an expr. If both have been already assigned 490 * token numbers, then an error is reported. Add the token or expr 491 * that has not been added if no error. This 'represents' the #token 492 * ANTLR pseudo-op. If both have not been defined, define them both 493 * linked to same token number. 494 */ 495 void 496 #ifdef __USE_PROTOS 497 Tklink( char *token, char *expr ) 498 #else 499 Tklink( token, expr ) 500 char *token; 501 char *expr; 502 #endif 503 { 504 TermEntry *p, *q; 505 require(token!=NULL && expr!=NULL, "Tklink: invalid token name and/or expr"); 506 507 p = (TermEntry *) hash_get(Tname, token); 508 q = (TermEntry *) hash_get(Texpr, expr); 509 if ( p != NULL && q != NULL ) /* both defined */ 510 { 511 warn( eMsg2("token name %s and rexpr %s already defined; ignored", 512 token, expr) ); 513 return; 514 } 515 if ( p==NULL && q==NULL ) /* both not defined */ 516 { 517 int t = addTname( token ); 518 q = newTermEntry( expr ); 519 hash_add(Texpr, expr, (Entry *)q); 520 q->token = t; 521 /* note: we use the actual ExprStr array 522 * here as TokenInd doesn't exist yet 523 */ 524 ExprStr[t] = q->str; 525 /* track the order in which they occur */ 526 list_add(&ExprOrder, (void *)newExpr(q->str)); 527 return; 528 } 529 if ( p != NULL ) /* one is defined, one is not */ 530 { 531 q = newTermEntry( expr ); 532 hash_add(Texpr, expr, (Entry *)q); 533 q->token = p->token; 534 ExprStr[p->token] = q->str; /* both expr and token str defined now */ 535 list_add(&ExprOrder, (void *)newExpr(q->str)); 536 } 537 else /* trying to associate name with expr here*/ 538 { 539 p = newTermEntry( token ); 540 hash_add(Tname, token, (Entry *)p); 541 p->token = q->token; 542 TokenStr[p->token] = p->str;/* both expr and token str defined now */ 543 } 544 } 545 546 /* 547 * Given a string, this function allocates and returns a pointer to a 548 * hash table record of size 'sz' whose "str" pointer is reset to a position 549 * in the string table. 550 */ 551 Entry * 552 #ifdef __USE_PROTOS 553 newEntry( char *text, int sz ) 554 #else 555 newEntry( text, sz ) 556 char *text; 557 int sz; 558 #endif 559 { 560 Entry *p; 561 require(text!=NULL, "new: NULL terminal"); 562 563 if ( (p = (Entry *) calloc(1,sz)) == 0 ) 564 { 565 fatal_internal("newEntry: out of memory for terminals\n"); 566 exit(PCCTS_EXIT_FAILURE); 567 } 568 p->str = mystrdup(text); 569 570 return(p); 571 } 572 573 /* 574 * add an element to a list. 575 * 576 * Any non-empty list has a sentinel node whose 'elem' pointer is really 577 * a pointer to the last element. (i.e. length(list) = #elemIn(list)+1). 578 * Elements are appended to the list. 579 */ 580 void 581 #ifdef __USE_PROTOS 582 list_add( ListNode **list, void *e ) 583 #else 584 list_add( list, e ) 585 ListNode **list; 586 void *e; 587 #endif 588 { 589 ListNode *p, *tail; 590 require(e!=NULL, "list_add: attempting to add NULL list element"); 591 592 p = newListNode; 593 require(p!=NULL, "list_add: cannot alloc new list node"); 594 p->elem = e; 595 if ( *list == NULL ) 596 { 597 ListNode *sentinel = newListNode; 598 require(sentinel!=NULL, "list_add: cannot alloc sentinel node"); 599 *list=sentinel; 600 sentinel->next = p; 601 sentinel->elem = (char *)p; /* set tail pointer */ 602 } 603 else /* find end of list */ 604 { 605 tail = (ListNode *) (*list)->elem; /* get tail pointer */ 606 tail->next = p; 607 (*list)->elem = (char *) p; /* reset tail */ 608 } 609 } 610 611 /* MR10 list_free() frees the ListNode elements in the list */ 612 /* MR10 if freeData then free the data elements of the list too */ 613 614 void 615 #ifdef __USE_PROTOS 616 list_free(ListNode **list,int freeData) 617 #else 618 list_free(list,freeData) 619 ListNode **list; 620 int freeData; 621 #endif 622 { 623 ListNode *p; 624 ListNode *next; 625 626 if (list == NULL) return; 627 if (*list == NULL) return; 628 for (p=*list; p != NULL; p=next) { 629 next=p->next; 630 if (freeData && p->elem != NULL) { 631 free( (char *) p->elem); 632 }; 633 free( (char *) p); 634 }; 635 *list=NULL; 636 } 637 638 void 639 #ifdef __USE_PROTOS 640 list_apply( ListNode *list, void (*f)(void *) ) 641 #else 642 list_apply( list, f ) 643 ListNode *list; 644 void (*f)(); 645 #endif 646 { 647 ListNode *p; 648 require(f!=NULL, "list_apply: NULL function to apply"); 649 650 if ( list == NULL ) return; 651 for (p = list->next; p!=NULL; p=p->next) (*f)( p->elem ); 652 } 653 654 /* MR27 */ 655 656 #ifdef __USE_PROTOS 657 int list_search_cstring(ListNode *list, char * cstring) 658 #else 659 int list_search_cstring(list, cstring) 660 ListNode * list; 661 char * cstring; 662 #endif 663 { 664 ListNode *p; 665 if (list == NULL ) return 0; 666 for (p = list->next; p!=NULL; p=p->next) { 667 if (p->elem == NULL) continue; 668 if (0 == strcmp((char *) p->elem , cstring)) return 1; 669 } 670 return 0; 671 } 672 673 /* F O L L O W C y c l e S t u f f */ 674 675 /* make a key based upon (rulename, computation, k value). 676 * Computation values are 'i'==FIRST, 'o'==FOLLOW. 677 */ 678 679 /* MR10 Make the key all characters so it can be read easily */ 680 /* MR10 by a simple dump program. Also, separates */ 681 /* MR10 'o' and 'i' from rule name */ 682 683 char * 684 #ifdef __USE_PROTOS 685 Fkey( char *rule, int computation, int k ) 686 #else 687 Fkey( rule, computation, k ) 688 char *rule; 689 int computation; 690 int k; 691 #endif 692 { 693 static char key[MaxRuleName+2+2+1]; /* MR10 */ 694 int i; 695 696 if ( k > 99 ) /* MR10 */ 697 fatal("k>99 is too big for this implementation of ANTLR!\n"); /* MR10 */ 698 if ( (i=strlen(rule)) > MaxRuleName ) /* MR10 */ 699 fatal( eMsgd("rule name > max of %d\n", MaxRuleName) ); /* MR10 */ 700 strcpy(key,rule); 701 702 /* MR10 */ key[i]='*'; 703 /* MR10 */ key[i+1] = (char) computation; /* MR20 G. Hobbelt */ 704 /* MR10 */ if (k < 10) { 705 /* MR10 */ key[i+2] = (char) ( '0' + k); 706 /* MR10 */ key[i+3] = '\0'; 707 /* MR10 */ } else { 708 /* MR10 */ key[i+2] = (char) ( '0' + k/10); 709 /* MR10 */ key[i+3] = (char) ( '0' + k % 10); 710 /* MR10 */ key[i+4] = '\0'; 711 /* MR10 */ }; 712 713 return key; 714 } 715 716 /* Push a rule onto the kth FOLLOW stack */ 717 void 718 #ifdef __USE_PROTOS 719 FoPush( char *rule, int k ) 720 #else 721 FoPush( rule, k ) 722 char *rule; 723 int k; 724 #endif 725 { 726 RuleEntry *r; 727 require(rule!=NULL, "FoPush: tried to push NULL rule"); 728 require(k<=CLL_k, "FoPush: tried to access non-existent stack"); 729 730 /*fprintf(stderr, "FoPush(%s)\n", rule);*/ 731 r = (RuleEntry *) hash_get(Rname, rule); 732 if ( r == NULL ) {fatal_internal( eMsg1("rule %s must be defined but isn't", rule) );} 733 if ( FoStack[k] == NULL ) /* Does the kth stack exist yet? */ 734 { 735 /*fprintf(stderr, "allocating FoStack\n");*/ 736 FoStack[k] = (int *) calloc(FoStackSize, sizeof(int)); 737 require(FoStack[k]!=NULL, "FoPush: cannot allocate FOLLOW stack\n"); 738 } 739 if ( FoTOS[k] == NULL ) 740 { 741 FoTOS[k]=FoStack[k]; 742 *(FoTOS[k]) = r->rulenum; 743 } 744 else 745 { 746 #ifdef MEMCHK 747 require(valid(FoStack[k]), "FoPush: invalid FoStack"); 748 #endif 749 if ( FoTOS[k] >= &(FoStack[k][FoStackSize-1]) ) 750 fatal( eMsgd("exceeded max depth of FOLLOW recursion (%d)\n", 751 FoStackSize) ); 752 require(FoTOS[k]>=FoStack[k], 753 eMsg1("FoPush: FoStack stack-ptr is playing out of its sandbox", 754 rule)); 755 ++(FoTOS[k]); 756 *(FoTOS[k]) = r->rulenum; 757 } 758 { 759 /* 760 **** int *p; 761 **** fprintf(stderr, "FoStack[k=%d]:\n", k); 762 **** for (p=FoStack[k]; p<=FoTOS[k]; p++) 763 **** { 764 **** fprintf(stderr, "\t%s\n", RulePtr[*p]->rname); 765 **** } 766 */ 767 } 768 } 769 770 /* Pop one rule off of the FOLLOW stack. TOS ptr is NULL if empty. */ 771 void 772 #ifdef __USE_PROTOS 773 FoPop( int k ) 774 #else 775 FoPop( k ) 776 int k; 777 #endif 778 { 779 require(k<=CLL_k, "FoPop: tried to access non-existent stack"); 780 /*fprintf(stderr, "FoPop\n");*/ 781 require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]), 782 "FoPop: FoStack stack-ptr is playing out of its sandbox"); 783 if ( FoTOS[k] == FoStack[k] ) FoTOS[k] = NULL; 784 else (FoTOS[k])--; 785 } 786 787 /* Compute FOLLOW cycle. 788 * Mark all FOLLOW sets for rules in cycle as incomplete. 789 * Then, save cycle on the cycle list (Cycles) for later resolution. 790 * The Cycle is stored in the form: 791 * (head of cycle==croot, rest of rules in cycle==cyclicDep) 792 * 793 * e.g. (Fo means "FOLLOW of", "-->" means requires or depends on) 794 * 795 * Fo(x)-->Fo(a)-->Fo(b)-->Fo(c)-->Fo(x) 796 * ^----Infinite recursion (cycle) 797 * 798 * the cycle would be: x -> {a,b,c} or stored as (x,{a,b,c}). Fo(x) depends 799 * on the FOLLOW of a,b, and c. The root of a cycle is always complete after 800 * Fo(x) finishes. Fo(a,b,c) however are not. It turns out that all rules 801 * in a FOLLOW cycle have the same FOLLOW set. 802 */ 803 void 804 #ifdef __USE_PROTOS 805 RegisterCycle( char *rule, int k ) 806 #else 807 RegisterCycle( rule, k ) 808 char *rule; 809 int k; 810 #endif 811 { 812 CacheEntry *f; 813 Cycle *c; 814 int *p; 815 RuleEntry *r; 816 require(rule!=NULL, "RegisterCycle: tried to register NULL rule"); 817 require(k<=CLL_k, "RegisterCycle: tried to access non-existent stack"); 818 819 /*fprintf(stderr, "RegisterCycle(%s)\n", rule);*/ 820 /* Find cycle start */ 821 r = (RuleEntry *) hash_get(Rname, rule); 822 require(r!=NULL,eMsg1("rule %s must be defined but isn't", rule)); 823 require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]), 824 eMsg1("RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox", 825 rule)); 826 /*** if ( FoTOS[k]<FoStack[k]||FoTOS[k]>&(FoStack[k][FoStackSize-1]) ) 827 **** { 828 **** fprintf(stderr, "RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox\n", 829 **** rule); 830 **** fprintf(stderr, "RegisterCycle: sp==0x%x out of bounds 0x%x...0x%x\n", 831 **** FoTOS[k], FoStack[k], &(FoStack[k][FoStackSize-1])); 832 **** exit(PCCTS_EXIT_FAILURE); 833 **** } 834 ****/ 835 836 #ifdef MEMCHK 837 require(valid(FoStack[k]), "RegisterCycle: invalid FoStack"); 838 #endif 839 for (p=FoTOS[k]; *p != r->rulenum && p >= FoStack[k]; --p) {;} 840 require(p>=FoStack[k], "RegisterCycle: FoStack is screwed up beyond belief"); 841 if ( p == FoTOS[k] ) return; /* don't worry about cycles to oneself */ 842 843 /* compute cyclic dependents (rules in cycle except head) */ 844 c = newCycle; 845 require(c!=NULL, "RegisterCycle: couldn't alloc new cycle"); 846 c->cyclicDep = empty; 847 c->croot = *p++; /* record root of cycle */ 848 for (; p<=FoTOS[k]; p++) 849 { 850 /* Mark all dependent rules as incomplete */ 851 f = (CacheEntry *) hash_get(Fcache, Fkey(RulePtr[*p]->rname,'o',k)); 852 if ( f==NULL ) 853 { 854 f = newCacheEntry( Fkey(RulePtr[*p]->rname,'o',k) ); 855 hash_add(Fcache, Fkey(RulePtr[*p]->rname,'o',k), (Entry *)f); 856 } 857 f->incomplete = TRUE; 858 859 set_orel(*p, &(c->cyclicDep)); /* mark rule as dependent of croot */ 860 } 861 list_add(&(Cycles[k]), (void *)c); 862 } 863 864 /* make all rules in cycle complete 865 * 866 * while ( some set has changed ) do 867 * for each cycle do 868 * if degree of FOLLOW set for croot > old degree then 869 * update all FOLLOW sets for rules in cyclic dependency 870 * change = TRUE 871 * endif 872 * endfor 873 * endwhile 874 */ 875 void 876 #ifdef __USE_PROTOS 877 ResolveFoCycles( int k ) 878 #else 879 ResolveFoCycles( k ) 880 int k; 881 #endif 882 { 883 ListNode *p, *q; 884 Cycle *c; 885 int changed = 1; 886 CacheEntry *f,*g; 887 int r; 888 /* int i; */ /* MR10 not useful */ 889 unsigned d; 890 891 unsigned *cursor; /* MR10 */ 892 unsigned *origin; /* MR10 */ 893 894 /*fprintf(stderr, "Resolving following cycles for %d\n", k);*/ 895 while ( changed ) 896 { 897 changed = 0; 898 /* MR10 i = 0; */ 899 for (p = Cycles[k]->next; p!=NULL; p=p->next) 900 { 901 c = (Cycle *) p->elem; 902 /*fprintf(stderr, "cycle %d: %s -->", i++, RulePtr[c->croot]->rname);*/ 903 /*s_fprT(stderr, c->cyclicDep);*/ 904 /*fprintf(stderr, "\n");*/ 905 f = (CacheEntry *) 906 hash_get(Fcache, Fkey(RulePtr[c->croot]->rname,'o',k)); 907 require(f!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[c->croot]->rname) ); 908 if ( (d=set_deg(f->fset)) > c->deg ) 909 { 910 /*fprintf(stderr, "Fo(%s) has changed\n", RulePtr[c->croot]->rname);*/ 911 changed = 1; 912 c->deg = d; /* update cycle FOLLOW set degree */ 913 914 /* MR10 */ origin=set_pdq(c->cyclicDep); 915 /* MR10 */ for (cursor=origin; *cursor != nil; cursor++) { 916 /* MR10 */ r=*cursor; 917 918 /******** while ( !set_nil(c->cyclicDep) ) { *****/ 919 /******** r = set_int(c->cyclicDep); *****/ 920 /******** set_rm(r, c->cyclicDep); *****/ 921 922 /*fprintf(stderr, "updating Fo(%s)\n", RulePtr[r]->rname);*/ 923 g = (CacheEntry *) 924 hash_get(Fcache, Fkey(RulePtr[r]->rname,'o',k)); 925 require(g!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[r]->rname) ); 926 set_orin(&(g->fset), f->fset); 927 g->incomplete = FALSE; 928 } 929 /* MR10 */ free( (char *) origin); 930 /* MR10 */ origin=NULL; 931 } 932 } 933 /* MR10 - this if statement appears to be meaningless since i is always 0 */ 934 /* MR10 if ( i == 1 ) changed = 0; */ /* if only 1 cycle, no need to repeat */ 935 } 936 /* kill Cycle list */ 937 for (q = Cycles[k]->next; q != NULL; q=p) 938 { 939 p = q->next; 940 set_free( ((Cycle *)q->elem)->cyclicDep ); 941 free((char *)q); 942 } 943 free( (char *)Cycles[k] ); 944 Cycles[k] = NULL; 945 } 946 947 948 /* P r i n t i n g S y n t a x D i a g r a m s */ 949 950 static void 951 #ifdef __USE_PROTOS 952 pBlk( Junction *q, int btype ) 953 #else 954 pBlk( q, btype ) 955 Junction *q; 956 int btype; 957 #endif 958 { 959 int k,a; 960 Junction *alt, *p; 961 962 q->end->pvisited = TRUE; 963 if ( btype == aLoopBegin ) 964 { 965 require(q->p2!=NULL, "pBlk: invalid ()* block"); 966 PRINT(q->p1); 967 alt = (Junction *)q->p2; 968 PRINT(alt->p1); 969 if ( PrintAnnotate ) 970 { 971 printf(" /* Opt "); 972 k = 1; 973 while ( !set_nil(alt->fset[k]) ) 974 { 975 s_fprT(stdout, alt->fset[k]); 976 if ( k++ == CLL_k ) break; 977 if ( !set_nil(alt->fset[k]) ) printf(", "); 978 } 979 printf(" */\n"); 980 } 981 return; 982 } 983 for (a=1,alt=q; alt != NULL; alt= (Junction *) alt->p2, a++) 984 { 985 if ( alt->p1 != NULL ) PRINT(alt->p1); 986 if ( PrintAnnotate ) 987 { 988 printf( " /* [%d] ", alt->altnum); 989 k = 1; 990 while ( !set_nil(alt->fset[k]) ) 991 { 992 s_fprT(stdout, alt->fset[k]); 993 if ( k++ == CLL_k ) break; 994 if ( !set_nil(alt->fset[k]) ) printf(", "); 995 } 996 if ( alt->p2 == NULL && btype == aOptBlk ) 997 printf( " (optional branch) */\n"); 998 else printf( " */\n"); 999 } 1000 1001 /* ignore implied empty alt of Plus blocks */ 1002 if ( alt->p2 != NULL && ((Junction *)alt->p2)->ignore ) break; 1003 1004 if ( alt->p2 != NULL && !(((Junction *)alt->p2)->p2==NULL && btype == aOptBlk) ) 1005 { 1006 if ( pLevel == 1 ) 1007 { 1008 printf("\n"); 1009 if ( a+1==pAlt1 || a+1==pAlt2 ) printf("=>"); 1010 printf("\t"); 1011 } 1012 else printf(" "); 1013 printf("|"); 1014 if ( pLevel == 1 ) 1015 { 1016 p = (Junction *) ((Junction *)alt->p2)->p1; 1017 while ( p!=NULL ) 1018 { 1019 if ( p->ntype==nAction ) 1020 { 1021 p=(Junction *)((ActionNode *)p)->next; 1022 continue; 1023 } 1024 if ( p->ntype!=nJunction ) 1025 { 1026 break; 1027 } 1028 if ( p->jtype==EndBlk || p->jtype==EndRule ) 1029 { 1030 p = NULL; 1031 break; 1032 } 1033 p = (Junction *)p->p1; 1034 } 1035 if ( p==NULL ) printf("\n\t"); /* Empty alt? */ 1036 } 1037 } 1038 } 1039 q->end->pvisited = FALSE; 1040 } 1041 1042 /* How to print out a junction */ 1043 void 1044 #ifdef __USE_PROTOS 1045 pJunc( Junction *q ) 1046 #else 1047 pJunc( q ) 1048 Junction *q; 1049 #endif 1050 { 1051 int dum_k; 1052 int doing_rule; 1053 require(q!=NULL, "pJunc: NULL node"); 1054 require(q->ntype==nJunction, "pJunc: not junction"); 1055 1056 if ( q->pvisited == TRUE ) return; 1057 q->pvisited = TRUE; 1058 switch ( q->jtype ) 1059 { 1060 case aSubBlk : 1061 if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); 1062 if ( q->end->p1 != NULL && ((Junction *)q->end->p1)->ntype==nJunction && 1063 ((Junction *)q->end->p1)->jtype == EndRule ) doing_rule = 1; 1064 else doing_rule = 0; 1065 pLevel++; 1066 if ( pLevel==1 ) 1067 { 1068 if ( pAlt1==1 ) printf("=>"); 1069 printf("\t"); 1070 } 1071 else printf(" "); 1072 if ( doing_rule ) 1073 { 1074 if ( pLevel==1 ) printf(" "); 1075 pBlk(q,q->jtype); 1076 } 1077 else { 1078 printf("("); 1079 if ( pLevel==1 ) printf(" "); 1080 pBlk(q,q->jtype); 1081 if ( pLevel>1 ) printf(" "); 1082 printf(")"); 1083 } 1084 if ( q->guess ) printf("?"); 1085 pLevel--; 1086 if ( PrintAnnotate ) freeBlkFsets(q); 1087 if ( q->end->p1 != NULL ) PRINT(q->end->p1); 1088 break; 1089 case aOptBlk : 1090 if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); 1091 pLevel++; 1092 if ( pLevel==1 ) 1093 { 1094 if ( pAlt1==1 ) printf("=>"); 1095 printf("\t"); 1096 } 1097 else printf(" "); 1098 printf("{"); 1099 if ( pLevel==1 ) printf(" "); 1100 pBlk(q,q->jtype); 1101 if ( pLevel>1 ) printf(" "); 1102 else printf("\n\t"); 1103 printf("}"); 1104 pLevel--; 1105 if ( PrintAnnotate ) freeBlkFsets(q); 1106 if ( q->end->p1 != NULL ) PRINT(q->end->p1); 1107 break; 1108 case aLoopBegin : 1109 if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); 1110 pLevel++; 1111 if ( pLevel==1 ) 1112 { 1113 if ( pAlt1==1 ) printf("=>"); 1114 printf("\t"); 1115 } 1116 else printf(" "); 1117 printf("("); 1118 if ( pLevel==1 ) printf(" "); 1119 pBlk(q,q->jtype); 1120 if ( pLevel>1 ) printf(" "); 1121 else printf("\n\t"); 1122 printf(")*"); 1123 pLevel--; 1124 if ( PrintAnnotate ) freeBlkFsets(q); 1125 if ( q->end->p1 != NULL ) PRINT(q->end->p1); 1126 break; 1127 case aLoopBlk : 1128 if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); 1129 pBlk(q,q->jtype); 1130 if ( PrintAnnotate ) freeBlkFsets(q); 1131 break; 1132 case aPlusBlk : 1133 if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); 1134 pLevel++; 1135 if ( pLevel==1 ) 1136 { 1137 if ( pAlt1==1 ) printf("=>"); 1138 printf("\t"); 1139 } 1140 else printf(" "); 1141 printf("("); 1142 if ( pLevel==1 ) printf(" "); 1143 pBlk(q,q->jtype); 1144 if ( pLevel>1 ) printf(" "); 1145 printf(")+"); 1146 pLevel--; 1147 if ( PrintAnnotate ) freeBlkFsets(q); 1148 if ( q->end->p1 != NULL ) PRINT(q->end->p1); 1149 break; 1150 case EndBlk : 1151 break; 1152 case RuleBlk : 1153 printf( "\n%s :\n", q->rname); 1154 PRINT(q->p1); 1155 if ( q->p2 != NULL ) PRINT(q->p2); 1156 break; 1157 case Generic : 1158 if ( q->p1 != NULL ) PRINT(q->p1); 1159 q->pvisited = FALSE; 1160 if ( q->p2 != NULL ) PRINT(q->p2); 1161 break; 1162 case EndRule : 1163 printf( "\n\t;\n"); 1164 break; 1165 } 1166 q->pvisited = FALSE; 1167 } 1168 1169 /* How to print out a rule reference node */ 1170 void 1171 #ifdef __USE_PROTOS 1172 pRuleRef( RuleRefNode *p ) 1173 #else 1174 pRuleRef( p ) 1175 RuleRefNode *p; 1176 #endif 1177 { 1178 require(p!=NULL, "pRuleRef: NULL node"); 1179 require(p->ntype==nRuleRef, "pRuleRef: not rule ref node"); 1180 1181 printf( " %s", p->text); 1182 PRINT(p->next); 1183 } 1184 1185 /* How to print out a terminal node */ 1186 void 1187 #ifdef __USE_PROTOS 1188 pToken( TokNode *p ) 1189 #else 1190 pToken( p ) 1191 TokNode *p; 1192 #endif 1193 { 1194 require(p!=NULL, "pToken: NULL node"); 1195 require(p->ntype==nToken, "pToken: not token node"); 1196 1197 if ( p->wild_card ) printf(" ."); 1198 printf( " %s", TerminalString(p->token)); 1199 PRINT(p->next); 1200 } 1201 1202 /* How to print out a terminal node */ 1203 void 1204 #ifdef __USE_PROTOS 1205 pAction( ActionNode *p ) 1206 #else 1207 pAction( p ) 1208 ActionNode *p; 1209 #endif 1210 { 1211 require(p!=NULL, "pAction: NULL node"); 1212 require(p->ntype==nAction, "pAction: not action node"); 1213 1214 PRINT(p->next); 1215 } 1216 1217 /* F i l l F o l l o w L i s t s */ 1218 1219 /* 1220 * Search all rules for all rule reference nodes, q to rule, r. 1221 * Add q->next to follow list dangling off of rule r. 1222 * i.e. 1223 * 1224 * r: -o-R-o-->o--> Ptr to node following rule r in another rule 1225 * | 1226 * o--> Ptr to node following another reference to r. 1227 * 1228 * This is the data structure employed to avoid FOLLOW set computation. We 1229 * simply compute the FIRST (reach) of the EndRule Node which follows the 1230 * list found at the end of all rules which are referenced elsewhere. Rules 1231 * not invoked by other rules have no follow list (r->end->p1==NULL). 1232 * Generally, only start symbols are not invoked by another rule. 1233 * 1234 * Note that this mechanism also gives a free cross-reference mechanism. 1235 * 1236 * The entire syntax diagram is layed out like this: 1237 * 1238 * SynDiag 1239 * | 1240 * v 1241 * o-->R1--o 1242 * | 1243 * o-->R2--o 1244 * | 1245 * ... 1246 * | 1247 * o-->Rn--o 1248 * 1249 */ 1250 void 1251 #ifdef __USE_PROTOS 1252 FoLink( Node *p ) 1253 #else 1254 FoLink( p ) 1255 Node *p; 1256 #endif 1257 { 1258 RuleEntry *q; 1259 Junction *j = (Junction *) p; 1260 RuleRefNode *r = (RuleRefNode *) p; 1261 1262 if ( p==NULL ) return; 1263 require(p->ntype>=1 && p->ntype<=NumNodeTypes, 1264 eMsgd("FoLink: invalid diagram node: ntype==%d",p->ntype)); 1265 switch ( p->ntype ) 1266 { 1267 case nJunction : 1268 if ( j->fvisited ) return; 1269 if ( j->jtype == EndRule ) return; 1270 j->fvisited = TRUE; 1271 FoLink( j->p1 ); 1272 FoLink( j->p2 ); 1273 /* MR14 */ 1274 /* MR14 */ /* Need to determine whether the guess block is an */ 1275 /* MR14 */ /* of the form (alpha)? beta before follow sets are */ 1276 /* MR14 */ /* computed. This is necessary to solve problem */ 1277 /* MR14 */ /* of doing follow on the alpha of an (alpha)? beta block. */ 1278 /* MR14 */ 1279 /* MR14 */ /* This is performed by analysis_point as a side-effect. */ 1280 /* MR14 */ 1281 /* MR14 */ 1282 /* MR14 */ if (j->jtype == aSubBlk && j->guess) { 1283 /* MR14 */ Junction *ignore; 1284 /* MR14 */ ignore=analysis_point(j); 1285 /* MR14 */ } 1286 /* MR14 */ 1287 return; 1288 case nRuleRef : 1289 if ( r->linked ) return; 1290 q = (RuleEntry *) hash_get(Rname, r->text); 1291 if ( q == NULL ) 1292 { 1293 warnFL( eMsg1("rule %s not defined",r->text), FileStr[r->file], r->line ); 1294 } 1295 else 1296 { 1297 if ( r->parms!=NULL && RulePtr[q->rulenum]->pdecl==NULL ) 1298 { 1299 warnFL( eMsg1("rule %s accepts no parameter(s)", r->text), 1300 FileStr[r->file], r->line ); 1301 } 1302 if ( r->parms==NULL && RulePtr[q->rulenum]->pdecl!=NULL ) 1303 { 1304 warnFL( eMsg1("rule %s requires parameter(s)", r->text), 1305 FileStr[r->file], r->line ); 1306 } 1307 if ( r->assign!=NULL && RulePtr[q->rulenum]->ret==NULL ) 1308 { 1309 warnFL( eMsg1("rule %s yields no return value(s)", r->text), 1310 FileStr[r->file], r->line ); 1311 } 1312 if ( r->assign==NULL && RulePtr[q->rulenum]->ret!=NULL ) 1313 { 1314 warnFL( eMsg1("rule %s returns a value(s)", r->text), 1315 FileStr[r->file], r->line ); 1316 } 1317 if ( !r->linked ) 1318 { 1319 addFoLink( r->next, r->rname, RulePtr[q->rulenum] ); 1320 r->linked = TRUE; 1321 } 1322 } 1323 FoLink( r->next ); 1324 return; 1325 case nToken : 1326 FoLink( ((TokNode *)p)->next ); 1327 return; 1328 case nAction : 1329 FoLink( ((ActionNode *)p)->next ); 1330 return; 1331 default : 1332 fatal_internal("invalid node type"); 1333 } 1334 } 1335 1336 /* 1337 * Add a reference to the end of a rule. 1338 * 1339 * 'r' points to the RuleBlk node in a rule. r->end points to the last node 1340 * (EndRule jtype) in a rule. 1341 * 1342 * Initial: 1343 * r->end --> o 1344 * 1345 * After: 1346 * r->end --> o-->o--> Ptr to node following rule r in another rule 1347 * | 1348 * o--> Ptr to node following another reference to r. 1349 * 1350 * Note that the links are added to the head of the list so that r->end->p1 1351 * always points to the most recently added follow-link. At the end, it should 1352 * point to the last reference found in the grammar (starting from the 1st rule). 1353 */ 1354 void 1355 #ifdef __USE_PROTOS 1356 addFoLink( Node *p, char *rname, Junction *r ) 1357 #else 1358 addFoLink( p, rname, r ) 1359 Node *p; 1360 char *rname; 1361 Junction *r; 1362 #endif 1363 { 1364 Junction *j; 1365 require(r!=NULL, "addFoLink: incorrect rule graph"); 1366 require(r->end!=NULL, "addFoLink: incorrect rule graph"); 1367 require(r->end->jtype==EndRule, "addFoLink: incorrect rule graph"); 1368 require(p!=NULL, "addFoLink: NULL FOLLOW link"); 1369 1370 j = newJunction(); 1371 j->rname = rname; /* rname on follow links point to target rule */ 1372 j->p1 = p; /* link to other rule */ 1373 j->p2 = (Node *) r->end->p1;/* point to head of list */ 1374 r->end->p1 = (Node *) j; /* reset head to point to new node */ 1375 } 1376 1377 void 1378 #ifdef __USE_PROTOS 1379 GenCrossRef( Junction *p ) 1380 #else 1381 GenCrossRef( p ) 1382 Junction *p; 1383 #endif 1384 { 1385 set a; 1386 Junction *j; 1387 RuleEntry *q; 1388 unsigned e; 1389 require(p!=NULL, "GenCrossRef: why are you passing me a null grammar?"); 1390 1391 printf("Cross Reference:\n\n"); 1392 a = empty; 1393 for (; p!=NULL; p = (Junction *)p->p2) 1394 { 1395 printf("Rule %20s referenced by {", p->rname); 1396 /* make a set of rules for uniqueness */ 1397 for (j = (Junction *)(p->end)->p1; j!=NULL; j = (Junction *)j->p2) 1398 { 1399 q = (RuleEntry *) hash_get(Rname, j->rname); 1400 require(q!=NULL, "GenCrossRef: FoLinks are screwed up"); 1401 set_orel(q->rulenum, &a); 1402 } 1403 for (; !set_nil(a); set_rm(e, a)) 1404 { 1405 e = set_int(a); 1406 printf(" %s", RulePtr[e]->rname); 1407 } 1408 printf(" }\n"); 1409 } 1410 set_free( a ); 1411 } 1412 1413 /* 1414 The single argument is a pointer to the start of an element of a 1415 C++ style function prototypet list. Given a pointer to the start of 1416 an formal we must locate the comma (or the end of the string) 1417 and locate the datatype, formal name, and initial value expression. 1418 1419 The function returns a pointer to the character following the comma 1420 which terminates the formal declaration, or a pointer to the end of 1421 the string if none was found. 1422 1423 I thought we were parsing specialists, how come I'm doing this by 1424 hand written code ? 1425 1426 Examples of input: 1427 1428 Foo f, 1429 Foo f = Foo(1), 1430 Foo f = Foo(1,2), 1431 Foo f = &farray[1,2], 1432 Foo f = ",", 1433 Foo f = ',', 1434 TFoo<int,char> f = TFoo<int,char>(1,2), 1435 1436 A non-zero value for nesting indicates a problem matching '(' and ')', 1437 '[' and ']', '<' and '>', '{' and '}', or improperly terminated string 1438 or character literal. 1439 1440 */ 1441 1442 1443 /* 1444 * Don't care if it is a valid string literal or not, just find the end 1445 * Start with pointer to leading "\"" 1446 */ 1447 1448 #ifdef __USE_PROTOS 1449 char * skipStringLiteral(char *pCurrent) 1450 #else 1451 char * skipStringLiteral(pCurrent) 1452 char *pCurrent; 1453 #endif 1454 { 1455 char *p = pCurrent; 1456 if (*p == 0) return p; 1457 require (*p == '\"', "skipStringLiteral") 1458 p++; 1459 for (p = p; *p != 0; p++) { 1460 if (*p == '\\') { 1461 p++; 1462 if (*p == 0) break; 1463 p++; 1464 } 1465 if (*p == '\"') { 1466 p++; 1467 break; 1468 } 1469 } 1470 return p; 1471 } 1472 1473 /* 1474 * Don't care if it is a valid character literal or not, just find the end 1475 * Start with pointer to leading "'" 1476 */ 1477 1478 #ifdef __USE_PROTOS 1479 char * skipCharLiteral(char *pStart) 1480 #else 1481 char * skipCharLiteral(pStart) 1482 char *pStart; 1483 #endif 1484 { 1485 char *p = pStart; 1486 if (*p == 0) return p; 1487 require (*p == '\'', "skipCharLiteral") 1488 p++; 1489 for (p = p; *p != 0; p++) { 1490 if (*p == '\\') { 1491 p++; 1492 if (*p == 0) break; 1493 p++; 1494 } 1495 if (*p == '\'') { 1496 p++; 1497 break; 1498 } 1499 } 1500 return p; 1501 } 1502 1503 #ifdef __USE_PROTOS 1504 char * skipSpaces(char *pStart) 1505 #else 1506 char * skipSpaces(pStart) 1507 char * pStart; 1508 #endif 1509 { 1510 char *p = pStart; 1511 while (*p != 0 && isspace(*p)) p++; 1512 return p; 1513 } 1514 1515 #ifdef __USE_PROTOS 1516 char * skipToSeparatorOrEqualSign(char *pStart, int *pNest) 1517 #else 1518 char * skipToSeparatorOrEqualSign(pStart, pNest) 1519 char *pStart; 1520 int *pNest; 1521 #endif 1522 { 1523 char *p = pStart; 1524 1525 int nest = 0; 1526 1527 *pNest = (-1); 1528 1529 while (*p != 0) { 1530 switch (*p) { 1531 1532 case '(' : 1533 case '[' : 1534 case '<' : 1535 case '{' : 1536 nest++; 1537 p++; 1538 break; 1539 1540 case ')' : 1541 case ']' : 1542 case '>' : 1543 case '}' : 1544 nest--; 1545 p++; 1546 break; 1547 1548 case '"' : 1549 p = skipStringLiteral(p); 1550 break; 1551 1552 case '\'' : 1553 p = skipCharLiteral(p); 1554 break; 1555 1556 case '\\': 1557 p++; 1558 if (*p == 0) goto EXIT; 1559 p++; 1560 break; 1561 1562 case ',': 1563 case '=': 1564 if (nest == 0) goto EXIT; 1565 p++; 1566 break; 1567 1568 default: 1569 p++; 1570 } 1571 } 1572 EXIT: 1573 *pNest = nest; 1574 return p; 1575 } 1576 1577 #ifdef __USE_PROTOS 1578 char * skipToSeparator(char *pStart, int *pNest) 1579 #else 1580 char * skipToSeparator(pStart, pNest) 1581 char *pStart; 1582 int *pNest; 1583 #endif 1584 { 1585 char * p = pStart; 1586 for ( ; ; ) { 1587 p = skipToSeparatorOrEqualSign(p, pNest); 1588 if (*pNest != 0) return p; 1589 if (*p == ',') return p; 1590 if (*p == 0) return p; 1591 p++; 1592 } 1593 } 1594 1595 /* skip to just past the "=" separating the declaration from the initialization value */ 1596 1597 #ifdef __USE_PROTOS 1598 char * getInitializer(char *pStart) 1599 #else 1600 char * getInitializer(pStart) 1601 char * pStart; 1602 #endif 1603 { 1604 char *p; 1605 char *pDataType; 1606 char *pSymbol; 1607 char *pEqualSign; 1608 char *pValue; 1609 char *pSeparator; 1610 int nest = 0; 1611 1612 require(pStart!=NULL, "getInitializer: invalid string"); 1613 1614 p = endFormal(pStart, 1615 &pDataType, 1616 &pSymbol, 1617 &pEqualSign, 1618 &pValue, 1619 &pSeparator, 1620 &nest); 1621 if (nest != 0) return NULL; 1622 if (pEqualSign == NULL) return NULL; 1623 if (pValue == NULL) return NULL; 1624 return strBetween(pValue, NULL, pSeparator); 1625 } 1626 1627 /* 1628 Examines the string from pStart to pEnd-1. 1629 If the string has 0 length or is entirely white space 1630 returns 1. Otherwise 0. 1631 */ 1632 1633 #ifdef __USE_PROTOS 1634 int isWhiteString(const char *pStart, const char *pEnd) 1635 #else 1636 int isWhiteString(pStart, pEnd) 1637 const char *pStart; 1638 const char *pEnd; 1639 #endif 1640 { 1641 const char *p; 1642 for (p = pStart; p < pEnd; p++) { 1643 if (! isspace(*p)) return 0; 1644 } 1645 return 1; 1646 } 1647 1648 /* 1649 This replaces HasComma() which couldn't distinguish 1650 1651 foo ["a,b"] 1652 1653 from: 1654 1655 foo[a,b] 1656 1657 */ 1658 1659 #ifdef __USE_PROTOS 1660 int hasMultipleOperands(char *pStart) 1661 #else 1662 int hasMultipleOperands(pStart) 1663 char *pStart; 1664 #endif 1665 { 1666 char *p = pStart; 1667 int nest = 0; 1668 1669 p = skipSpaces(p); 1670 if (*p == 0) return 0; 1671 p = skipToSeparator(p, &nest); 1672 if (nest == 0 && *p == ',') return 1; 1673 return 0; 1674 } 1675 1676 1677 #define MAX_STR_BETWEEN_WORK_AREA 1000 1678 1679 static char strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA]; 1680 1681 1682 /* 1683 strBetween(pStart, pNext, pStop) 1684 1685 Creates a null terminated string by copying the text between two pointers 1686 to a work area. The start of the string is pStart. The end of the string 1687 is the character before pNext, or if pNext is null then the character before 1688 pStop. Trailing spaces are not included in the copy operation. 1689 1690 This is used when a string contains several parts. The pNext part may be 1691 optional. The pStop will stop the scan when the optional part is not present 1692 (is a null pointer). 1693 */ 1694 1695 #ifdef __USE_PROTOS 1696 char *strBetween(char *pStart, char *pNext, char *pStop) 1697 #else 1698 char *strBetween(pStart, pNext, pStop) 1699 char *pStart; 1700 char *pNext; 1701 char *pStop; 1702 #endif 1703 { 1704 char *p; 1705 char *q = strBetweenWorkArea; 1706 const char *pEnd; 1707 1708 pEnd = (pNext != NULL) ? pNext : pStop; 1709 1710 require (pEnd != NULL, "pEnd == NULL"); 1711 require (pEnd >= pStart, "pEnd < pStart"); 1712 for (pEnd--; pEnd >= pStart; pEnd--) { /* MR31 */ 1713 if (! isspace(*pEnd)) break; 1714 } 1715 for (p = pStart; 1716 p <= pEnd && q < &strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA-2]; 1717 p++, q++) { 1718 *q = *p; 1719 } 1720 *q = 0; 1721 return strBetweenWorkArea; 1722 } 1723 1724 /* 1725 function Returns pointer to character following separator at 1726 value which to continue search for next formal. If at the 1727 end of the string a pointer to the null byte at the 1728 end of the string is returned. 1729 1730 pStart Pointer to the starting position of the formal list 1731 1732 This may be the middle of a longer string, for example 1733 when looking for the end of formal #3 starting from 1734 the middle of the complete formal list. 1735 1736 ppDataType Returns a pointer to the start of the data type in the 1737 formal. Example: pointer to "Foo". 1738 1739 ppSymbol Returns a pointer to the start of the formal symbol. 1740 Example: pointer to "f". 1741 1742 ppEqualSign Returns a pointer to the equal sign separating the 1743 formal symbol from the initial value. If there is 1744 no "=" then this will be NULL. 1745 1746 ppValue Returns a pointer to the initial value part of the 1747 formal declaration. Example: pointer to "&farray[1,2]" 1748 1749 ppSeparator Returns a pointer to the character which terminated the 1750 scan. This should be a pointer to a comma or a null 1751 byte which terminates the string. 1752 1753 pNest Returns the nesting level when a separator was found. 1754 This is non-zero for any kind of error. This is zero 1755 for a successful parse of this portion of the formal 1756 list. 1757 1758 */ 1759 1760 #ifdef __USE_PROTOS 1761 char * endFormal(char *pStart, 1762 char **ppDataType, 1763 char **ppSymbol, 1764 char **ppEqualSign, 1765 char **ppValue, 1766 char **ppSeparator, 1767 int *pNest) 1768 #else 1769 char * endFormal(pStart, 1770 ppDataType, 1771 ppSymbol, 1772 ppEqualSign, 1773 ppValue, 1774 ppSeparator, 1775 pNest) 1776 char *pStart; 1777 char **ppDataType; 1778 char **ppSymbol; 1779 char **ppEqualSign; 1780 char **ppValue; 1781 char **ppSeparator; 1782 int *pNest; 1783 1784 #endif 1785 { 1786 char *p = pStart; 1787 char *q; 1788 1789 *ppDataType = NULL; 1790 *ppSymbol = NULL; 1791 *ppEqualSign = NULL; 1792 *ppValue = NULL; 1793 *ppSeparator = NULL; 1794 1795 *pNest = 0; 1796 1797 /* The first non-blank is the start of the datatype */ 1798 1799 p = skipSpaces(p); 1800 if (*p == 0) goto EXIT; 1801 *ppDataType = p; 1802 1803 /* We are not looking for the symbol, we are looking 1804 for the separator that follows the symbol. Then 1805 we'll back up. 1806 1807 Search for the ',' or '=" or null terminator. 1808 */ 1809 1810 p = skipToSeparatorOrEqualSign(p, pNest); 1811 1812 if (*pNest != 0) goto EXIT; 1813 1814 /* 1815 Work backwards to find start of symbol 1816 Skip spaces between the end of symbol and separator 1817 Assume that there are no spaces in the formal, but 1818 there is a space preceding the formal 1819 */ 1820 1821 for (q = &p[-1]; q >= *ppDataType; q--) { 1822 if (! isspace(*q)) break; 1823 } 1824 if (q < *ppDataType) goto EXIT; 1825 1826 /* 1827 MR26 Handle things like: IIR_Bool (IIR_Decl::*constraint)() 1828 Backup until we hit the end of a symbol string, then find the 1829 start of the symbol string. This wont' work for functions 1830 with prototypes, but works for the most common cases. For 1831 others, use typedef names. 1832 */ 1833 1834 /* MR26 */ for (q = q; q >= *ppDataType; q--) { 1835 /* MR26 */ if (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$') break; 1836 /* MR26 */ } 1837 /* MR26 */ if (q < *ppDataType) goto EXIT; 1838 1839 for (q = q; q >= *ppDataType; q--) { 1840 if ( ! (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$')) break; 1841 } 1842 1843 *ppSymbol = &q[1]; 1844 1845 if (*p == ',' || *p == 0) { 1846 *ppSeparator = p; 1847 goto EXIT; 1848 } 1849 1850 *ppEqualSign = p; 1851 p = skipSpaces(++p); 1852 *ppValue = p; 1853 if (*p == 0) goto EXIT; 1854 1855 1856 while (*p != 0 && *pNest == 0 && *p != ',') { 1857 p = skipToSeparator(p, pNest); 1858 } 1859 if (*pNest == 0) *ppSeparator = p; 1860 1861 EXIT: 1862 if (*p == ',') p++; 1863 return p; 1864 } 1865