Home | History | Annotate | Download | only in antlr
      1 /*
      2  * misc.c
      3  *
      4  * Manage tokens, regular expressions.
      5  * Print methods for debugging
      6  * Compute follow lists onto tail ends of rules.
      7  *
      8  * The following functions are visible:
      9  *
     10  *		int		addTname(char *);		Add token name
     11  *		int		addTexpr(char *);		Add token expression
     12  *		int		Tnum(char *);			Get number of expr/token
     13  *		void	Tklink(char *, char *);	Link a name with an expression
     14  *		int		hasAction(expr);		Does expr already have action assigned?
     15  *		void	setHasAction(expr);		Indicate that expr now has an action
     16  *		Entry	*newEntry(char *,int);	Create new table entry with certain size
     17  *		void	list_add(ListNode **list, char *e)
     18  *      void    list_free(ListNode **list, int freeData);   *** MR10 ***
     19  *		void	list_apply(ListNode *list, void (*f)())
     20  *		void	lexclass(char *m);		switch to new/old lexical class
     21  *		void	lexmode(int i);			switch to old lexical class i
     22  *
     23  * SOFTWARE RIGHTS
     24  *
     25  * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
     26  * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
     27  * company may do whatever they wish with source code distributed with
     28  * PCCTS or the code generated by PCCTS, including the incorporation of
     29  * PCCTS, or its output, into commerical software.
     30  *
     31  * We encourage users to develop software with PCCTS.  However, we do ask
     32  * that credit is given to us for developing PCCTS.  By "credit",
     33  * we mean that if you incorporate our source code into one of your
     34  * programs (commercial product, research project, or otherwise) that you
     35  * acknowledge this fact somewhere in the documentation, research report,
     36  * etc...  If you like PCCTS and have developed a nice tool with the
     37  * output, please mention that you developed it using PCCTS.  In
     38  * addition, we ask that this header remain intact in our source code.
     39  * As long as these guidelines are kept, we expect to continue enhancing
     40  * this system and expect to make other tools available as they are
     41  * completed.
     42  *
     43  * ANTLR 1.33
     44  * Terence Parr
     45  * Parr Research Corporation
     46  * with Purdue University and AHPCRC, University of Minnesota
     47  * 1989-2001
     48  */
     49 
     50 #include <stdio.h>
     51 #include "pcctscfg.h"
     52 #include "set.h"
     53 #include "syn.h"
     54 #include "hash.h"
     55 #include "generic.h"
     56 #include "dlgdef.h"
     57 #include <ctype.h>
     58 
     59 static int tsize=TSChunk;		/* size of token str arrays */
     60 
     61 static void
     62 #ifdef __USE_PROTOS
     63 RemapForcedTokensInSyntaxDiagram(Node *);
     64 #else
     65 RemapForcedTokensInSyntaxDiagram();
     66 #endif
     67 
     68 				/* T o k e n  M a n i p u l a t i o n */
     69 
     70 /*
     71  * add token 't' to the TokenStr/Expr array.  Make more room if necessary.
     72  * 't' is either an expression or a token name.
     73  *
     74  * There is only one TokenStr array, but multiple ExprStr's.  Therefore,
     75  * for each lex class (element of lclass) we must extend the ExprStr array.
     76  * ExprStr's and TokenStr are always all the same size.
     77  *
     78  * Also, there is a Texpr hash table for each automaton.
     79  */
     80 static void
     81 #ifdef __USE_PROTOS
     82 Ttrack( char *t )
     83 #else
     84 Ttrack( t )
     85 char *t;
     86 #endif
     87 {
     88 	if ( TokenNum >= tsize )	/* terminal table overflow? */
     89 	{
     90 		char **p;
     91 		int i, more, j;
     92 
     93 		more = TSChunk * (1 + ((TokenNum-tsize) / TSChunk));
     94 		tsize += more;
     95 		TokenStr = (char **) realloc((char *)TokenStr, tsize*sizeof(char *));
     96 		require(TokenStr != NULL, "Ttrack: can't extend TokenStr");
     97 		for (i=0; i<NumLexClasses; i++)
     98 		{
     99 			lclass[i].exprs = (char **)
    100 							  realloc((char *)lclass[i].exprs, tsize*sizeof(char *));
    101 			require(lclass[i].exprs != NULL, "Ttrack: can't extend ExprStr");
    102 			for (p= &lclass[i].exprs[tsize-more],j=1; j<=more; j++) *p++ = NULL;
    103 		}
    104 		for (p= &TokenStr[tsize-more],i=1; i<=more; i++) *p++ = NULL;
    105 		lexmode( CurrentLexClass ); /* reset ExprStr in case table moved */
    106 	}
    107 	/* note: we use the actual ExprStr/TokenStr array
    108 	 * here as TokenInd doesn't exist yet
    109 	 */
    110 	if ( *t == '"' ) ExprStr[TokenNum] = t;
    111 	else TokenStr[TokenNum] = t;
    112 }
    113 
    114 static Expr *
    115 #ifdef __USE_PROTOS
    116 newExpr( char *e )
    117 #else
    118 newExpr( e )
    119 char *e;
    120 #endif
    121 {
    122 	Expr *p = (Expr *) calloc(1, sizeof(Expr));
    123 	require(p!=NULL, "newExpr: cannot alloc Expr node");
    124 
    125 	p->expr = e;
    126 	p->lclass = CurrentLexClass;
    127 	return p;
    128 }
    129 
    130 /* switch to lexical class/mode m.  This amounts to creating a new
    131  * lex mode if one does not already exist and making ExprStr point
    132  * to the correct char string array.  We must also switch Texpr tables.
    133  *
    134  * BTW, we need multiple ExprStr arrays because more than one automaton
    135  * may have the same label for a token, but with different expressions.
    136  * We need to track an expr for each automaton.  If we disallowed this
    137  * feature, only one ExprStr would be required.
    138  */
    139 void
    140 #ifdef __USE_PROTOS
    141 lexclass( char *m )
    142 #else
    143 lexclass( m )
    144 char *m;
    145 #endif
    146 {
    147 	int i;
    148 	TermEntry *p;
    149 	static char EOFSTR[] = "\"@\"";
    150 
    151 	if ( hash_get(Tname, m) != NULL )
    152 	{
    153 		warn(eMsg1("lexclass name conflicts with token/errclass label '%s'",m));
    154 	}
    155 	/* does m already exist? */
    156 	i = LexClassIndex(m);
    157 	if ( i != -1 ) {lexmode(i); return;}
    158 	/* must make new one */
    159 	NumLexClasses++;
    160 	CurrentLexClass = NumLexClasses-1;
    161 	require(NumLexClasses<=MaxLexClasses, "number of allowable lexclasses exceeded\nIncrease MaxLexClasses in generic.h and recompile all C files");
    162 	lclass[CurrentLexClass].classnum = m;
    163 	lclass[CurrentLexClass].exprs = (char **) calloc(tsize, sizeof(char *));
    164 	require(lclass[CurrentLexClass].exprs!=NULL,
    165 			"lexclass: cannot allocate ExprStr");
    166 	lclass[CurrentLexClass].htable = newHashTable();
    167 	ExprStr = lclass[CurrentLexClass].exprs;
    168 	Texpr = lclass[CurrentLexClass].htable;
    169 	/* define EOF for each automaton */
    170 	p = newTermEntry( EOFSTR );
    171 	p->token = EofToken;	/* couldn't have remapped tokens yet, use EofToken */
    172 	hash_add(Texpr, EOFSTR, (Entry *)p);
    173 	list_add(&ExprOrder, (void *)newExpr(EOFSTR));
    174 	/* note: we use the actual ExprStr array
    175 	 * here as TokenInd doesn't exist yet
    176 	 */
    177 	ExprStr[EofToken] = EOFSTR;
    178 }
    179 
    180 void
    181 #ifdef __USE_PROTOS
    182 lexmode( int i )
    183 #else
    184 lexmode( i )
    185 int i;
    186 #endif
    187 {
    188 	require(i<NumLexClasses, "lexmode: invalid mode");
    189 	ExprStr = lclass[i].exprs;
    190 	Texpr = lclass[i].htable;
    191 	CurrentLexClass = i;
    192 }
    193 
    194 /* return index into lclass array of lexical class. return -1 if nonexistent */
    195 int
    196 #ifdef __USE_PROTOS
    197 LexClassIndex( char *cl )
    198 #else
    199 LexClassIndex( cl )
    200 char *cl;
    201 #endif
    202 {
    203 	int i;
    204 
    205 	for (i=0; i<NumLexClasses; i++)
    206 	{
    207 		if ( strcmp(lclass[i].classnum, cl) == 0 ) return i;
    208 	}
    209 	return -1;
    210 }
    211 
    212 int
    213 #ifdef __USE_PROTOS
    214 hasAction( char *expr )
    215 #else
    216 hasAction( expr )
    217 char *expr;
    218 #endif
    219 {
    220 	TermEntry *p;
    221 	require(expr!=NULL, "hasAction: invalid expr");
    222 
    223 	p = (TermEntry *) hash_get(Texpr, expr);
    224 	require(p!=NULL, eMsg1("hasAction: expr '%s' doesn't exist",expr));
    225 	return (p->action!=NULL);
    226 }
    227 
    228 void
    229 #ifdef __USE_PROTOS
    230 setHasAction( char *expr, char *action )
    231 #else
    232 setHasAction( expr, action )
    233 char *expr;
    234 char *action;
    235 #endif
    236 {
    237 	TermEntry *p;
    238 	require(expr!=NULL, "setHasAction: invalid expr");
    239 
    240 	p = (TermEntry *) hash_get(Texpr, expr);
    241 	require(p!=NULL, eMsg1("setHasAction: expr '%s' doesn't exist",expr));
    242 	p->action = action;
    243 }
    244 
    245 ForcedToken *
    246 #ifdef __USE_PROTOS
    247 newForcedToken(char *token, int tnum)
    248 #else
    249 newForcedToken(token, tnum)
    250 char *token;
    251 int tnum;
    252 #endif
    253 {
    254 	ForcedToken *ft = (ForcedToken *) calloc(1, sizeof(ForcedToken));
    255 	require(ft!=NULL, "out of memory");
    256 	ft->token = token;
    257 	ft->tnum = tnum;
    258 	return ft;
    259 }
    260 
    261 /*
    262  * Make a token indirection array that remaps token numbers and then walk
    263  * the appropriate symbol tables and SynDiag to change token numbers
    264  */
    265 void
    266 #ifdef __USE_PROTOS
    267 RemapForcedTokens(void)
    268 #else
    269 RemapForcedTokens()
    270 #endif
    271 {
    272 	ListNode *p;
    273 	ForcedToken *q;
    274 	int max_token_number=0;     /* MR9 23-Sep-97 Removed "unsigned" */
    275 	int i;
    276 
    277 	if ( ForcedTokens == NULL ) return;
    278 
    279 	/* find max token num */
    280 	for (p = ForcedTokens->next; p!=NULL; p=p->next)
    281 	{
    282 		q = (ForcedToken *) p->elem;
    283 		if ( q->tnum > max_token_number ) max_token_number = q->tnum;
    284 	}
    285 	fprintf(stderr, "max token number is %d\n", max_token_number);
    286 
    287 	/* make token indirection array */
    288 	TokenInd = (int *) calloc(max_token_number+1, sizeof(int));
    289 	LastTokenCounted = TokenNum;
    290 	TokenNum = max_token_number+1;
    291 	require(TokenInd!=NULL, "RemapForcedTokens: cannot allocate TokenInd");
    292 
    293 	/* fill token indirection array and change token id htable ; swap token indices */
    294 	for (i=1; i<TokenNum; i++) TokenInd[i] = i;
    295 	for (p = ForcedTokens->next; p!=NULL; p=p->next)
    296 	{
    297 		TermEntry *te;
    298 		int old_pos, t;
    299 
    300 		q = (ForcedToken *) p->elem;
    301 		fprintf(stderr, "%s forced to %d\n", q->token, q->tnum);
    302 		te = (TermEntry *) hash_get(Tname, q->token);
    303 		require(te!=NULL, "RemapForcedTokens: token not in hash table");
    304 		old_pos = te->token;
    305 		fprintf(stderr, "Before: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]);
    306 		fprintf(stderr, "Before: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]);
    307 		q = (ForcedToken *) p->elem;
    308 		t = TokenInd[old_pos];
    309 		TokenInd[old_pos] = q->tnum;
    310 		TokenInd[q->tnum] = t;
    311 		te->token = q->tnum;		/* update token type id symbol table */
    312 		fprintf(stderr, "After: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]);
    313 		fprintf(stderr, "After: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]);
    314 
    315 		/* Change the token number in the sym tab entry for the exprs
    316 		 * at the old position of the token id and the target position
    317 		 */
    318 		/* update expr at target (if any) of forced token id */
    319 		if ( q->tnum < TokenNum )	/* is it a valid position? */
    320 		{
    321 			for (i=0; i<NumLexClasses; i++)
    322 			{
    323 				if ( lclass[i].exprs[q->tnum]!=NULL )
    324 				{
    325 					/* update the symbol table for this expr */
    326 					TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[q->tnum]);
    327 					require(e!=NULL, "RemapForcedTokens: expr not in hash table");
    328 					e->token = old_pos;
    329 					fprintf(stderr, "found expr '%s' at target %d in lclass[%d]; changed to %d\n",
    330 							lclass[i].exprs[q->tnum], q->tnum, i, old_pos);
    331 				}
    332 			}
    333 		}
    334 		/* update expr at old position (if any) of forced token id */
    335 		for (i=0; i<NumLexClasses; i++)
    336 		{
    337 			if ( lclass[i].exprs[old_pos]!=NULL )
    338 			{
    339 				/* update the symbol table for this expr */
    340 				TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[old_pos]);
    341 				require(e!=NULL, "RemapForcedTokens: expr not in hash table");
    342 				e->token = q->tnum;
    343 				fprintf(stderr, "found expr '%s' for id %s in lclass[%d]; changed to %d\n",
    344 						lclass[i].exprs[old_pos], q->token, i, q->tnum);
    345 			}
    346 		}
    347 	}
    348 
    349 	/* Update SynDiag */
    350 	RemapForcedTokensInSyntaxDiagram((Node *)SynDiag);
    351 }
    352 
    353 static void
    354 #ifdef __USE_PROTOS
    355 RemapForcedTokensInSyntaxDiagram(Node *p)
    356 #else
    357 RemapForcedTokensInSyntaxDiagram(p)
    358 Node *p;
    359 #endif
    360 {
    361 	Junction *j = (Junction *) p;
    362 	RuleRefNode *r = (RuleRefNode *) p;
    363 	TokNode *t = (TokNode *)p;
    364 
    365 	if ( p==NULL ) return;
    366 	require(p->ntype>=1 && p->ntype<=NumNodeTypes,	"Remap...: invalid diagram node");
    367 	switch ( p->ntype )
    368 	{
    369 		case nJunction :
    370 			if ( j->visited ) return;
    371 			if ( j->jtype == EndRule ) return;
    372 			j->visited = TRUE;
    373 			RemapForcedTokensInSyntaxDiagram( j->p1 );
    374 			RemapForcedTokensInSyntaxDiagram( j->p2 );
    375 			j->visited = FALSE;
    376 			return;
    377 		case nRuleRef :
    378 			RemapForcedTokensInSyntaxDiagram( r->next );
    379 			return;
    380 		case nToken :
    381 			if ( t->remapped ) return;	/* we've been here before */
    382 			t->remapped = 1;
    383 			fprintf(stderr, "remapping %d to %d\n", t->token, TokenInd[t->token]);
    384 			t->token = TokenInd[t->token];
    385 			RemapForcedTokensInSyntaxDiagram( t->next );
    386 			return;
    387 		case nAction :
    388 			RemapForcedTokensInSyntaxDiagram( ((ActionNode *)p)->next );
    389 			return;
    390 		default :
    391 			fatal_internal("invalid node type");
    392 	}
    393 }
    394 
    395 /*
    396  * Add a token name.  Return the token number associated with it.  If it already
    397  * exists, then return the token number assigned to it.
    398  *
    399  * Track the order in which tokens are found so that the DLG output maintains
    400  * that order.  It also lets us map token numbers to strings.
    401  */
    402 int
    403 #ifdef __USE_PROTOS
    404 addTname( char *token )
    405 #else
    406 addTname( token )
    407 char *token;
    408 #endif
    409 {
    410 	TermEntry *p;
    411 	require(token!=NULL, "addTname: invalid token name");
    412 
    413 	if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token;
    414 	p = newTermEntry( token );
    415 	Ttrack( p->str );
    416 	p->token = TokenNum++;
    417 	hash_add(Tname, token, (Entry *)p);
    418 	return p->token;
    419 }
    420 
    421 /* This is the same as addTname except we force the TokenNum to be tnum.
    422  * We don't have to use the Forced token stuff as no tokens will have
    423  * been defined with #tokens when this is called.  This is only called
    424  * when a #tokdefs meta-op is used.
    425  */
    426 int
    427 #ifdef __USE_PROTOS
    428 addForcedTname( char *token, int tnum )
    429 #else
    430 addForcedTname( token, tnum )
    431 char *token;
    432 int tnum;
    433 #endif
    434 {
    435 	TermEntry *p;
    436 	require(token!=NULL, "addTname: invalid token name");
    437 
    438 	if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token;
    439 	p = newTermEntry( token );
    440 	Ttrack( p->str );
    441 	p->token = tnum;
    442 	hash_add(Tname, token, (Entry *)p);
    443 	return p->token;
    444 }
    445 
    446 /*
    447  * Add a token expr.  Return the token number associated with it.  If it already
    448  * exists, then return the token number assigned to it.
    449  */
    450 int
    451 #ifdef __USE_PROTOS
    452 addTexpr( char *expr )
    453 #else
    454 addTexpr( expr )
    455 char *expr;
    456 #endif
    457 {
    458 	TermEntry *p;
    459 	require(expr!=NULL, "addTexpr: invalid regular expression");
    460 
    461 	if ( (p=(TermEntry *)hash_get(Texpr, expr)) != NULL ) return p->token;
    462 	p = newTermEntry( expr );
    463 	Ttrack( p->str );
    464 	/* track the order in which they occur */
    465 	list_add(&ExprOrder, (void *)newExpr(p->str));
    466 	p->token = TokenNum++;
    467 	hash_add(Texpr, expr, (Entry *)p);
    468 	return p->token;
    469 }
    470 
    471 /* return the token number of 'term'.  Return 0 if no 'term' exists */
    472 int
    473 #ifdef __USE_PROTOS
    474 Tnum( char *term )
    475 #else
    476 Tnum( term )
    477 char *term;
    478 #endif
    479 {
    480 	TermEntry *p;
    481 	require(term!=NULL, "Tnum: invalid terminal");
    482 
    483 	if ( *term=='"' ) p = (TermEntry *) hash_get(Texpr, term);
    484 	else p = (TermEntry *) hash_get(Tname, term);
    485 	if ( p == NULL ) return 0;
    486 	else return p->token;
    487 }
    488 
    489 /* associate a Name with an expr.  If both have been already assigned
    490  * token numbers, then an error is reported.  Add the token or expr
    491  * that has not been added if no error.  This 'represents' the #token
    492  * ANTLR pseudo-op.  If both have not been defined, define them both
    493  * linked to same token number.
    494  */
    495 void
    496 #ifdef __USE_PROTOS
    497 Tklink( char *token, char *expr )
    498 #else
    499 Tklink( token, expr )
    500 char *token;
    501 char *expr;
    502 #endif
    503 {
    504 	TermEntry *p, *q;
    505 	require(token!=NULL && expr!=NULL, "Tklink: invalid token name and/or expr");
    506 
    507 	p = (TermEntry *) hash_get(Tname, token);
    508 	q = (TermEntry *) hash_get(Texpr, expr);
    509 	if ( p != NULL && q != NULL )	/* both defined */
    510 	{
    511 		warn( eMsg2("token name %s and rexpr %s already defined; ignored",
    512 					token, expr) );
    513 		return;
    514 	}
    515 	if ( p==NULL && q==NULL )		/* both not defined */
    516 	{
    517 		int t = addTname( token );
    518 		q = newTermEntry( expr );
    519 		hash_add(Texpr, expr, (Entry *)q);
    520 		q->token = t;
    521 		/* note: we use the actual ExprStr array
    522 		 * here as TokenInd doesn't exist yet
    523 		 */
    524 		ExprStr[t] = q->str;
    525 		/* track the order in which they occur */
    526 		list_add(&ExprOrder, (void *)newExpr(q->str));
    527 		return;
    528 	}
    529 	if ( p != NULL )				/* one is defined, one is not */
    530 	{
    531 		q = newTermEntry( expr );
    532 		hash_add(Texpr, expr, (Entry *)q);
    533 		q->token = p->token;
    534 		ExprStr[p->token] = q->str;	/* both expr and token str defined now */
    535 		list_add(&ExprOrder, (void *)newExpr(q->str));
    536 	}
    537 	else							/* trying to associate name with expr here*/
    538 	{
    539 		p = newTermEntry( token );
    540 		hash_add(Tname, token, (Entry *)p);
    541 		p->token = q->token;
    542 		TokenStr[p->token] = p->str;/* both expr and token str defined now */
    543 	}
    544 }
    545 
    546 /*
    547  * Given a string, this function allocates and returns a pointer to a
    548  * hash table record of size 'sz' whose "str" pointer is reset to a position
    549  * in the string table.
    550  */
    551 Entry *
    552 #ifdef __USE_PROTOS
    553 newEntry( char *text, int sz )
    554 #else
    555 newEntry( text, sz )
    556 char *text;
    557 int sz;
    558 #endif
    559 {
    560 	Entry *p;
    561 	require(text!=NULL, "new: NULL terminal");
    562 
    563 	if ( (p = (Entry *) calloc(1,sz)) == 0 )
    564 	{
    565 		fatal_internal("newEntry: out of memory for terminals\n");
    566 		exit(PCCTS_EXIT_FAILURE);
    567 	}
    568 	p->str = mystrdup(text);
    569 
    570 	return(p);
    571 }
    572 
    573 /*
    574  * add an element to a list.
    575  *
    576  * Any non-empty list has a sentinel node whose 'elem' pointer is really
    577  * a pointer to the last element.  (i.e. length(list) = #elemIn(list)+1).
    578  * Elements are appended to the list.
    579  */
    580 void
    581 #ifdef __USE_PROTOS
    582 list_add( ListNode **list, void *e )
    583 #else
    584 list_add( list, e )
    585 ListNode **list;
    586 void *e;
    587 #endif
    588 {
    589 	ListNode *p, *tail;
    590 	require(e!=NULL, "list_add: attempting to add NULL list element");
    591 
    592 	p = newListNode;
    593 	require(p!=NULL, "list_add: cannot alloc new list node");
    594 	p->elem = e;
    595 	if ( *list == NULL )
    596 	{
    597 		ListNode *sentinel = newListNode;
    598 		require(sentinel!=NULL, "list_add: cannot alloc sentinel node");
    599 		*list=sentinel;
    600 		sentinel->next = p;
    601 		sentinel->elem = (char *)p;		/* set tail pointer */
    602 	}
    603 	else								/* find end of list */
    604 	{
    605 		tail = (ListNode *) (*list)->elem;	/* get tail pointer */
    606 		tail->next = p;
    607 		(*list)->elem = (char *) p;		/* reset tail */
    608 	}
    609 }
    610 
    611 /* MR10 list_free() frees the ListNode elements in the list       */
    612 /* MR10   if freeData then free the data elements of the list too */
    613 
    614 void
    615 #ifdef __USE_PROTOS
    616 list_free(ListNode **list,int freeData)
    617 #else
    618 list_free(list,freeData)
    619   ListNode      **list;
    620   int           freeData;
    621 #endif
    622 {
    623 	ListNode *p;
    624     ListNode *next;
    625 
    626 	if (list == NULL) return;
    627     if (*list == NULL) return;
    628 	for (p=*list; p != NULL; p=next) {
    629       next=p->next;
    630       if (freeData && p->elem != NULL) {
    631         free( (char *) p->elem);
    632       };
    633       free( (char *) p);
    634     };
    635     *list=NULL;
    636 }
    637 
    638 void
    639 #ifdef __USE_PROTOS
    640 list_apply( ListNode *list, void (*f)(void *) )
    641 #else
    642 list_apply( list, f )
    643 ListNode *list;
    644 void (*f)();
    645 #endif
    646 {
    647 	ListNode *p;
    648 	require(f!=NULL, "list_apply: NULL function to apply");
    649 
    650 	if ( list == NULL ) return;
    651 	for (p = list->next; p!=NULL; p=p->next) (*f)( p->elem );
    652 }
    653 
    654 /* MR27 */
    655 
    656 #ifdef __USE_PROTOS
    657 int list_search_cstring(ListNode *list, char * cstring)
    658 #else
    659 int list_search_cstring(list, cstring)
    660   ListNode * list;
    661   char * cstring;
    662 #endif
    663 {
    664 	ListNode *p;
    665 	if (list == NULL ) return 0;
    666 	for (p = list->next; p!=NULL; p=p->next) {
    667 		if (p->elem == NULL) continue;
    668 		if (0 == strcmp((char *) p->elem , cstring)) return 1;
    669 	}
    670 	return 0;
    671 }
    672 
    673 			/* F O L L O W  C y c l e  S t u f f */
    674 
    675 /* make a key based upon (rulename, computation, k value).
    676  * Computation values are 'i'==FIRST, 'o'==FOLLOW.
    677  */
    678 
    679 /* MR10  Make the key all characters so it can be read easily   */
    680 /* MR10    by a simple dump program.  Also, separates           */
    681 /* MR10   'o' and 'i' from rule name                            */
    682 
    683 char *
    684 #ifdef __USE_PROTOS
    685 Fkey( char *rule, int computation, int k )
    686 #else
    687 Fkey( rule, computation, k )
    688 char *rule;
    689 int computation;
    690 int k;
    691 #endif
    692 {
    693 	static char key[MaxRuleName+2+2+1];                                 /* MR10 */
    694 	int i;
    695 
    696 	if ( k > 99 )                                                       /* MR10 */
    697 		fatal("k>99 is too big for this implementation of ANTLR!\n");   /* MR10 */
    698 	if ( (i=strlen(rule)) > MaxRuleName )                               /* MR10 */
    699 		fatal( eMsgd("rule name > max of %d\n", MaxRuleName) );         /* MR10 */
    700 	strcpy(key,rule);
    701 
    702 /* MR10 */     key[i]='*';
    703 /* MR10 */     key[i+1] = (char) computation; /* MR20 G. Hobbelt */
    704 /* MR10 */     if (k < 10) {
    705 /* MR10 */       key[i+2] = (char) ( '0' + k);
    706 /* MR10 */  	 key[i+3] = '\0';
    707 /* MR10 */     } else {
    708 /* MR10 */       key[i+2] = (char) ( '0' + k/10);
    709 /* MR10 */       key[i+3] = (char) ( '0' + k % 10);
    710 /* MR10 */       key[i+4] = '\0';
    711 /* MR10 */     };
    712 
    713 	return key;
    714 }
    715 
    716 /* Push a rule onto the kth FOLLOW stack */
    717 void
    718 #ifdef __USE_PROTOS
    719 FoPush( char *rule, int k )
    720 #else
    721 FoPush( rule, k )
    722 char *rule;
    723 int k;
    724 #endif
    725 {
    726 	RuleEntry *r;
    727 	require(rule!=NULL, "FoPush: tried to push NULL rule");
    728 	require(k<=CLL_k,	"FoPush: tried to access non-existent stack");
    729 
    730 	/*fprintf(stderr, "FoPush(%s)\n", rule);*/
    731 	r = (RuleEntry *) hash_get(Rname, rule);
    732 	if ( r == NULL ) {fatal_internal( eMsg1("rule %s must be defined but isn't", rule) );}
    733 	if ( FoStack[k] == NULL )		/* Does the kth stack exist yet? */
    734 	{
    735 		/*fprintf(stderr, "allocating FoStack\n");*/
    736 		FoStack[k] = (int *) calloc(FoStackSize, sizeof(int));
    737 		require(FoStack[k]!=NULL, "FoPush: cannot allocate FOLLOW stack\n");
    738 	}
    739 	if ( FoTOS[k] == NULL )
    740 	{
    741 		FoTOS[k]=FoStack[k];
    742 		*(FoTOS[k]) = r->rulenum;
    743 	}
    744 	else
    745 	{
    746 #ifdef MEMCHK
    747 		require(valid(FoStack[k]), "FoPush: invalid FoStack");
    748 #endif
    749 		if ( FoTOS[k] >= &(FoStack[k][FoStackSize-1]) )
    750 			fatal( eMsgd("exceeded max depth of FOLLOW recursion (%d)\n",
    751 						FoStackSize) );
    752 		require(FoTOS[k]>=FoStack[k],
    753 				eMsg1("FoPush: FoStack stack-ptr is playing out of its sandbox",
    754 					  rule));
    755 		++(FoTOS[k]);
    756 		*(FoTOS[k]) = r->rulenum;
    757 	}
    758 	{
    759 		/*
    760 ****		int *p;
    761 ****		fprintf(stderr, "FoStack[k=%d]:\n", k);
    762 ****		for (p=FoStack[k]; p<=FoTOS[k]; p++)
    763 ****		{
    764 ****			fprintf(stderr, "\t%s\n", RulePtr[*p]->rname);
    765 ****		}
    766 		*/
    767 	}
    768 }
    769 
    770 /* Pop one rule off of the FOLLOW stack.  TOS ptr is NULL if empty. */
    771 void
    772 #ifdef __USE_PROTOS
    773 FoPop( int k )
    774 #else
    775 FoPop( k )
    776 int k;
    777 #endif
    778 {
    779 	require(k<=CLL_k, "FoPop: tried to access non-existent stack");
    780 	/*fprintf(stderr, "FoPop\n");*/
    781 	require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]),
    782 			"FoPop: FoStack stack-ptr is playing out of its sandbox");
    783 	if ( FoTOS[k] == FoStack[k] ) FoTOS[k] = NULL;
    784 	else (FoTOS[k])--;
    785 }
    786 
    787 /* Compute FOLLOW cycle.
    788  * Mark all FOLLOW sets for rules in cycle as incomplete.
    789  * Then, save cycle on the cycle list (Cycles) for later resolution.
    790  * The Cycle is stored in the form:
    791  *		(head of cycle==croot, rest of rules in cycle==cyclicDep)
    792  *
    793  * e.g. (Fo means "FOLLOW of", "-->" means requires or depends on)
    794  *
    795  *		Fo(x)-->Fo(a)-->Fo(b)-->Fo(c)-->Fo(x)
    796  *										   ^----Infinite recursion (cycle)
    797  *
    798  * the cycle would be: x -> {a,b,c} or stored as (x,{a,b,c}).  Fo(x) depends
    799  * on the FOLLOW of a,b, and c.  The root of a cycle is always complete after
    800  * Fo(x) finishes.  Fo(a,b,c) however are not.  It turns out that all rules
    801  * in a FOLLOW cycle have the same FOLLOW set.
    802  */
    803 void
    804 #ifdef __USE_PROTOS
    805 RegisterCycle( char *rule, int k )
    806 #else
    807 RegisterCycle( rule, k )
    808 char *rule;
    809 int k;
    810 #endif
    811 {
    812 	CacheEntry *f;
    813 	Cycle *c;
    814 	int *p;
    815 	RuleEntry *r;
    816 	require(rule!=NULL, "RegisterCycle: tried to register NULL rule");
    817 	require(k<=CLL_k,	"RegisterCycle: tried to access non-existent stack");
    818 
    819 	/*fprintf(stderr, "RegisterCycle(%s)\n", rule);*/
    820 	/* Find cycle start */
    821 	r = (RuleEntry *) hash_get(Rname, rule);
    822 	require(r!=NULL,eMsg1("rule %s must be defined but isn't", rule));
    823 	require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]),
    824 			eMsg1("RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox",
    825 				  rule));
    826 /***	if ( FoTOS[k]<FoStack[k]||FoTOS[k]>&(FoStack[k][FoStackSize-1]) )
    827 ****	{
    828 ****		fprintf(stderr, "RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox\n",
    829 ****						rule);
    830 ****		fprintf(stderr, "RegisterCycle: sp==0x%x out of bounds 0x%x...0x%x\n",
    831 ****						FoTOS[k], FoStack[k], &(FoStack[k][FoStackSize-1]));
    832 ****		exit(PCCTS_EXIT_FAILURE);
    833 ****	}
    834 ****/
    835 
    836 #ifdef MEMCHK
    837 	require(valid(FoStack[k]), "RegisterCycle: invalid FoStack");
    838 #endif
    839 	for (p=FoTOS[k]; *p != r->rulenum && p >= FoStack[k]; --p) {;}
    840 	require(p>=FoStack[k], "RegisterCycle: FoStack is screwed up beyond belief");
    841 	if ( p == FoTOS[k] ) return;	/* don't worry about cycles to oneself */
    842 
    843 	/* compute cyclic dependents (rules in cycle except head) */
    844 	c = newCycle;
    845 	require(c!=NULL, "RegisterCycle: couldn't alloc new cycle");
    846 	c->cyclicDep = empty;
    847 	c->croot = *p++;		/* record root of cycle */
    848 	for (; p<=FoTOS[k]; p++)
    849 	{
    850 		/* Mark all dependent rules as incomplete */
    851 		f = (CacheEntry *) hash_get(Fcache, Fkey(RulePtr[*p]->rname,'o',k));
    852 		if ( f==NULL )
    853 		{
    854 			f = newCacheEntry( Fkey(RulePtr[*p]->rname,'o',k) );
    855 			hash_add(Fcache, Fkey(RulePtr[*p]->rname,'o',k), (Entry *)f);
    856 		}
    857 		f->incomplete = TRUE;
    858 
    859 		set_orel(*p, &(c->cyclicDep)); /* mark rule as dependent of croot */
    860 	}
    861 	list_add(&(Cycles[k]), (void *)c);
    862 }
    863 
    864 /* make all rules in cycle complete
    865  *
    866  * while ( some set has changed ) do
    867  *		for each cycle do
    868  *			if degree of FOLLOW set for croot > old degree then
    869  *				update all FOLLOW sets for rules in cyclic dependency
    870  *				change = TRUE
    871  *			endif
    872  *		endfor
    873  * endwhile
    874  */
    875 void
    876 #ifdef __USE_PROTOS
    877 ResolveFoCycles( int k )
    878 #else
    879 ResolveFoCycles( k )
    880 int k;
    881 #endif
    882 {
    883 	ListNode *p, *q;
    884 	Cycle *c;
    885 	int changed = 1;
    886 	CacheEntry *f,*g;
    887 	int r;
    888 /*  int i;  */  /* MR10 not useful */
    889 	unsigned d;
    890 
    891     unsigned    *cursor;        /* MR10 */
    892     unsigned    *origin;        /* MR10 */
    893 
    894 	/*fprintf(stderr, "Resolving following cycles for %d\n", k);*/
    895 	while ( changed )
    896 	{
    897 		changed = 0;
    898 /* MR10 i = 0;  */
    899 		for (p = Cycles[k]->next; p!=NULL; p=p->next)
    900 		{
    901 			c = (Cycle *) p->elem;
    902 			/*fprintf(stderr, "cycle %d: %s -->", i++, RulePtr[c->croot]->rname);*/
    903 			/*s_fprT(stderr, c->cyclicDep);*/
    904 			/*fprintf(stderr, "\n");*/
    905 			f = (CacheEntry *)
    906 					hash_get(Fcache, Fkey(RulePtr[c->croot]->rname,'o',k));
    907 			require(f!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[c->croot]->rname) );
    908 			if ( (d=set_deg(f->fset)) > c->deg )
    909 			{
    910 				/*fprintf(stderr, "Fo(%s) has changed\n", RulePtr[c->croot]->rname);*/
    911 				changed = 1;
    912 				c->deg = d;		/* update cycle FOLLOW set degree */
    913 
    914 /* MR10 */      origin=set_pdq(c->cyclicDep);
    915 /* MR10 */      for (cursor=origin; *cursor != nil; cursor++) {
    916 /* MR10 */         r=*cursor;
    917 
    918 /********		while ( !set_nil(c->cyclicDep) ) {      *****/
    919 /********					r = set_int(c->cyclicDep);  *****/
    920 /********					set_rm(r, c->cyclicDep);    *****/
    921 
    922 					/*fprintf(stderr, "updating Fo(%s)\n", RulePtr[r]->rname);*/
    923 					g = (CacheEntry *)
    924 							hash_get(Fcache, Fkey(RulePtr[r]->rname,'o',k));
    925 					require(g!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[r]->rname) );
    926 					set_orin(&(g->fset), f->fset);
    927 					g->incomplete = FALSE;
    928 				}
    929 /* MR10 */      free( (char *) origin);
    930 /* MR10 */      origin=NULL;
    931 			}
    932 		}
    933 /* MR10 - this if statement appears to be meaningless since i is always 0 */
    934 /* MR10		if ( i == 1 ) changed = 0;	*/ /* if only 1 cycle, no need to repeat */
    935 	}
    936 	/* kill Cycle list */
    937 	for (q = Cycles[k]->next; q != NULL; q=p)
    938 	{
    939 		p = q->next;
    940 		set_free( ((Cycle *)q->elem)->cyclicDep );
    941 		free((char *)q);
    942 	}
    943 	free( (char *)Cycles[k] );
    944 	Cycles[k] = NULL;
    945 }
    946 
    947 
    948 			/* P r i n t i n g  S y n t a x  D i a g r a m s */
    949 
    950 static void
    951 #ifdef __USE_PROTOS
    952 pBlk( Junction *q, int btype )
    953 #else
    954 pBlk( q, btype )
    955 Junction *q;
    956 int btype;
    957 #endif
    958 {
    959 	int k,a;
    960 	Junction *alt, *p;
    961 
    962 	q->end->pvisited = TRUE;
    963 	if ( btype == aLoopBegin )
    964 	{
    965 		require(q->p2!=NULL, "pBlk: invalid ()* block");
    966 		PRINT(q->p1);
    967 		alt = (Junction *)q->p2;
    968 		PRINT(alt->p1);
    969 		if ( PrintAnnotate )
    970 		{
    971 			printf(" /* Opt ");
    972 			k = 1;
    973 			while ( !set_nil(alt->fset[k]) )
    974 			{
    975 				s_fprT(stdout, alt->fset[k]);
    976 				if ( k++ == CLL_k ) break;
    977 				if ( !set_nil(alt->fset[k]) ) printf(", ");
    978 			}
    979 			printf(" */\n");
    980 		}
    981 		return;
    982 	}
    983 	for (a=1,alt=q; alt != NULL; alt= (Junction *) alt->p2, a++)
    984 	{
    985 		if ( alt->p1 != NULL ) PRINT(alt->p1);
    986 		if ( PrintAnnotate )
    987 		{
    988 			printf( " /* [%d] ", alt->altnum);
    989 			k = 1;
    990 			while ( !set_nil(alt->fset[k]) )
    991 			{
    992 				s_fprT(stdout, alt->fset[k]);
    993 				if ( k++ == CLL_k ) break;
    994 				if ( !set_nil(alt->fset[k]) ) printf(", ");
    995 			}
    996 			if ( alt->p2 == NULL && btype == aOptBlk )
    997 				printf( " (optional branch) */\n");
    998 			else printf( " */\n");
    999 		}
   1000 
   1001 		/* ignore implied empty alt of Plus blocks */
   1002 		if ( alt->p2 != NULL && ((Junction *)alt->p2)->ignore ) break;
   1003 
   1004 		if ( alt->p2 != NULL && !(((Junction *)alt->p2)->p2==NULL && btype == aOptBlk) )
   1005 		{
   1006 			if ( pLevel == 1 )
   1007 			{
   1008 				printf("\n");
   1009 				if ( a+1==pAlt1 || a+1==pAlt2 ) printf("=>");
   1010 				printf("\t");
   1011 			}
   1012 			else printf(" ");
   1013 			printf("|");
   1014 			if ( pLevel == 1 )
   1015 			{
   1016 				p = (Junction *) ((Junction *)alt->p2)->p1;
   1017 				while ( p!=NULL )
   1018 				{
   1019 					if ( p->ntype==nAction )
   1020 					{
   1021 						p=(Junction *)((ActionNode *)p)->next;
   1022 						continue;
   1023 					}
   1024 					if ( p->ntype!=nJunction )
   1025 					{
   1026 						break;
   1027 					}
   1028 					if ( p->jtype==EndBlk || p->jtype==EndRule )
   1029 					{
   1030 						p = NULL;
   1031 						break;
   1032 					}
   1033 					p = (Junction *)p->p1;
   1034 				}
   1035 				if ( p==NULL ) printf("\n\t");	/* Empty alt? */
   1036 			}
   1037 		}
   1038 	}
   1039 	q->end->pvisited = FALSE;
   1040 }
   1041 
   1042 /* How to print out a junction */
   1043 void
   1044 #ifdef __USE_PROTOS
   1045 pJunc( Junction *q )
   1046 #else
   1047 pJunc( q )
   1048 Junction *q;
   1049 #endif
   1050 {
   1051 	int dum_k;
   1052 	int doing_rule;
   1053 	require(q!=NULL, "pJunc: NULL node");
   1054 	require(q->ntype==nJunction, "pJunc: not junction");
   1055 
   1056 	if ( q->pvisited == TRUE ) return;
   1057 	q->pvisited = TRUE;
   1058 	switch ( q->jtype )
   1059 	{
   1060 		case aSubBlk :
   1061 			if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
   1062 			if ( q->end->p1 != NULL && ((Junction *)q->end->p1)->ntype==nJunction &&
   1063 				 ((Junction *)q->end->p1)->jtype == EndRule ) doing_rule = 1;
   1064 			else doing_rule = 0;
   1065 			pLevel++;
   1066 			if ( pLevel==1 )
   1067 			{
   1068 				if ( pAlt1==1 ) printf("=>");
   1069 				printf("\t");
   1070 			}
   1071 			else printf(" ");
   1072 			if ( doing_rule )
   1073 			{
   1074 				if ( pLevel==1 ) printf(" ");
   1075 				pBlk(q,q->jtype);
   1076 			}
   1077 			else {
   1078 				printf("(");
   1079 				if ( pLevel==1 ) printf(" ");
   1080 				pBlk(q,q->jtype);
   1081 				if ( pLevel>1 ) printf(" ");
   1082 				printf(")");
   1083 			}
   1084 			if ( q->guess ) printf("?");
   1085 			pLevel--;
   1086 			if ( PrintAnnotate ) freeBlkFsets(q);
   1087 			if ( q->end->p1 != NULL ) PRINT(q->end->p1);
   1088 			break;
   1089 		case aOptBlk :
   1090 			if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
   1091 			pLevel++;
   1092 			if ( pLevel==1 )
   1093 			{
   1094 				if ( pAlt1==1 ) printf("=>");
   1095 				printf("\t");
   1096 			}
   1097 			else printf(" ");
   1098 			printf("{");
   1099 			if ( pLevel==1 ) printf(" ");
   1100 			pBlk(q,q->jtype);
   1101 			if ( pLevel>1 ) printf(" ");
   1102 			else printf("\n\t");
   1103 			printf("}");
   1104 			pLevel--;
   1105 			if ( PrintAnnotate ) freeBlkFsets(q);
   1106 			if ( q->end->p1 != NULL ) PRINT(q->end->p1);
   1107 			break;
   1108 		case aLoopBegin :
   1109 			if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
   1110 			pLevel++;
   1111 			if ( pLevel==1 )
   1112 			{
   1113 				if ( pAlt1==1 ) printf("=>");
   1114 				printf("\t");
   1115 			}
   1116 			else printf(" ");
   1117 			printf("(");
   1118 			if ( pLevel==1 ) printf(" ");
   1119 			pBlk(q,q->jtype);
   1120 			if ( pLevel>1 ) printf(" ");
   1121 			else printf("\n\t");
   1122 			printf(")*");
   1123 			pLevel--;
   1124 			if ( PrintAnnotate ) freeBlkFsets(q);
   1125 			if ( q->end->p1 != NULL ) PRINT(q->end->p1);
   1126 			break;
   1127 		case aLoopBlk :
   1128 			if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
   1129 			pBlk(q,q->jtype);
   1130 			if ( PrintAnnotate ) freeBlkFsets(q);
   1131 			break;
   1132 		case aPlusBlk :
   1133 			if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
   1134 			pLevel++;
   1135 			if ( pLevel==1 )
   1136 			{
   1137 				if ( pAlt1==1 ) printf("=>");
   1138 				printf("\t");
   1139 			}
   1140 			else printf(" ");
   1141 			printf("(");
   1142 			if ( pLevel==1 ) printf(" ");
   1143 			pBlk(q,q->jtype);
   1144 			if ( pLevel>1 ) printf(" ");
   1145 			printf(")+");
   1146 			pLevel--;
   1147 			if ( PrintAnnotate ) freeBlkFsets(q);
   1148 			if ( q->end->p1 != NULL ) PRINT(q->end->p1);
   1149 			break;
   1150 		case EndBlk :
   1151 			break;
   1152 		case RuleBlk :
   1153 			printf( "\n%s :\n", q->rname);
   1154 			PRINT(q->p1);
   1155 			if ( q->p2 != NULL ) PRINT(q->p2);
   1156 			break;
   1157 		case Generic :
   1158 			if ( q->p1 != NULL ) PRINT(q->p1);
   1159 			q->pvisited = FALSE;
   1160 			if ( q->p2 != NULL ) PRINT(q->p2);
   1161 			break;
   1162 		case EndRule :
   1163 			printf( "\n\t;\n");
   1164 			break;
   1165 	}
   1166 	q->pvisited = FALSE;
   1167 }
   1168 
   1169 /* How to print out a rule reference node */
   1170 void
   1171 #ifdef __USE_PROTOS
   1172 pRuleRef( RuleRefNode *p )
   1173 #else
   1174 pRuleRef( p )
   1175 RuleRefNode *p;
   1176 #endif
   1177 {
   1178 	require(p!=NULL, "pRuleRef: NULL node");
   1179 	require(p->ntype==nRuleRef, "pRuleRef: not rule ref node");
   1180 
   1181 	printf( " %s", p->text);
   1182 	PRINT(p->next);
   1183 }
   1184 
   1185 /* How to print out a terminal node */
   1186 void
   1187 #ifdef __USE_PROTOS
   1188 pToken( TokNode *p )
   1189 #else
   1190 pToken( p )
   1191 TokNode *p;
   1192 #endif
   1193 {
   1194 	require(p!=NULL, "pToken: NULL node");
   1195 	require(p->ntype==nToken, "pToken: not token node");
   1196 
   1197 	if ( p->wild_card ) printf(" .");
   1198 	printf( " %s", TerminalString(p->token));
   1199 	PRINT(p->next);
   1200 }
   1201 
   1202 /* How to print out a terminal node */
   1203 void
   1204 #ifdef __USE_PROTOS
   1205 pAction( ActionNode *p )
   1206 #else
   1207 pAction( p )
   1208 ActionNode *p;
   1209 #endif
   1210 {
   1211 	require(p!=NULL, "pAction: NULL node");
   1212 	require(p->ntype==nAction, "pAction: not action node");
   1213 
   1214 	PRINT(p->next);
   1215 }
   1216 
   1217 					/* F i l l  F o l l o w  L i s t s */
   1218 
   1219 /*
   1220  * Search all rules for all rule reference nodes, q to rule, r.
   1221  * Add q->next to follow list dangling off of rule r.
   1222  * i.e.
   1223  *
   1224  *		r: -o-R-o-->o--> Ptr to node following rule r in another rule
   1225  *					|
   1226  *					o--> Ptr to node following another reference to r.
   1227  *
   1228  * This is the data structure employed to avoid FOLLOW set computation.  We
   1229  * simply compute the FIRST (reach) of the EndRule Node which follows the
   1230  * list found at the end of all rules which are referenced elsewhere.  Rules
   1231  * not invoked by other rules have no follow list (r->end->p1==NULL).
   1232  * Generally, only start symbols are not invoked by another rule.
   1233  *
   1234  * Note that this mechanism also gives a free cross-reference mechanism.
   1235  *
   1236  * The entire syntax diagram is layed out like this:
   1237  *
   1238  * SynDiag
   1239  *	|
   1240  *	v
   1241  *	o-->R1--o
   1242  *	|
   1243  *	o-->R2--o
   1244  *	|
   1245  *	...
   1246  *	|
   1247  *	o-->Rn--o
   1248  *
   1249  */
   1250 void
   1251 #ifdef __USE_PROTOS
   1252 FoLink( Node *p )
   1253 #else
   1254 FoLink( p )
   1255 Node *p;
   1256 #endif
   1257 {
   1258 	RuleEntry *q;
   1259 	Junction *j = (Junction *) p;
   1260 	RuleRefNode *r = (RuleRefNode *) p;
   1261 
   1262 	if ( p==NULL ) return;
   1263 	require(p->ntype>=1 && p->ntype<=NumNodeTypes,
   1264 			eMsgd("FoLink: invalid diagram node: ntype==%d",p->ntype));
   1265 	switch ( p->ntype )
   1266 	{
   1267 		case nJunction :
   1268 			if ( j->fvisited ) return;
   1269 			if ( j->jtype == EndRule ) return;
   1270 			j->fvisited = TRUE;
   1271 			FoLink( j->p1 );
   1272 			FoLink( j->p2 );
   1273 /* MR14 */
   1274 /* MR14 */  /* Need to determine whether the guess block is an         */
   1275 /* MR14 */  /* of the form (alpha)? beta before follow sets are        */
   1276 /* MR14 */  /* computed.  This is necessary to solve problem           */
   1277 /* MR14 */  /* of doing follow on the alpha of an (alpha)? beta block. */
   1278 /* MR14 */
   1279 /* MR14 */  /* This is performed by analysis_point as a side-effect.   */
   1280 /* MR14 */
   1281 /* MR14 */
   1282 /* MR14 */  if (j->jtype == aSubBlk && j->guess) {
   1283 /* MR14 */    Junction *ignore;
   1284 /* MR14 */    ignore=analysis_point(j);
   1285 /* MR14 */  }
   1286 /* MR14 */
   1287 			return;
   1288 		case nRuleRef :
   1289 			if ( r->linked ) return;
   1290 			q = (RuleEntry *) hash_get(Rname, r->text);
   1291 			if ( q == NULL )
   1292 			{
   1293 				warnFL( eMsg1("rule %s not defined",r->text), FileStr[r->file], r->line );
   1294 			}
   1295 			else
   1296 			{
   1297 				if ( r->parms!=NULL && RulePtr[q->rulenum]->pdecl==NULL )
   1298 				{
   1299 					warnFL( eMsg1("rule %s accepts no parameter(s)", r->text),
   1300 							FileStr[r->file], r->line );
   1301 				}
   1302 				if ( r->parms==NULL && RulePtr[q->rulenum]->pdecl!=NULL )
   1303 				{
   1304 					warnFL( eMsg1("rule %s requires parameter(s)", r->text),
   1305 							FileStr[r->file], r->line );
   1306 				}
   1307 				if ( r->assign!=NULL && RulePtr[q->rulenum]->ret==NULL )
   1308 				{
   1309 					warnFL( eMsg1("rule %s yields no return value(s)", r->text),
   1310 							FileStr[r->file], r->line );
   1311 				}
   1312 				if ( r->assign==NULL && RulePtr[q->rulenum]->ret!=NULL )
   1313 				{
   1314 					warnFL( eMsg1("rule %s returns a value(s)", r->text),
   1315 							FileStr[r->file], r->line );
   1316 				}
   1317 				if ( !r->linked )
   1318 				{
   1319 					addFoLink(	r->next, r->rname, RulePtr[q->rulenum] );
   1320 					r->linked = TRUE;
   1321 				}
   1322 			}
   1323 			FoLink( r->next );
   1324 			return;
   1325 		case nToken :
   1326 			FoLink( ((TokNode *)p)->next );
   1327 			return;
   1328 		case nAction :
   1329 			FoLink( ((ActionNode *)p)->next );
   1330 			return;
   1331 		default :
   1332 			fatal_internal("invalid node type");
   1333 	}
   1334 }
   1335 
   1336 /*
   1337  * Add a reference to the end of a rule.
   1338  *
   1339  * 'r' points to the RuleBlk node in a rule.  r->end points to the last node
   1340  * (EndRule jtype) in a rule.
   1341  *
   1342  * Initial:
   1343  *		r->end --> 	o
   1344  *
   1345  * After:
   1346  *		r->end --> 	o-->o--> Ptr to node following rule r in another rule
   1347  *						|
   1348  *						o--> Ptr to node following another reference to r.
   1349  *
   1350  * Note that the links are added to the head of the list so that r->end->p1
   1351  * always points to the most recently added follow-link.  At the end, it should
   1352  * point to the last reference found in the grammar (starting from the 1st rule).
   1353  */
   1354 void
   1355 #ifdef __USE_PROTOS
   1356 addFoLink( Node *p, char *rname, Junction *r )
   1357 #else
   1358 addFoLink( p, rname, r )
   1359 Node *p;
   1360 char *rname;
   1361 Junction *r;
   1362 #endif
   1363 {
   1364 	Junction *j;
   1365 	require(r!=NULL,				"addFoLink: incorrect rule graph");
   1366 	require(r->end!=NULL,			"addFoLink: incorrect rule graph");
   1367 	require(r->end->jtype==EndRule,	"addFoLink: incorrect rule graph");
   1368 	require(p!=NULL,				"addFoLink: NULL FOLLOW link");
   1369 
   1370 	j = newJunction();
   1371 	j->rname = rname;			/* rname on follow links point to target rule */
   1372 	j->p1 = p;					/* link to other rule */
   1373 	j->p2 = (Node *) r->end->p1;/* point to head of list */
   1374 	r->end->p1 = (Node *) j;	/* reset head to point to new node */
   1375 }
   1376 
   1377 void
   1378 #ifdef __USE_PROTOS
   1379 GenCrossRef( Junction *p )
   1380 #else
   1381 GenCrossRef( p )
   1382 Junction *p;
   1383 #endif
   1384 {
   1385 	set a;
   1386 	Junction *j;
   1387 	RuleEntry *q;
   1388 	unsigned e;
   1389 	require(p!=NULL, "GenCrossRef: why are you passing me a null grammar?");
   1390 
   1391 	printf("Cross Reference:\n\n");
   1392 	a = empty;
   1393 	for (; p!=NULL; p = (Junction *)p->p2)
   1394 	{
   1395 		printf("Rule %20s referenced by {", p->rname);
   1396 		/* make a set of rules for uniqueness */
   1397 		for (j = (Junction *)(p->end)->p1; j!=NULL; j = (Junction *)j->p2)
   1398 		{
   1399 			q = (RuleEntry *) hash_get(Rname, j->rname);
   1400 			require(q!=NULL, "GenCrossRef: FoLinks are screwed up");
   1401 			set_orel(q->rulenum, &a);
   1402 		}
   1403 		for (; !set_nil(a); set_rm(e, a))
   1404 		{
   1405 			e = set_int(a);
   1406 			printf(" %s", RulePtr[e]->rname);
   1407 		}
   1408 		printf(" }\n");
   1409 	}
   1410 	set_free( a );
   1411 }
   1412 
   1413 /*
   1414    The single argument is a pointer to the start of an element of a
   1415    C++ style function prototypet list.  Given a pointer to the start of
   1416    an formal we must locate the comma (or the end of the string)
   1417    and locate the datatype, formal name, and initial value expression.
   1418 
   1419    The function returns a pointer to the character following the comma
   1420    which terminates the formal declaration, or a pointer to the end of
   1421    the string if none was found.
   1422 
   1423    I thought we were parsing specialists, how come I'm doing this by
   1424    hand written code ?
   1425 
   1426    Examples of input:
   1427 
   1428         Foo f,
   1429         Foo f = Foo(1),
   1430         Foo f = Foo(1,2),
   1431         Foo f = &farray[1,2],
   1432         Foo f = ",",
   1433         Foo f = ',',
   1434         TFoo<int,char> f = TFoo<int,char>(1,2),
   1435 
   1436    A non-zero value for nesting indicates a problem matching '(' and ')',
   1437    '[' and ']', '<' and '>', '{' and '}', or improperly terminated string
   1438    or character literal.
   1439 
   1440 */
   1441 
   1442 
   1443 /*
   1444  *  Don't care if it is a valid string literal or not, just find the end
   1445  *  Start with pointer to leading "\""
   1446  */
   1447 
   1448 #ifdef __USE_PROTOS
   1449 char * skipStringLiteral(char *pCurrent)
   1450 #else
   1451 char * skipStringLiteral(pCurrent)
   1452 char *pCurrent;
   1453 #endif
   1454 {
   1455   char *p = pCurrent;
   1456   if (*p == 0) return p;
   1457   require (*p == '\"', "skipStringLiteral")
   1458   p++;
   1459   for (p = p; *p != 0; p++) {
   1460     if (*p == '\\') {
   1461       p++;
   1462       if (*p == 0) break;
   1463       p++;
   1464     }
   1465     if (*p == '\"') {
   1466       p++;
   1467       break;
   1468     }
   1469   }
   1470   return p;
   1471 }
   1472 
   1473 /*
   1474  *  Don't care if it is a valid character literal or not, just find the end
   1475  *  Start with pointer to leading "'"
   1476  */
   1477 
   1478 #ifdef __USE_PROTOS
   1479 char * skipCharLiteral(char *pStart)
   1480 #else
   1481 char * skipCharLiteral(pStart)
   1482  char *pStart;
   1483 #endif
   1484 {
   1485   char *p = pStart;
   1486   if (*p == 0) return p;
   1487   require (*p == '\'', "skipCharLiteral")
   1488   p++;
   1489   for (p = p; *p != 0; p++) {
   1490     if (*p == '\\') {
   1491       p++;
   1492       if (*p == 0) break;
   1493       p++;
   1494     }
   1495     if (*p == '\'') {
   1496       p++;
   1497       break;
   1498     }
   1499   }
   1500   return p;
   1501 }
   1502 
   1503 #ifdef __USE_PROTOS
   1504 char * skipSpaces(char *pStart)
   1505 #else
   1506 char * skipSpaces(pStart)
   1507 char * pStart;
   1508 #endif
   1509 {
   1510   char *p = pStart;
   1511   while (*p != 0 && isspace(*p)) p++;
   1512   return p;
   1513 }
   1514 
   1515 #ifdef __USE_PROTOS
   1516 char * skipToSeparatorOrEqualSign(char *pStart, int *pNest)
   1517 #else
   1518 char * skipToSeparatorOrEqualSign(pStart, pNest)
   1519 char *pStart;
   1520 int *pNest;
   1521 #endif
   1522 {
   1523   char *p = pStart;
   1524 
   1525   int nest = 0;
   1526 
   1527   *pNest = (-1);
   1528 
   1529   while (*p != 0) {
   1530     switch (*p) {
   1531 
   1532       case '(' :
   1533       case '[' :
   1534       case '<' :
   1535       case '{' :
   1536         nest++;
   1537         p++;
   1538         break;
   1539 
   1540       case ')' :
   1541       case ']' :
   1542       case '>' :
   1543       case '}' :
   1544         nest--;
   1545         p++;
   1546         break;
   1547 
   1548       case '"' :
   1549         p = skipStringLiteral(p);
   1550         break;
   1551 
   1552       case '\'' :
   1553         p = skipCharLiteral(p);
   1554         break;
   1555 
   1556       case '\\':
   1557         p++;
   1558         if (*p == 0) goto EXIT;
   1559         p++;
   1560         break;
   1561 
   1562       case ',':
   1563       case '=':
   1564         if (nest == 0) goto EXIT;
   1565 		p++;
   1566         break;
   1567 
   1568       default:
   1569         p++;
   1570     }
   1571   }
   1572 EXIT:
   1573   *pNest = nest;
   1574   return p;
   1575 }
   1576 
   1577 #ifdef __USE_PROTOS
   1578 char * skipToSeparator(char *pStart, int *pNest)
   1579 #else
   1580 char * skipToSeparator(pStart, pNest)
   1581 char *pStart;
   1582 int *pNest;
   1583 #endif
   1584 {
   1585   char * p = pStart;
   1586   for ( ; ; ) {
   1587     p = skipToSeparatorOrEqualSign(p, pNest);
   1588     if (*pNest != 0) return p;
   1589     if (*p == ',') return p;
   1590     if (*p == 0) return p;
   1591 	p++;
   1592   }
   1593 }
   1594 
   1595 /* skip to just past the "=" separating the declaration from the initialization value */
   1596 
   1597 #ifdef __USE_PROTOS
   1598 char * getInitializer(char *pStart)
   1599 #else
   1600 char * getInitializer(pStart)
   1601 char * pStart;
   1602 #endif
   1603 {
   1604 	char *p;
   1605 	char *pDataType;
   1606 	char *pSymbol;
   1607 	char *pEqualSign;
   1608 	char *pValue;
   1609 	char *pSeparator;
   1610 	int nest = 0;
   1611 
   1612 	require(pStart!=NULL, "getInitializer: invalid string");
   1613 
   1614 	p = endFormal(pStart,
   1615 			      &pDataType,
   1616 				  &pSymbol,
   1617 				  &pEqualSign,
   1618 				  &pValue,
   1619 				  &pSeparator,
   1620 				  &nest);
   1621     if (nest != 0) return NULL;
   1622     if (pEqualSign == NULL) return NULL;
   1623     if (pValue == NULL) return NULL;
   1624 	return strBetween(pValue, NULL, pSeparator);
   1625 }
   1626 
   1627 /*
   1628    Examines the string from pStart to pEnd-1.
   1629    If the string has 0 length or is entirely white space
   1630    returns 1.  Otherwise 0.
   1631 */
   1632 
   1633 #ifdef __USE_PROTOS
   1634 int isWhiteString(const char *pStart, const char *pEnd)
   1635 #else
   1636 int isWhiteString(pStart, pEnd)
   1637 const char *pStart;
   1638 const char *pEnd;
   1639 #endif
   1640 {
   1641   const char *p;
   1642   for (p = pStart; p < pEnd; p++) {
   1643     if (! isspace(*p)) return 0;
   1644   }
   1645   return 1;
   1646 }
   1647 
   1648 /*
   1649    This replaces HasComma() which couldn't distinguish
   1650 
   1651         foo ["a,b"]
   1652 
   1653    from:
   1654 
   1655         foo[a,b]
   1656 
   1657 */
   1658 
   1659 #ifdef __USE_PROTOS
   1660 int hasMultipleOperands(char *pStart)
   1661 #else
   1662 int hasMultipleOperands(pStart)
   1663 char *pStart;
   1664 #endif
   1665 {
   1666   char *p = pStart;
   1667   int nest = 0;
   1668 
   1669   p = skipSpaces(p);
   1670   if (*p == 0) return 0;
   1671   p = skipToSeparator(p, &nest);
   1672   if (nest == 0 && *p == ',') return 1;
   1673   return 0;
   1674 }
   1675 
   1676 
   1677 #define MAX_STR_BETWEEN_WORK_AREA 1000
   1678 
   1679 static char strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA];
   1680 
   1681 
   1682 /*
   1683 	strBetween(pStart, pNext, pStop)
   1684 
   1685     Creates a null terminated string by copying the text between two pointers
   1686 	to a work area.  The start of the string is pStart.  The end of the string
   1687 	is the character before pNext, or if pNext is null then the character before
   1688 	pStop.  Trailing spaces are not included in the copy operation.
   1689 
   1690 	This is used when a string contains several parts.  The pNext part may be
   1691 	optional.  The pStop will stop the scan when the optional part is not present
   1692 	(is a null pointer).
   1693 */
   1694 
   1695 #ifdef __USE_PROTOS
   1696 char *strBetween(char *pStart, char *pNext, char *pStop)
   1697 #else
   1698 char *strBetween(pStart, pNext, pStop)
   1699 char *pStart;
   1700 char *pNext;
   1701 char *pStop;
   1702 #endif
   1703 {
   1704   char *p;
   1705   char *q = strBetweenWorkArea;
   1706   const char *pEnd;
   1707 
   1708   pEnd = (pNext != NULL) ? pNext : pStop;
   1709 
   1710   require (pEnd != NULL, "pEnd == NULL");
   1711   require (pEnd >= pStart, "pEnd < pStart");
   1712   for (pEnd--; pEnd >= pStart; pEnd--) { /* MR31 */
   1713 	if (! isspace(*pEnd)) break;
   1714   }
   1715   for (p = pStart;
   1716        p <= pEnd && q < &strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA-2];
   1717 	   p++, q++) {
   1718 	 *q = *p;
   1719   }
   1720   *q = 0;
   1721   return strBetweenWorkArea;
   1722 }
   1723 
   1724 /*
   1725    function     Returns pointer to character following separator at
   1726    value        which to continue search for next formal.  If at the
   1727                 end of the string a pointer to the null byte at the
   1728                 end of the string is returned.
   1729 
   1730    pStart       Pointer to the starting position of the formal list
   1731 
   1732                 This may be the middle of a longer string, for example
   1733                 when looking for the end of formal #3 starting from
   1734                 the middle of the complete formal list.
   1735 
   1736    ppDataType   Returns a pointer to the start of the data type in the
   1737                 formal. Example: pointer to "Foo".
   1738 
   1739    ppSymbol     Returns a pointer to the start of the formal symbol.
   1740                 Example: pointer to "f".
   1741 
   1742    ppEqualSign  Returns a pointer to the equal sign separating the
   1743                 formal symbol from the initial value.  If there is
   1744                 no "=" then this will be NULL.
   1745 
   1746    ppValue      Returns a pointer to the initial value part of the
   1747                 formal declaration.  Example: pointer to "&farray[1,2]"
   1748 
   1749    ppSeparator  Returns a pointer to the character which terminated the
   1750                 scan.  This should be a pointer to a comma or a null
   1751                 byte which terminates the string.
   1752 
   1753    pNest        Returns the nesting level when a separator was found.
   1754                 This is non-zero for any kind of error.  This is zero
   1755                 for a successful parse of this portion of the formal
   1756                 list.
   1757 
   1758 */
   1759 
   1760 #ifdef __USE_PROTOS
   1761 char * endFormal(char *pStart,
   1762                  char **ppDataType,
   1763                  char **ppSymbol,
   1764                  char **ppEqualSign,
   1765                  char **ppValue,
   1766                  char **ppSeparator,
   1767                  int *pNest)
   1768 #else
   1769 char * endFormal(pStart,
   1770 			     ppDataType,
   1771 				 ppSymbol,
   1772 				 ppEqualSign,
   1773 				 ppValue,
   1774 				 ppSeparator,
   1775 				 pNest)
   1776 char *pStart;
   1777 char **ppDataType;
   1778 char **ppSymbol;
   1779 char **ppEqualSign;
   1780 char **ppValue;
   1781 char **ppSeparator;
   1782 int *pNest;
   1783 
   1784 #endif
   1785 {
   1786   char *p = pStart;
   1787   char *q;
   1788 
   1789   *ppDataType = NULL;
   1790   *ppSymbol = NULL;
   1791   *ppEqualSign = NULL;
   1792   *ppValue = NULL;
   1793   *ppSeparator = NULL;
   1794 
   1795   *pNest = 0;
   1796 
   1797   /* The first non-blank is the start of the datatype */
   1798 
   1799   p = skipSpaces(p);
   1800   if (*p == 0) goto EXIT;
   1801   *ppDataType = p;
   1802 
   1803   /* We are not looking for the symbol, we are looking
   1804      for the separator that follows the symbol.  Then
   1805      we'll back up.
   1806 
   1807      Search for the ',' or '=" or null terminator.
   1808    */
   1809 
   1810   p = skipToSeparatorOrEqualSign(p, pNest);
   1811 
   1812   if (*pNest != 0) goto EXIT;
   1813 
   1814   /*
   1815      Work backwards to find start of symbol
   1816      Skip spaces between the end of symbol and separator
   1817      Assume that there are no spaces in the formal, but
   1818      there is a space preceding the formal
   1819   */
   1820 
   1821   for (q = &p[-1]; q >= *ppDataType; q--) {
   1822     if (! isspace(*q)) break;
   1823   }
   1824   if (q < *ppDataType) goto EXIT;
   1825 
   1826   /*
   1827      MR26 Handle things like: IIR_Bool (IIR_Decl::*constraint)()
   1828      Backup until we hit the end of a symbol string, then find the
   1829      start of the symbol string.  This wont' work for functions
   1830      with prototypes, but works for the most common cases.  For
   1831      others, use typedef names.
   1832    */
   1833 
   1834 /* MR26 */  for (q = q; q >= *ppDataType; q--) {
   1835 /* MR26 */    if (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$') break;
   1836 /* MR26 */  }
   1837 /* MR26 */  if (q < *ppDataType) goto EXIT;
   1838 
   1839   for (q = q; q >= *ppDataType; q--) {
   1840     if ( ! (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$')) break;
   1841   }
   1842 
   1843   *ppSymbol = &q[1];
   1844 
   1845   if (*p == ',' || *p == 0) {
   1846     *ppSeparator = p;
   1847     goto EXIT;
   1848   }
   1849 
   1850   *ppEqualSign = p;
   1851   p = skipSpaces(++p);
   1852   *ppValue = p;
   1853   if (*p == 0) goto EXIT;
   1854 
   1855 
   1856   while (*p != 0 && *pNest == 0 && *p != ',') {
   1857       p = skipToSeparator(p, pNest);
   1858   }
   1859   if (*pNest == 0) *ppSeparator = p;
   1860 
   1861 EXIT:
   1862   if (*p == ',') p++;
   1863   return p;
   1864 }
   1865