Home | History | Annotate | Download | only in antlr
      1 /*
      2  * globals.c	--	File containing all variables/tables visible to all files.
      3  *
      4  * SOFTWARE RIGHTS
      5  *
      6  * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
      7  * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
      8  * company may do whatever they wish with source code distributed with
      9  * PCCTS or the code generated by PCCTS, including the incorporation of
     10  * PCCTS, or its output, into commerical software.
     11  *
     12  * We encourage users to develop software with PCCTS.  However, we do ask
     13  * that credit is given to us for developing PCCTS.  By "credit",
     14  * we mean that if you incorporate our source code into one of your
     15  * programs (commercial product, research project, or otherwise) that you
     16  * acknowledge this fact somewhere in the documentation, research report,
     17  * etc...  If you like PCCTS and have developed a nice tool with the
     18  * output, please mention that you developed it using PCCTS.  In
     19  * addition, we ask that this header remain intact in our source code.
     20  * As long as these guidelines are kept, we expect to continue enhancing
     21  * this system and expect to make other tools available as they are
     22  * completed.
     23  *
     24  * ANTLR 1.33
     25  * Terence Parr
     26  * Parr Research Corporation
     27  * with Purdue University and AHPCRC, University of Minnesota
     28  * 1989-2001
     29  */
     30 
     31 #include <stdio.h>
     32 
     33 #include "pcctscfg.h"
     34 
     35 #include "set.h"
     36 #include "syn.h"
     37 #include "hash.h"
     38 #include "generic.h"
     39 
     40 char Version[] = "1.33MR33" ;	/* PCCTS version number */	                   /* MRXXX */
     41 char VersionDef[] = "13333";    /* same (except int equiv for preproc symbol) */ /* MRXXX */
     42 
     43 char LexStartSymbol[] = "START";/* Name of starting lexical class/automaton */
     44 char *RemapFileName = "remap.h";
     45 char *DlgFileName = "parser.dlg";
     46 char *DefFileName = "tokens.h";
     47 char *ErrFileName = "err.c";
     48 char *ModeFileName = "mode.h";
     49 char *StdMsgName = NULL;
     50 
     51 char *ParserName = DefaultParserName;
     52 
     53 /* list of PCCTS supplied support symbols; these are renamed when more than
     54  * one ANTLR-generated parsers are linked together to avoid name conflicts.
     55  * Can't use '##' ANSIC preprocessor concat operator with K&R and:
     56  *		#define zzskip	zzparser ## skip
     57  * will not work for ANSI/C++ as 'zzparserskip' is created w/o zzparser
     58  * being substituted--ack!!!
     59  */
     60 char *StandardSymbols[] = {
     61 /* ANTLR stuff */
     62 	"zzStackOvfMsg",
     63 	"zzasp",
     64 	"zzaStack",
     65 	"inf_tokens",
     66 	"inf_text",
     67 	"inf_text_buffer",
     68 	"inf_text_buffer_ptr",
     69 	"inf_text_buffer_size",
     70  	"inf_labase",
     71 	"inf_last",
     72 	"inf_lap",
     73 	"zztokenLA",
     74 	"zztextLA",
     75 	"zzlap",
     76 	"zzlabase",
     77 	"zztoktext",
     78 	"zztoken",
     79 	"zzdirty",
     80 	"zzguessing",
     81 	"zzguess_start",
     82 	"zzresynch",
     83 	"zzinf_tokens",
     84 	"zzinf_text",
     85 	"zzinf_text_buffer",
     86 	"zzinf_labase",
     87 	"zzinf_last",
     88 	"zzfill_inf_look",
     89 	"zzFAIL",
     90 	"zzsave_antlr_state",
     91 	"zzrestore_antlr_state",
     92 	"zzsyn",
     93 	"zzset_el",
     94 	"zzset_deg",
     95 	"zzedecode",
     96 	"_zzsetmatch",
     97 	"_zzmatch",
     98 	"_inf_zzgettok",
     99     "zzconsumeUntil",
    100     "zzconsumeUntilToken",
    101     "_zzmatch_wsig",
    102     "_zzsetmatch_wsig",
    103     "_zzmatch_wdfltsig",
    104     "_zzsetmatch_wdfltsig",
    105 	"zzdflthandlers",
    106 /* DLG stuff */
    107 	"zzreal_line",
    108 	"zzcharfull",
    109 	"zzerr",
    110 	"zzlextext",
    111 	"zzbegexpr",
    112 	"zzendexpr",
    113 	"zzbufsize",
    114 	"zzbegcol",
    115 	"zzendcol",
    116 	"zzline",
    117 	"zzchar",
    118 	"zzbufovf",
    119 	"zzrdstream",
    120 	"zzrdfunc",
    121 	"zzrdstr",
    122 	"zzclose_stream",
    123 	"zzsave_dlg_state",
    124 	"zzrestore_dlg_state",
    125 	"zzmode",
    126 	"zzskip",
    127 	"zzmore",
    128 	"zzreplchar",
    129 	"zzreplstr",
    130 	"zzgettok",
    131 	"zzadvance",
    132 	"zzerrstd",
    133 	"zzerr_in",
    134 	"zzconstr_attr",
    135 	"zzempty_attr",
    136 	"zzerraction",
    137 	"zztokens",			/* list of token regular expressions */
    138 	"dfa",
    139 	"accepts",
    140 	"actions",
    141     "zzTraceOptionValue",       /* MR10 */
    142     "zzTraceGuessOptionValue",  /* MR10 */
    143     "zzTraceCurrentRuleName",   /* MR10 */
    144     "zzTraceDepth",             /* MR10 */
    145     "zzGuessSeq",               /* MR10 */
    146     "zzSyntaxErrCount",         /* MR11 */
    147     "zzLexErrCount",            /* MR11 */
    148     "zzTraceGuessDone",         /* MR13 - BJS */
    149     "zzTraceGuessFail",         /* MR13 - BJS */
    150     "zzTraceGuessOption",       /* MR13 - BJS */
    151     "zzTraceIn",                /* MR13 - BJS */
    152     "zzTraceOption",            /* MR13 - BJS */
    153     "zzTraceOut",               /* MR13 - BJS */
    154     "zzTraceReset",             /* MR13 - BJS */
    155 	NULL		/* must be present */
    156 };
    157 
    158 /* list of PCCTS supplied support functions; these are renamed when more than
    159  * one ANTLR-generated parsers are linked together to avoid name conflicts.
    160  */
    161 char *ASTSymbols[] = {
    162 	"AST",
    163 	"zzast_sp",
    164 	"zzastStack",
    165 	"zzlink",
    166 	"zzastnew",
    167 	"zzsubchild",
    168 	"zzsubroot",
    169 	"zzpre_ast",
    170 	"zzfree_ast",
    171 	"zztmake",
    172 	"zzdup_ast",
    173 	"zztfree",
    174 	"zzdouble_link",
    175 	NULL		/* must be present */
    176 };
    177 
    178 /* Current ambiguity examination information */
    179 int CurAmbigAlt1, CurAmbigAlt2, CurAmbigline, CurAmbigfile;
    180 char *CurAmbigbtype;
    181 
    182 
    183 						/* M e t h o d  T a b l e s */
    184 /*
    185  * The following tables are used to fill syntax diagram nodes with the correct
    186  * function pointers for computing FIRST sets and printing themselves.
    187  */
    188 
    189 /* fpTraverse[node type] == pointer to function that calculates trees
    190  * representing the FIRST sets for that node (maintains spatial info).
    191  * We use 'struct _tree' not 'tree' due to a g++ 2.4.3 bug.
    192  */
    193 #ifdef __cplusplus
    194 struct _tree *(*fpTraverse[NumNodeTypes+1])(... /* Node *, int, set * */) = {
    195 	NULL,
    196 	(struct _tree *(*)(...)) tJunc,
    197 	(struct _tree *(*)(...)) tRuleRef,
    198 	(struct _tree *(*)(...)) tToken,
    199 	(struct _tree *(*)(...)) tAction
    200 };
    201 #else
    202 Tree *(*fpTraverse[NumNodeTypes+1])() = {
    203 	NULL,
    204 	tJunc,
    205 	tRuleRef,
    206 	tToken,
    207 	tAction
    208 };
    209 #endif
    210 
    211 /* fpReach[node type] == pointer to function that calculates FIRST set for
    212  * that node. (r stands for reach).  We use 'struct _set' not 'set'
    213  * due to a g++ 2.4.3 bug.
    214  */
    215 #ifdef __cplusplus
    216 struct _set (*fpReach[NumNodeTypes+1])(... /* Node *, int, set * */) = {
    217 	NULL,
    218 	(struct _set (*)(...)) rJunc,
    219 	(struct _set (*)(...)) rRuleRef,
    220 	(struct _set (*)(...)) rToken,
    221 	(struct _set (*)(...)) rAction
    222 };
    223 #else
    224 set (*fpReach[NumNodeTypes+1])() = {
    225 	NULL,
    226 	rJunc,
    227 	rRuleRef,
    228 	rToken,
    229 	rAction
    230 };
    231 #endif
    232 
    233 /* fpPrint[node type] == pointer to function that knows how to print that node. */
    234 #ifdef __cplusplus
    235 void (*fpPrint[NumNodeTypes+1])(... /* Node * */) = {
    236 	NULL,
    237 	(void (*)(...)) pJunc,
    238 	(void (*)(...)) pRuleRef,
    239 	(void (*)(...)) pToken,
    240 	(void (*)(...)) pAction
    241 };
    242 #else
    243 void (*fpPrint[NumNodeTypes+1])() = {
    244 	NULL,
    245 	pJunc,
    246 	pRuleRef,
    247 	pToken,
    248 	pAction
    249 };
    250 #endif
    251 
    252 char *decodeJType[] = {
    253 	"invalid",
    254 	"aSubBlk",
    255 	"aOptBlk",
    256 	"aLoopBlk",
    257 	"EndBlk",
    258 	"RuleBlk",
    259 	"Generic",
    260 	"EndRule",
    261 	"aPlusBlk",
    262 	"aLoopBegin"
    263 };
    264 
    265 
    266 							/* H a s h  T a b l e s */
    267 
    268 Entry	**Tname,			/* Table of all token names (maps name to tok num)*/
    269 		**Texpr,			/* Table of all token expressions
    270 							   (maps expr to tok num) */
    271 		**Rname,			/* Table of all Rules (has ptr to start of rule) */
    272 		**Fcache,			/* Cache of First/Follow Computations */
    273 		**Tcache;			/* Tree cache; First/Follow for permute trees */
    274 Entry	**Elabel;			/* Table of all element label names */
    275 Entry	**Sname;			/* Signal names */
    276 Entry   **Pname;            /* symbolic predicate names MR11 */
    277 
    278 
    279 							/* V a r i a b l e s */
    280 
    281 int     Save_argc;          /* MR10 */
    282 char    **Save_argv;        /* MR10 */
    283 int		EpToken=0;			/* Imaginary Epsilon token number */
    284 int		WildCardToken=0;
    285 int		CurFile= -1;		/* Index into FileStr table */
    286 char    *CurPredName=NULL;  /* MR11 */
    287 char	*CurRule=NULL;		/* Pointer to current rule name */
    288 int     CurRuleDebug=0;     /* MR13 debug flag */
    289 RuleEntry *CurRuleNode=NULL;/* Pointer to current rule node in syntax tree */
    290 char	*CurRetDef=NULL;	/* Pointer to current return type definition */
    291 char	*CurParmDef=NULL;	/* Pointer to current parameter definition */
    292 Junction *CurRuleBlk=NULL;	/* Pointer to current block node for enclosing block */
    293 ListNode *CurExGroups=NULL;	/* Current list of exception groups for rule/alts */
    294 ListNode *CurElementLabels=NULL;
    295 ListNode *CurAstLabelsInActions=NULL; /* MR27 */
    296 
    297 /* MR10  used by <<>>? to set "label_used_in_semantic_pred"  */
    298 /* MR10  this will force LT(i) assignment even in guess mode */
    299 
    300 ListNode *CurActionLabels=NULL;     /* MR10 Element Labels appearing in last action */
    301 int      numericActionLabel=0 ;     /* MR10 << ... $1 ... >> or << ... $1 ... >>?   */
    302 ListNode *NumericPredLabels=NULL;   /* MR10 << ... $1 ... >>?  ONLY                 */
    303 ListNode *ContextGuardPredicateList=NULL;  /* MR13 for re-evaluating predicates
    304                                                    after meta tokens are defined    */
    305 
    306 int		CurBlockID=0;		/* Unique int for each block */
    307 int		CurAltNum=0;
    308 Junction *CurAltStart = NULL;	/* Junction node that starts the alt */
    309 Junction *OuterAltStart = NULL; /* For chaining exception groups        MR7 */
    310 int		NumRules=0;			/* Rules are from 1 to n */
    311 FILE	*output=NULL;		/* current parser output file */
    312 FILE	*input=NULL;		/* current grammar input file */
    313 char	*FileStr[MaxNumFiles];/* Ptr to array of file names on command-line */
    314 int		NumFiles=0;			/* current grammar file number */
    315 #ifdef __cplusplus
    316 void	(**fpTrans)(...),	/* array of ptrs to funcs that translate nodes */
    317 	 	(**fpJTrans)(...);	/*  ... that translate junctions */
    318 #else
    319 void	(**fpTrans)(),		/* array of ptrs to funcs that translate nodes */
    320 	 	(**fpJTrans)();		/*  ... that translate junctions */
    321 #endif
    322 int		**FoStack;			/* Array of LL_k ptrs to stacks of rule numbers */
    323 int		**FoTOS;			/* FOLLOW stack top-of-stack pointers */
    324 Junction *SynDiag = NULL;	/* Pointer to start of syntax diagram */
    325 int		BlkLevel=1;			/* Current block level.  Set by antlr.g, used by
    326 							 * scanner to translate $i.j attributes */
    327 set		reserved_positions;	/* set of token positions reserved by '#token T=i' cmds */
    328 set		all_tokens;			/* set of all token types */
    329 set		imag_tokens;		/* set of all imaginary token types (EpToken, errclasses...) */
    330 set		tokclasses;			/* set of all token class token types */
    331 ListNode *ForcedTokens = 0;	/* list of token_id/token_num pairs to remap */
    332 ListNode *MetaTokenNodes=NULL; /* list of meta token refs such as token classes etc... */
    333 int		*TokenInd=NULL;		/* an indirection level between token num and position
    334 							 * of that token def in TokenStr and ExprStr */
    335 int		LastTokenCounted=0;	/* ==TokenNum if no token renumbering (same as old TokenNum) */
    336 int		TokenNum=TokenStart;
    337 char	**TokenStr=NULL;	/* map token # to token name */
    338 char	**ExprStr=NULL;		/* map token # to expr */
    339 Junction **RulePtr=NULL;	/* map rule # to RuleBlk node of rule */
    340 ListNode *ExprOrder=NULL;	/* list of exprs as they are found in grammar */
    341 ListNode *BeforeActions=NULL;/* list of grammar actions before rules */
    342 ListNode *AfterActions=NULL;/* list of grammar actions after rules */
    343 ListNode *LexActions=NULL;	/* list of lexical actions */
    344 
    345 /* MR1              									    */
    346 /* MR1  11-Apr-97	Provide mechanism for inserting code into DLG class     */
    347 /* MR1				via #lexmember <<....>>			            */
    348 /* MR1				via #lexprefix <<....>>			            */
    349 /* MR1				                					    */
    350 
    351 ListNode *LexMemberActions=NULL;/* list of lexical header member decl   MR1 */
    352 ListNode *LexPrefixActions=NULL;/* list of lexical header #include decl MR1 */
    353 ListNode **Cycles=NULL;		/* list of cycles (for each k) found when
    354 							   doing FOLLOWs */
    355 ListNode *eclasses=NULL;	/* list of error classes */
    356 ListNode *tclasses=NULL;	/* list of token classes */
    357 LClass	 lclass[MaxLexClasses]; /* array of lex class definitions */
    358 int		 CurrentLexClass;	/* index into lclass */
    359 int		 NumLexClasses=0;	/* in range 1..MaxLexClasses (init 0) */
    360 
    361 char	*HdrAction=NULL;	/* action defined with #header */
    362 char    *FirstAction=NULL;  /* action defined with #first MR11 */
    363 FILE	*ErrFile;			/* sets and error recovery stuff */
    364 FILE	*DefFile=NULL;		/* list of tokens, return value structs, setwd defs */
    365 FILE    *MRinfoFile=NULL;   /* MR10 information file */
    366 int     MRinfo=0;           /* MR10 */
    367 int     MRinfoSeq=0;        /* MR10 */
    368 int     InfoP=0;            /* MR10 predicates        */
    369 int     InfoT=0;            /* MR10 tnodes            */
    370 int     InfoF=0;            /* MR10 first/follow sets */
    371 int     InfoM=0;            /* MR10 monitor progress  */
    372 int     InfoO=0;            /* MR12 orphan rules      */
    373 int     TnodesInUse=0;      /* MR10 */
    374 int     TnodesPeak=0;       /* MR10 */
    375 int     TnodesAllocated=0;  /* MR10 */
    376 int     TnodesReportThreshold=0;    /* MR11 */
    377 int     PotentialSuppression=0; /* MR10 */
    378 int     PotentialDummy=0;       /* MR10 */
    379 int		CannotContinue=FALSE;
    380 int		OutputLL_k = 1;		/* LL_k for parsing must be power of 2 */
    381 int		action_file;		/* used to track start of action */
    382 int		action_line;
    383 int		FoundGuessBlk=0;	/* there is a (...)? block somewhere in grammar */
    384 int		FoundException=0;	/* there is an exception somewhere in grammar */
    385 /* MR6	Distinguish between @ operator and real exception 		    */
    386 /* MR6    by keeping separate flags for @ operator and real exceptions 	    */
    387 int		FoundAtOperator=0;					                     /* MR6 */
    388 int		FoundExceptionGroup=0;			                             /* MR6 */
    389 int		pLevel=0;			/* print Level */
    390 int		pAlt1,pAlt2;		/* print "==>" in front of these alts */
    391 
    392 /* C++ output stuff */
    393 FILE	*Parser_h,			/* where subclass of ANTLRParser goes */
    394 		*Parser_c;			/* where code for subclass of ANTLRParser goes */
    395 char	Parser_h_Name[MaxFileName+1] = "";
    396 char	Parser_c_Name[MaxFileName+1] = "";
    397 char    MRinfoFile_Name[MaxFileName+1] = "";                /* MR10 */
    398 char    *ClassDeclStuff=NULL;                               /* MR10 */
    399 char    *BaseClassName=NULL;                                /* MR22 */
    400 /* list of actions inside the #class {...} defs */
    401 ListNode *class_before_actions=NULL;
    402 ListNode *class_after_actions=NULL;
    403 
    404 char	CurrentClassName[MaxRuleName]="";
    405 int		no_classes_found=1;
    406 char	*UserTokenDefsFile;
    407 int		UserDefdTokens=0;	/* found #tokdefs? */
    408 char	*OutputDirectory=TopDirectory;
    409 ExceptionGroup *DefaultExGroup = NULL;
    410 int		NumSignals = NumPredefinedSignals;
    411 int		ContextGuardTRAV=0;
    412 
    413 char    *MR_AmbAidRule=NULL;        /* MR11 */
    414 int     MR_AmbAidLine=0;            /* MR11 */
    415 int     MR_AmbAidDepth=0;           /* MR11 */
    416 int     MR_AmbAidMultiple=0;        /* MR11 */
    417 int     MR_skipped_e3_report=0;     /* MR11 */
    418 int     MR_usingPredNames=0;        /* MR11 */
    419 int     MR_BadExprSets=0;           /* MR13 */
    420 int     MR_Inhibit_Tokens_h_Gen=0;  /* MR13 */
    421 int     NewAST=0;                   /* MR13 */
    422 int		tmakeInParser=0;            /* MR23 */
    423 int     AlphaBetaTrace=0;           /* MR14 */
    424 int		MR_BlkErr=0;				/* MR21 */
    425 int     MR_AlphaBetaMessageCount=0; /* MR14 */
    426 int     MR_AlphaBetaWarning=0;      /* MR14 */
    427 int     MR_ErrorSetComputationActive=0;     /* MR14 */
    428 int     MR_MaintainBackTrace=0;             /* MR14 */
    429 set     MR_CompromisedRules;        /* MR14 */
    430 
    431 Junction    *MR_RuleBlkWithHalt;    /* MR10 */
    432 
    433 					/* C m d - L i n e  O p t i o n s */
    434 
    435 int		LL_k=1;				/* how many tokens of full lookahead */
    436 int		CLL_k= -1;			/* how many tokens of compressed lookahead */
    437 int		PrintOut = FALSE;	/* print out the grammar */
    438 int		PrintAnnotate = FALSE;/* annotate printout with FIRST sets */
    439 int		CodeGen=TRUE;		/* Generate output code? */
    440 int		LexGen=TRUE;		/* Generate lexical files? (tokens.h, parser.dlg) */
    441 int		GenAST=FALSE;		/* Generate AST's? */
    442 int		GenANSI=FALSE;		/* Generate ANSI code where necessary */
    443 int		GenExprSetsOpt=TRUE;/* use sets not (LA(1)==tok) expression lists */
    444 int		GenCR=FALSE;		/* Generate cross reference? */
    445 int		GenLineInfo=FALSE;	/* Generate # line "file" stuff? */
    446 int		GenLineInfoMS=FALSE;/* Like -gl but replace "\" with "/" for MS C/C++ systems */
    447 int		TraceGen=FALSE;		/* Generate code to trace rule invocation */
    448 int		elevel=1;			/* error level for ambiguity messages */
    449 int		GenEClasseForRules=0;/* don't generate eclass for each rule */
    450 int		TreeResourceLimit= -1;/* don't limit tree resource */
    451 int		DemandLookahead = 0;/* demand/delayed lookahead or not */
    452 char	*RulePrefix = "";	/* prefix each generated rule with this */
    453 char	*stdpccts = "stdpccts.h";/* where to generate std pccts include file */
    454 int		GenStdPccts = 0;	/* don't gen stdpccts.h? */
    455 int		ParseWithPredicates = 1;
    456 int		WarningLevel = 1;
    457 int		UseStdout = 0;					/* MR6 */
    458 int		TabWidth = 2;					/* MR6 */ /* MR27 */
    459 int		HoistPredicateContext = 0;
    460 int     MRhoisting = 0;                 /* MR9 */
    461 int     MRhoistingk = 0;                /* MR13 */
    462 int     MR_debugGenRule=0;              /* MR11 */
    463 
    464 int		GenCC = 0;			/* Generate C++ output */
    465 
    466 PointerStack MR_BackTraceStack={0,0,NULL};            /* MR10 */
    467 PointerStack MR_PredRuleRefStack={0,0,NULL};          /* MR10 */
    468 PointerStack MR_RuleBlkWithHaltStack={0,0,NULL};      /* MR10 */
    469 
    470 /* DontCopyTokens and Pragma_DupLabeledTokens were a bad idea.  I've just
    471    turned them off rather than backpatching the code.  Who knows?  We
    472    may need them in the future.
    473  */
    474 int		DontCopyTokens = 1;	/* in C++, don't copy ANTLRToken passed to ANTLR */
    475 
    476 /* Remember if LT(i), LA(i), or LATEXT(i) used in an action which is not
    477    a predicate.  If so, give a warning for novice users.
    478 */
    479 
    480 int     LTinTokenAction = 0; /* MR23 */
    481 int     PURIFY = 1;          /* MR23 */
    482 
    483 int     CurBlockID_array[MAX_BLK_LEVEL]; /* MR23 */
    484 int     CurAltNum_array[MAX_BLK_LEVEL]; /* MR23 */
    485