Home | History | Annotate | Download | only in h
      1 /* ANTLRParser.h
      2  *
      3  * Define the generic ANTLRParser superclass, which is subclassed to
      4  * define an actual parser.
      5  *
      6  * Before entry into this file: ANTLRTokenType must be set.
      7  *
      8  * SOFTWARE RIGHTS
      9  *
     10  * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
     11  * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
     12  * company may do whatever they wish with source code distributed with
     13  * PCCTS or the code generated by PCCTS, including the incorporation of
     14  * PCCTS, or its output, into commerical software.
     15  *
     16  * We encourage users to develop software with PCCTS.  However, we do ask
     17  * that credit is given to us for developing PCCTS.  By "credit",
     18  * we mean that if you incorporate our source code into one of your
     19  * programs (commercial product, research project, or otherwise) that you
     20  * acknowledge this fact somewhere in the documentation, research report,
     21  * etc...  If you like PCCTS and have developed a nice tool with the
     22  * output, please mention that you developed it using PCCTS.  In
     23  * addition, we ask that this header remain intact in our source code.
     24  * As long as these guidelines are kept, we expect to continue enhancing
     25  * this system and expect to make other tools available as they are
     26  * completed.
     27  *
     28  * ANTLR 1.33
     29  * Terence Parr
     30  * Parr Research Corporation
     31  * with Purdue University and AHPCRC, University of Minnesota
     32  * 1989-2000
     33  */
     34 
     35 #ifndef APARSER_H_GATE
     36 #define APARSER_H_GATE
     37 
     38 #include "pcctscfg.h"
     39 
     40 #include "pccts_stdio.h"
     41 #include "pccts_setjmp.h"
     42 
     43 PCCTS_NAMESPACE_STD
     44 
     45 #include ATOKEN_H
     46 #include ATOKENBUFFER_H
     47 
     48 #ifdef ZZCAN_GUESS
     49 #ifndef ZZINF_LOOK
     50 #define ZZINF_LOOK
     51 #endif
     52 #endif
     53 
     54 
     55 #define NLA			(token_type[lap&(LLk-1)])/* --> next LA */
     56 
     57 typedef unsigned char SetWordType;
     58 
     59 /* Define external bit set stuff (for SetWordType) */
     60 #define EXT_WORDSIZE	(sizeof(char)*8)
     61 #define EXT_LOGWORDSIZE	3
     62 
     63            /* s y n t a c t i c  p r e d i c a t e  s t u f f */
     64 
     65 #ifndef zzUSER_GUESS_HOOK
     66 #define zzUSER_GUESS_HOOK(seqFrozen,zzrv)
     67 #endif
     68 
     69 #ifndef zzUSER_GUESS_DONE_HOOK
     70 #define zzUSER_GUESS_DONE_HOOK(seqFrozen)
     71 #endif
     72 
     73 /* MR14 Add zzUSER_GUESS_FAIL_HOOK and related code */
     74 
     75 #define zzUSER_GUESS_FAIL_HOOK_INTERNAL zzUSER_GUESS_FAIL_HOOK(SeqFrozen)
     76 #ifndef zzUSER_GUESS_FAIL_HOOK
     77 #define zzUSER_GUESS_FAIL_HOOK(zzGuessSeq)
     78 #endif
     79 
     80 
     81 typedef struct _zzjmp_buf {
     82 			jmp_buf state;
     83 		} zzjmp_buf;
     84 
     85 /* these need to be macros not member functions */
     86 #define zzGUESS_BLOCK		ANTLRParserState zzst; int zzrv; int _marker; int zzGuessSeqFrozen;
     87 #define zzNON_GUESS_MODE	if ( !guessing )
     88 #define zzGUESS_FAIL		guess_fail();
     89 
     90 /*  Note:  zzGUESS_DONE does not execute longjmp() */
     91 
     92 #define zzGUESS_DONE		{zzrv=1; inputTokens->rewind(_marker); guess_done(&zzst);zzUSER_GUESS_DONE_HOOK(zzGuessSeqFrozen) }
     93 #define zzGUESS				saveState(&zzst); \
     94 							guessing = 1; \
     95                             zzGuessSeqFrozen = ++zzGuessSeq; \
     96 							_marker = inputTokens->mark(); \
     97 							zzrv = setjmp(guess_start.state); \
     98                             zzUSER_GUESS_HOOK(zzGuessSeqFrozen,zzrv) \
     99 						    if ( zzrv ) zzGUESS_DONE
    100 
    101 #define zzTRACEdata     const ANTLRChar *zzTracePrevRuleName = NULL;
    102 
    103 #ifndef zzTRACEIN
    104 #define zzTRACEIN(r)	zzTracePrevRuleName=traceCurrentRuleName;tracein(r);
    105 #endif
    106 #ifndef zzTRACEOUT
    107 #define zzTRACEOUT(r)	traceout(r);traceCurrentRuleName=zzTracePrevRuleName;
    108 #endif
    109 
    110                   /* a n t l r  p a r s e r  d e f */
    111 
    112 struct ANTLRParserState {
    113 	/* class variables */
    114 	zzjmp_buf guess_start;
    115 	int guessing;
    116 
    117 	int inf_labase;
    118 	int inf_last;
    119 
    120 	int dirty;
    121 
    122     int             traceOptionValue;       // MR10
    123     int             traceGuessOptionValue;  // MR10
    124     const ANTLRChar *traceCurrentRuleName;  // MR10
    125     int             traceDepth;             // MR10
    126 
    127 };
    128 
    129 /* notes:
    130  *
    131  * multiple inheritance is a cool way to include what stuff is needed
    132  * in this structure (like guess stuff).  however, i'm not convinced that
    133  * multiple inheritance works correctly on all platforms.  not that
    134  * much space is used--just include all possibly useful members.
    135  *
    136  * the class should also be a template with arguments for the lookahead
    137  * depth and so on.  that way, more than one parser can be defined (as
    138  * each will probably have different lookahead requirements).  however,
    139  * am i sure that templates work?  no, i'm not sure.
    140  *
    141  * no attributes are maintained and, hence, the 'asp' variable is not
    142  * needed.  $i can still be referenced, but it refers to the token
 * associated with that rule element.  question: where are the tokens
 * stored if not on the software stack?  in local variables created
 * and assigned to by antlr.
    146  */
    147 class ANTLRParser {
    148 protected:
    149 	/* class variables */
    150 	static SetWordType bitmask[sizeof(SetWordType)*8];
    151 	static char eMsgBuffer[500];
    152 
    153 protected:
    154 	int LLk;					// number of lookahead symbols (old LL_K)
    155 	int demand_look;
    156 	ANTLRTokenType eofToken;			// when do I stop during resynch()s
    157 	int bsetsize;           			// size of bitsets created by ANTLR in
    158         								// units of SetWordType
    159 
    160 	ANTLRTokenBuffer *inputTokens;	//place to get input tokens
    161 
    162 	zzjmp_buf guess_start;		// where to jump back to upon failure
    163 	int guessing;				// if guessing (using (...)? predicate)
    164 
    165 	// infinite lookahead stuff
    166 	int can_use_inf_look;		// set by subclass (generated by ANTLR)
    167 	int inf_lap;
    168 	int inf_labase;
    169 	int inf_last;
    170 	int *_inf_line;
    171 
    172 	const ANTLRChar **token_tbl; // pointer to table of token type strings MR20 const
    173 
    174 	int dirty;					// used during demand lookahead
    175 
    176 	ANTLRTokenType *token_type;		// fast reference cache of token.getType()
    177 //	ANTLRLightweightToken **token;	// the token with all its attributes
    178 	int lap;
    179 	int labase;
    180 #ifdef ZZDEFER_FETCH
    181 	int stillToFetch;                               // MR19 V.H. Simonis
    182 #endif
    183 
    184 private:
    185 	void fill_inf_look();
    186 
    187 protected:
    188 	virtual void guess_fail() {                         // MR9 27-Sep-97 make virtual
    189         traceGuessFail();                               // MR10
    190         longjmp(guess_start.state, 1); }                // MR9
    191 	virtual void guess_done(ANTLRParserState *st) {     // MR9 27-Sep-97 make virtual
    192          restoreState(st); }                            // MR9
    193 	virtual int guess(ANTLRParserState *);              // MR9 27-Sep-97 make virtual
    194 	void look(int);
    195     int _match(ANTLRTokenType, ANTLRChar **, ANTLRTokenType *,
    196 			   _ANTLRTokenPtr *, SetWordType **);
    197     int _setmatch(SetWordType *, ANTLRChar **, ANTLRTokenType *,
    198 			   _ANTLRTokenPtr *, SetWordType **,
    199 			   SetWordType * tokclassErrset /* MR23 */);
    200     int _match_wsig(ANTLRTokenType);
    201     int _setmatch_wsig(SetWordType *);
    202     virtual void consume();
    203     virtual void resynch(SetWordType *wd,SetWordType mask); // MR21
    204 	void prime_lookahead();
    205 	virtual void tracein(const ANTLRChar *r);              // MR10
    206 	virtual void traceout(const ANTLRChar *r);             // MR10
    207 	static unsigned MODWORD(unsigned x) {return x & (EXT_WORDSIZE-1);}	// x % EXT_WORDSIZE // MR9
    208 	static unsigned DIVWORD(unsigned x) {return x >> EXT_LOGWORDSIZE;}	// x / EXT_WORDSIZE // MR9
    209 	int set_deg(SetWordType *);
    210 	int set_el(ANTLRTokenType, SetWordType *);
    211 	virtual void edecode(SetWordType *);				// MR1
    212 	virtual void FAIL(int k, ...);					    // MR1
    213     int                 traceOptionValue;                           // MR10
    214     int                 traceGuessOptionValue;                      // MR10
    215     const ANTLRChar     *traceCurrentRuleName;                      // MR10
    216     int                 traceDepth;                                 // MR10
    217     void                traceReset();                               // MR10
    218     virtual void        traceGuessFail();                           // MR10
    219     virtual void        traceGuessDone(const ANTLRParserState *);   // MR10
    220     int                 zzGuessSeq;                                 // MR10
    221 
    222 public:
    223 	ANTLRParser(ANTLRTokenBuffer *,
    224 				int k=1,
    225 				int use_inf_look=0,
    226 				int demand_look=0,
    227 				int bsetsize=1);
    228 	virtual ~ANTLRParser();
    229 
    230 	virtual void init();
    231 
    232 	ANTLRTokenType LA(int i)
    233 	{
    234 //
    235 //  MR14 demand look will always be 0 for C++ mode
    236 //
    237 ////	return demand_look ? token_type[(labase+(i)-1)&(LLk-1)] :
    238 ////						token_type[(lap+(i)-1)&(LLk-1)];
    239 
    240 // MR19 V.H. Simonis Defer fetch feature
    241 
    242 #ifdef ZZDEFER_FETCH
    243       undeferFetch();
    244 #endif
    245 	  return token_type[(lap+(i)-1)&(LLk-1)];
    246 	}
    247 	_ANTLRTokenPtr LT(int i);
    248 
    249 	void setEofToken(ANTLRTokenType t)	{ eofToken = t; }
    250 	ANTLRTokenType getEofToken() const  { return eofToken; }    // MR14
    251 
    252 	void noGarbageCollectTokens()	{ inputTokens->noGarbageCollectTokens(); }
    253 	void garbageCollectTokens()		{ inputTokens->garbageCollectTokens(); }
    254 
    255     virtual void syn(_ANTLRTokenPtr tok, ANTLRChar *egroup,
    256 					 SetWordType *eset, ANTLRTokenType etok, int k);
    257 	virtual void saveState(ANTLRParserState *);     // MR9 27-Sep-97 make virtual
    258 	virtual void restoreState(ANTLRParserState *);  // MR9 27-Sep-97 make virtual
    259 
    260 	virtual void panic(const char *msg); // MR20 const
    261 
    262 	static char *eMsgd(char *,int);
    263 	static char *eMsg(char *,char *);
    264 	static char *eMsg2(char *,char *,char *);
    265 
    266 	virtual int printMessage(FILE* pFile, const char* pFormat, ...); // MR23
    267 	virtual int printMessageV(FILE* pFile, const char* pFormat, va_list arglist); // MR23
    268 
    269 	void consumeUntil(SetWordType *st);
    270 	void consumeUntilToken(int t);
    271 
    272 	virtual int _setmatch_wdfltsig(SetWordType *tokensWanted,
    273 					 ANTLRTokenType tokenTypeOfSet,
    274 					 SetWordType *whatFollows);
    275 	virtual int _match_wdfltsig(ANTLRTokenType tokenWanted,
    276 					 SetWordType *whatFollows);
    277 
    278 	const ANTLRChar * parserTokenName(int tok);			// MR1
    279 
    280     int                 traceOptionValueDefault;        // MR11
    281     int                 traceOption(int delta);         // MR11
    282     int                 traceGuessOption(int delta);    // MR11
    283 
    284 //  MR8  5-Aug-97   S.Bochnak (at) microtool.com.pl
    285 //  MR8             Move resynch static local variable
    286 //  MR8               to class instance
    287 
    288     int                 syntaxErrCount;                      // MR12
    289     ANTLRTokenStream   *getLexer() const {                   // MR12
    290       return inputTokens ? inputTokens->getLexer() : 0; }    // MR12
    291 protected:                                              // MR8
    292     int     resynchConsumed;                            // MR8
    293     char    *zzFAILtext; // workarea required by zzFAIL // MR9
    294     void    undeferFetch();                             // MR19 V.H. Simonis
    295     int     isDeferFetchEnabled();                      // MR19 V.H. Simonis
    296     virtual void failedSemanticPredicate(const char* predicate); /* MR23 */
    297 };
    298 
    299 #define zzmatch(_t)							\
    300 	if ( !_match((ANTLRTokenType)_t, &zzMissText, &zzMissTok, \
    301 				 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail;
    302 
    303 #define zzmatch_wsig(_t,handler)						\
    304 	if ( !_match_wsig((ANTLRTokenType)_t) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;}
    305 
    306 #define zzsetmatch(_ts,_tokclassErrset)							\
    307 	if ( !_setmatch(_ts, &zzMissText, &zzMissTok, \
    308 				 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet, _tokclassErrset) ) goto fail;
    309 
    310 #define zzsetmatch_wsig(_ts, handler)				\
    311 	if ( !_setmatch_wsig(_ts) ) if ( guessing ) zzGUESS_FAIL else {_signal=MismatchedToken; goto handler;}
    312 
    313 /* For the dflt signal matchers, a FALSE indicates that an error occurred
    314  * just like the other matchers, but in this case, the routine has already
    315  * recovered--we do NOT want to consume another token.  However, when
    316  * the match was successful, we do want to consume hence _signal=0 so that
    317  * a token is consumed by the "if (!_signal) consume(); _signal=NoSignal;"
    318  * preamble.
    319  */
    320 #define zzsetmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) \
    321 	if ( !_setmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) ) \
    322 		_signal = MismatchedToken;
    323 
    324 #define zzmatch_wdfltsig(tokenWanted, whatFollows) \
    325 	if ( !_match_wdfltsig(tokenWanted, whatFollows) ) _signal = MismatchedToken;
    326 
    327 
    328 //  MR1  10-Apr-97 	zzfailed_pred() macro does not backtrack in guess mode.
    329 //  MR1	   		    Identification and correction due to J. Lilley
    330 //
    331 //  MR23            Call virtual method to report error.
    332 //  MR23            Provide more control over failed predicate action
    333 //                  without any need for user to worry about guessing internals.
    334 
    335 #ifndef zzfailed_pred
    336 #define zzfailed_pred(_p,_hasuseraction,_useraction) \
    337   if (guessing) { \
    338     zzGUESS_FAIL; \
    339   } else { \
    340     zzfailed_pred_action(_p,_hasuseraction,_useraction) \
    341   }
    342 #endif
    343 
    344 //  MR23            Provide more control over failed predicate action
    345 //                  without any need for user to worry about guessing internals.
    346 //                  _hasuseraction == 0 => no user specified error action
    347 //                  _hasuseraction == 1 => user specified error action
    348 
    349 #ifndef zzfailed_pred_action
    350 #define zzfailed_pred_action(_p,_hasuseraction,_useraction) \
    351     if (_hasuseraction) { _useraction } else { failedSemanticPredicate(_p); }
    352 #endif
    353 
    354 #define zzRULE \
    355 		SetWordType *zzMissSet=NULL; ANTLRTokenType zzMissTok=(ANTLRTokenType)0;	\
    356 		_ANTLRTokenPtr zzBadTok=NULL; ANTLRChar *zzBadText=(ANTLRChar *)"";	\
    357 		int zzErrk=1,zzpf=0; \
    358         zzTRACEdata \
    359 		ANTLRChar *zzMissText=(ANTLRChar *)"";
    360 
    361 #endif
    362 
    363         /* S t a n d a r d  E x c e p t i o n  S i g n a l s */
    364 
    365 #define NoSignal			0
    366 #define MismatchedToken		1
    367 #define NoViableAlt			2
    368 #define NoSemViableAlt		3
    369 
    370 /* MR7  Allow more control over signalling                                  */
    371 /*        by adding "Unwind" and "SetSignal"                                */
    372 
    373 #define Unwind              4
    374 #define setSignal(newValue) *_retsignal=_signal=(newValue)
    375 #define suppressSignal       *_retsignal=_signal=0
    376 #define exportSignal        *_retsignal=_signal
    377