Home | History | Annotate | Download | only in v3
      1 /*
      2  [The "BSD license"]
      3  Copyright (c) 2010 Terence Parr
      4  All rights reserved.
      5 
      6  Redistribution and use in source and binary forms, with or without
      7  modification, are permitted provided that the following conditions
      8  are met:
      9  1. Redistributions of source code must retain the above copyright
     10     notice, this list of conditions and the following disclaimer.
     11  2. Redistributions in binary form must reproduce the above copyright
     12     notice, this list of conditions and the following disclaimer in the
     13     documentation and/or other materials provided with the distribution.
     14  3. The name of the author may not be used to endorse or promote products
     15     derived from this software without specific prior written permission.
     16 
     17  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     18  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     19  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     20  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     21  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     22  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 */
     28 /** ANTLR v3 grammar written in ANTLR v3 with AST construction */
     29 grammar ANTLRv3; // declared without a lexer/parser/tree keyword; parser and lexer rules both follow
     30 
     31 options {
     32 	output=AST; // rules produce trees, mostly through the -> rewrites on each alternative
     33 	ASTLabelType=CommonTree;
     34 }
     35 
     36 tokens { // entries written NAME=literal name that literal; the others have no lexer rule of their own in this file
     37 	DOC_COMMENT; // type assigned inside ML_COMMENT
     38 	PARSER;
     39     LEXER;
     40     RULE;
     41     BLOCK;
     42     OPTIONAL;
     43     CLOSURE;
     44     POSITIVE_CLOSURE;
     45     SYNPRED;
     46     RANGE; // NOTE(review): RANGE is declared a second time further down, bound to the .. literal
     47     CHAR_RANGE;
     48     EPSILON;
     49     ALT;
     50     EOR;
     51     EOB;
     52     EOA; // end of alt
     53     ID; // node type produced by the id rule from TOKEN_REF and RULE_REF
     54     ARG;
     55     ARGLIST;
     56     RET='returns';
     57     LEXER_GRAMMAR; // this and the next three are the values grammarDef stores in gtype
     58     PARSER_GRAMMAR;
     59     TREE_GRAMMAR;
     60     COMBINED_GRAMMAR;
     61     LABEL; // $x used in rewrite rules
     62     TEMPLATE;
     63     SCOPE='scope';
     64     SEMPRED; // type assigned inside ACTION when the braces are followed by ?
     65     GATED_SEMPRED; // {p}? =>
     66     SYN_SEMPRED; // (...) =>   it's a manually-specified synpred converted to sempred
     67     BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
     68     FRAGMENT='fragment';
     69     TREE_BEGIN='^(';
     70     ROOT='^';
     71     BANG='!';
     72     RANGE='..'; // second declaration of RANGE; this one attaches the literal
     73     REWRITE='->';
     74     AT='@';
     75     LABEL_ASSIGN='=';
     76     LIST_LABEL_ASSIGN='+=';
     77 }
     78 
     79 @parser::header // package declaration for the generated parser
     80 {
     81     package org.antlr.grammar.v3;
     82 }
     83 @lexer::header // same package declaration for the generated lexer
     84 {
     85     package org.antlr.grammar.v3;
     86 }
     87 
     88 @members {
     89 	int gtype;
     90 } // gtype is written by grammarDef and read by the => branch of ebnf
     91 
     92 grammarDef // a complete grammar file: optional kind keyword, name, prequel sections, one or more rules, then EOF
     93     :   DOC_COMMENT?
     94     	(	'lexer'  {gtype=LEXER_GRAMMAR;}    // pure lexer
     95     	|   'parser' {gtype=PARSER_GRAMMAR;}   // pure parser
     96     	|   'tree'   {gtype=TREE_GRAMMAR;}     // a tree parser
     97     	|		     {gtype=COMBINED_GRAMMAR;} // merged parser/lexer
     98     	)
     99     	g='grammar' id ';' optionsSpec? tokensSpec? attrScope* action*
    100     	rule+
    101     	EOF
    102     	-> ^( {adaptor.create(gtype,$g)} // root node: type taken from gtype, created from the grammar keyword token
    103     		  id DOC_COMMENT? optionsSpec? tokensSpec? attrScope* action* rule+
    104     		)
    105     ;
    106 
    107 tokensSpec // TOKENS is one lexer token covering the tokens keyword and its opening brace
    108 	:	TOKENS tokenSpec+ '}' -> ^(TOKENS tokenSpec+)
    109 	;
    110 
    111 tokenSpec // one entry of the tokens section, optionally bound to a literal
    112 	:	TOKEN_REF
    113 		(	'=' (lit=STRING_LITERAL|lit=CHAR_LITERAL)	-> ^('=' TOKEN_REF $lit)
    114 		|												-> TOKEN_REF
    115 		)
    116 		';' // terminator is matched but appears in neither rewrite
    117 	;
    118 
    119 attrScope // named scope with an action body; referenced from grammarDef
    120 	:	'scope' id ACTION -> ^('scope' id ACTION)
    121 	;
    122 
    123 /** Match stuff like @parser::members {int i;} */
    124 action
    125 	:	'@' (actionScopeName '::')? id ACTION -> ^('@' actionScopeName? id ACTION) // the :: separator is not kept in the tree
    126 	;
    127 
    128 /** Sometimes the scope names will collide with keywords; allow them as
    129  *  ids for action scopes.
    130  */
    131 actionScopeName
    132 	:	id
    133 	|	l='lexer'	-> ID[$l] // keyword token rewritten as an ID node
    134     |   p='parser'	-> ID[$p] // keyword token rewritten as an ID node
    135 	;
    136 
    137 optionsSpec // OPTIONS is one lexer token covering the options keyword and its opening brace
    138 	:	OPTIONS (option ';')+ '}' -> ^(OPTIONS option+) // semicolons and closing brace are dropped
    139 	;
    140 
    141 option // name = value, with the = token as tree root
    142     :   id '=' optionValue -> ^('=' id optionValue)
    143  	;
    144 
    145 optionValue // right-hand side of an option assignment
    146     :   qid
    147     |   STRING_LITERAL
    148     |   CHAR_LITERAL
    149     |   INT
    150     |	s='*' -> STRING_LITERAL[$s]  // used for k=*
    151     ;
    152 
    153 rule // one rule definition; the scoped attribute name holds the rule id text and is read by ebnf
    154 scope {
    155 	String name;
    156 }
    157 	:	DOC_COMMENT?
    158 		( modifier=('protected'|'public'|'private'|'fragment') )?
    159 		id {$rule::name = $id.text;}
    160 		'!'? // matched here but not carried into the rewrite below
    161 		( arg=ARG_ACTION )?
    162 		( 'returns' rt=ARG_ACTION  )?
    163 		throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
    164 		':'	altList	';'
    165 		exceptionGroup?
    166 	    -> ^( RULE id {modifier!=null?adaptor.create(modifier):null} ^(ARG[$arg] $arg)? ^('returns' $rt)?
    167 	    	  throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
    168 	    	  altList
    169 	    	  exceptionGroup?
    170 	    	  EOR["EOR"] // closing marker node appended as the last child
    171 	    	)
    172 	;
    173 
    174 /** Match stuff like @init {int i;} */
    175 ruleAction // unlike the grammar-level action rule, no scope-name prefix is accepted here
    176 	:	'@' id ACTION -> ^('@' id ACTION)
    177 	;
    178 
    179 throwsSpec // comma-separated ids after the throws keyword; commas are dropped from the tree
    180 	:	'throws' id ( ',' id )* -> ^('throws' id+)
    181 	;
    182 
    183 ruleScopeSpec // three forms: action only, id list only, or action followed by id list
    184 	:	'scope' ACTION -> ^('scope' ACTION)
    185 	|	'scope' id (',' id)* ';' -> ^('scope' id+)
    186 	|	'scope' ACTION
    187 		'scope' id (',' id)* ';'
    188 		-> ^('scope' ACTION id+ ) // both parts are merged under a single scope root
    189 	;
    190 
    191 block // parenthesized subrule; BLOCK is positioned from the opening paren and EOB from the closing one
    192     :   lp='('
    193 		( (opts=optionsSpec)? ':' )?
    194 		altpair ( '|' altpair )*
    195         rp=')'
    196         -> ^( BLOCK[$lp,"BLOCK"] optionsSpec? altpair+ EOB[$rp,"EOB"] )
    197     ;
    198 
    199 altpair : alternative rewrite ; // an alternative followed by its rewrite; the rewrite rule has an empty alternative
    200 
    201 altList // the alternatives of a rule body, wrapped in a BLOCK root that is built by hand in the init action
    202 @init {
    203 	// must create root manually as it's used by invoked rules in real antlr tool.
    204 	// leave here to demonstrate use of {...} in rewrite rule
    205 	// it's really BLOCK[firstToken,"BLOCK"]; set line/col to previous ( or : token.
    206     CommonTree blkRoot = (CommonTree)adaptor.create(BLOCK,input.LT(-1),"BLOCK");
    207 }
    208     :   altpair ( '|' altpair )* -> ^( {blkRoot} altpair+ EOB["EOB"] )
    209     ;
    210 
    211 alternative // one or more elements, or nothing at all; the empty case yields an ALT holding EPSILON
    212 @init {
    213 	Token firstToken = input.LT(1);
    214 	Token prevToken = input.LT(-1); // either : or | I think
    215 }
    216     :   element+ -> ^(ALT[firstToken,"ALT"] element+ EOA["EOA"])
    217     |   -> ^(ALT[prevToken,"ALT"] EPSILON[prevToken,"EPSILON"] EOA["EOA"]) // no token of its own, so the previous token supplies position
    218     ;
    219 
    220 exceptionGroup // one or more catch handlers with an optional finally, or a finally on its own
    221 	:	( exceptionHandler )+ ( finallyClause )?
    222 	|	finallyClause
    223     ;
    224 
    225 exceptionHandler // catch keyword, bracketed argument text, then the handler action
    226     :    'catch' ARG_ACTION ACTION -> ^('catch' ARG_ACTION ACTION)
    227     ;
    228 
    229 finallyClause // finally keyword followed by an action
    230     :    'finally' ACTION -> ^('finally' ACTION)
    231     ;
    232 
    233 element // one item of an alternative; a trailing ebnfSuffix wraps the item in a synthetic BLOCK/ALT under the suffix node
    234 	:	id (labelOp='='|labelOp='+=') atom // labeled atom
    235 		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id atom) EOA["EOA"]) EOB["EOB"]))
    236 		|				-> ^($labelOp id atom)
    237 		)
    238 	|	id (labelOp='='|labelOp='+=') block // labeled block
    239 		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id block) EOA["EOA"]) EOB["EOB"]))
    240 		|				-> ^($labelOp id block)
    241 		)
    242 	|	atom // unlabeled atom
    243 		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] atom EOA["EOA"]) EOB["EOB"]) )
    244 		|				-> atom
    245 		)
    246 	|	ebnf
    247 	|   ACTION
    248 	|   SEMPRED ( g='=>' -> GATED_SEMPRED[$g] | -> SEMPRED ) // a following => turns the predicate into a gated one
    249 	|   treeSpec
    250 		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] treeSpec EOA["EOA"]) EOB["EOB"]) )
    251 		|				-> treeSpec
    252 		)
    253 	;
    254 
    255 atom:   terminal // the terminal rule handles its own ^ or ! suffix
    256 	|	range
    257 		(	(op='^'|op='!')	-> ^($op range)
    258 		|					-> range
    259 		)
    260     |	notSet
    261 		(	(op='^'|op='!')	-> ^($op notSet)
    262 		|					-> notSet
    263 		)
    264     |   RULE_REF ARG_ACTION? // rule invocation with optional bracketed arguments
    265 		(	(op='^'|op='!')	-> ^($op RULE_REF ARG_ACTION?)
    266 		|					-> ^(RULE_REF ARG_ACTION?)
    267 		)
    268     ;
    269 
    270 notSet // tilde applied to a single terminal or to a parenthesized block
    271 	:	'~'
    272 		(	notTerminal elementOptions?	-> ^('~' notTerminal elementOptions?)
    273 		|	block elementOptions?		-> ^('~' block elementOptions?)
    274 		)
    275 	;
    276 
    277 notTerminal // the token kinds accepted directly after a tilde
    278 	:   CHAR_LITERAL
    279 	|	TOKEN_REF
    280 	|	STRING_LITERAL
    281 	;
    282 
    283 elementOptions // angle-bracketed options on an element: a lone qualified id, or a ;-separated list of assignments
    284 	:	'<' qid '>'					 -> ^(OPTIONS qid)
    285 	|	'<' elementOption (';' elementOption)* '>' -> ^(OPTIONS elementOption+) // uses the dedicated elementOption rule; resulting tree shape is unchanged
    286 	;
    287 
    288 elementOption // name = value rooted at the = token; same shape as the option rule
    289 	:	id '=' optionValue -> ^('=' id optionValue)
    290 	;
    291 
    292 treeSpec // tree pattern: a first element followed by at least one more
    293 	:	'^(' element ( element )+ ')' -> ^(TREE_BEGIN element+)
    294 	;
    295 
    296 range! // character range between two char literals; the .. token itself is not kept
    297 	:	c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL elementOptions?
    298 		-> ^(CHAR_RANGE[$c1,".."] $c1 $c2 elementOptions?) // root is created from the first literal, with text ..
    299 	;
    300 
    301 terminal // a literal, token reference or wildcard, then an optional ^ or ! that becomes the new root
    302     :   (	CHAR_LITERAL elementOptions?    	  -> ^(CHAR_LITERAL elementOptions?)
    303 	    	// Args are only valid for lexer rules
    304 		|   TOKEN_REF ARG_ACTION? elementOptions? -> ^(TOKEN_REF ARG_ACTION? elementOptions?)
    305 		|   STRING_LITERAL elementOptions?		  -> ^(STRING_LITERAL elementOptions?)
    306 		|   '.' elementOptions?		 			  -> ^('.' elementOptions?)
    307 		)
    308 		(	'^'							-> ^('^' $terminal) // $terminal is the tree built by the first subrule
    309 		|	'!' 						-> ^('!' $terminal)
    310 		)?
    311 	;
    312 
    313 /** Matches EBNF blocks (and token sets via block rule) */
    314 ebnf // the after action copies line and column of the first token onto the root token of the result tree
    315 @init {
    316     Token firstToken = input.LT(1);
    317 }
    318 @after {
    319 	$ebnf.tree.getToken().setLine(firstToken.getLine());
    320 	$ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
    321 }
    322 	:	block
    323 		(	op='?'	-> ^(OPTIONAL[op] block)
    324 		|	op='*'	-> ^(CLOSURE[op] block)
    325 		|	op='+'	-> ^(POSITIVE_CLOSURE[op] block)
    326 		|   '=>'	// syntactic predicate
    327 					-> {gtype==COMBINED_GRAMMAR &&
    328 					    Character.isUpperCase($rule::name.charAt(0))}?
    329 					   // if lexer rule in combined, leave as pred for lexer
    330 					   ^(SYNPRED["=>"] block)
    331 					// in real antlr tool, text for SYN_SEMPRED is predname
    332 					-> SYN_SEMPRED // fallback rewrite: the block is not included in the result
    333         |			-> block
    334 		)
    335 	;
    336 
    337 ebnfSuffix // maps a ? * or + suffix to its node type; the node is created from the suffix token captured in init
    338 @init {
    339 	Token op = input.LT(1);
    340 }
    341 	:	'?'	-> OPTIONAL[op]
    342   	|	'*' -> CLOSURE[op]
    343    	|	'+' -> POSITIVE_CLOSURE[op]
    344 	;
    345 
    346 
    347 
    348 // R E W R I T E  S Y N T A X
    349 
    350 rewrite // zero or more predicated rewrites followed by one final rewrite, or nothing at all
    351 @init {
    352 	Token firstToken = input.LT(1);
    353 }
    354 	:	(rew+='->' preds+=SEMPRED predicated+=rewrite_alternative)* // NOTE(review): firstToken from the init action is not referenced in this rule
    355 		rew2='->' last=rewrite_alternative
    356         -> ^($rew $preds $predicated)* ^($rew2 $last) // one subtree per predicated rewrite, then the final one
    357 	|
    358 	;
    359 
    360 rewrite_alternative // right-hand side of a rewrite: template form, tree form, or empty; backtracking is enabled for this rule
    361 options {backtrack=true;}
    362 	:	rewrite_template
    363 	|	rewrite_tree_alternative
    364    	|   /* empty rewrite */ -> ^(ALT["ALT"] EPSILON["EPSILON"] EOA["EOA"])
    365 	;
    366 
    367 rewrite_tree_block // parenthesized rewrite alternative; BLOCK is positioned from the opening paren and EOB from the closing one, as in block
    368     :   lp='(' rewrite_tree_alternative rp=')'
    369     	-> ^(BLOCK[$lp,"BLOCK"] rewrite_tree_alternative EOB[$rp,"EOB"])
    370     ;
    371 
    372 rewrite_tree_alternative // one or more rewrite elements wrapped in ALT ... EOA
    373     :	rewrite_tree_element+ -> ^(ALT["ALT"] rewrite_tree_element+ EOA["EOA"])
    374     ;
    375 
    376 rewrite_tree_element // rewrite-side counterpart of element; a suffix wraps the item in a synthetic BLOCK/ALT under the suffix node
    377 	:	rewrite_tree_atom
    378 	|	rewrite_tree_atom ebnfSuffix // starts the same way as the alternative above
    379 		-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree_atom EOA["EOA"]) EOB["EOB"]))
    380 	|   rewrite_tree
    381 		(	ebnfSuffix
    382 			-> ^(ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree EOA["EOA"]) EOB["EOB"]))
    383 		|	-> rewrite_tree
    384 		)
    385 	|   rewrite_tree_ebnf
    386 	;
    387 
    388 rewrite_tree_atom // smallest unit allowed on the right of a rewrite arrow
    389     :   CHAR_LITERAL
    390 	|   TOKEN_REF ARG_ACTION? -> ^(TOKEN_REF ARG_ACTION?) // for imaginary nodes
    391     |   RULE_REF
    392 	|   STRING_LITERAL
    393 	|   d='$' id -> LABEL[$d,$id.text] // reference to a label in a rewrite rule
    394 	|	ACTION
    395 	;
    396 
    397 rewrite_tree_ebnf // suffixed rewrite block; the after action stamps line and column of the first token on the result root
    398 @init {
    399     Token firstToken = input.LT(1);
    400 }
    401 @after {
    402 	$rewrite_tree_ebnf.tree.getToken().setLine(firstToken.getLine());
    403 	$rewrite_tree_ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
    404 }
    405 	:	rewrite_tree_block ebnfSuffix -> ^(ebnfSuffix rewrite_tree_block) // suffix is required here
    406 	;
    407 
    408 rewrite_tree // tree constructor in a rewrite: the root must be an atom, children are optional
    409 	:	'^(' rewrite_tree_atom rewrite_tree_element* ')'
    410 		-> ^(TREE_BEGIN rewrite_tree_atom rewrite_tree_element* )
    411 	;
    412 
    413 /** Build a tree for a template rewrite:
    414       ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
    415     where ARGLIST is always there even if no args exist.
    416     ID can be "template" keyword.  If first child is ACTION then it's
    417     an indirect template ref
    418 
    419     -> foo(a={...}, b={...})
    420     -> ({string-e})(a={...}, b={...})  // e evaluates to template name
    421     -> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
    422 	-> {st-expr} // st-expr evaluates to ST
    423  */
    424 rewrite_template
    425 	:   // -> template(a={...},...) "..."    inline template
    426 		id lp='(' rewrite_template_args	')'
    427 		( str=DOUBLE_QUOTE_STRING_LITERAL | str=DOUBLE_ANGLE_STRING_LITERAL )
    428 		-> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args $str) // the template text literal is appended as the last child
    429 
    430 	|	// -> foo(a={...}, ...)
    431 		rewrite_template_ref
    432 
    433 	|	// -> ({expr})(a={...}, ...)
    434 		rewrite_indirect_template_head
    435 
    436 	|	// -> {...}
    437 		ACTION
    438 	;
    439 
    440 /** -> foo(a={...}, ...) */
    441 rewrite_template_ref
    442 	:	id lp='(' rewrite_template_args	')'
    443 		-> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args) // TEMPLATE node is created from the opening paren token
    444 	;
    445 
    446 /** -> ({expr})(a={...}, ...) */
    447 rewrite_indirect_template_head
    448 	:	lp='(' ACTION ')' '(' rewrite_template_args ')'
    449 		-> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args) // first child is the ACTION, where the direct form has an id
    450 	;
    451 
    452 rewrite_template_args // comma-separated template arguments, possibly none
    453 	:	rewrite_template_arg (',' rewrite_template_arg)*
    454 		-> ^(ARGLIST rewrite_template_arg+)
    455 	|	-> ARGLIST // an ARGLIST node is produced even when there are no arguments
    456 	;
    457 
    458 rewrite_template_arg // name = action; the ARG node is created from the first token of the id and the = is dropped
    459 	:   id '=' ACTION -> ^(ARG[$id.start] id ACTION)
    460 	;
    461 
    462 qid :	id ('.' id)* ; // dot-separated ids; no rewrite is given for this rule
    463 
    464 id	:	TOKEN_REF -> ID[$TOKEN_REF] // either kind of reference becomes an ID node created from the matched token
    465 	|	RULE_REF  -> ID[$RULE_REF]
    466 	;
    467 
    468 // L E X I C A L   R U L E S
    469 
    470 SL_COMMENT // line comment, sent to the hidden channel; the rule only matches when a newline follows
    471  	:	'//'
    472  	 	(	' $ANTLR ' SRC // src directive
    473  		|	~('\r'|'\n')*
    474 		)
    475 		'\r'? '\n'
    476 		{$channel=HIDDEN;}
    477 	;
    478 
    479 ML_COMMENT // block comment: typed DOC_COMMENT when the opener is followed by a further star, otherwise hidden
    480 	:	'/*' {if (input.LA(1)=='*' && input.LA(2)!='/') $type=DOC_COMMENT; else $channel=HIDDEN;} .* '*/'
    481 	; // the LA(2) test keeps an empty comment (opener directly followed by the closer) hidden instead of typing it DOC_COMMENT
    482 
    483 CHAR_LITERAL // exactly one LITERAL_CHAR between single quotes
    484 	:	'\'' LITERAL_CHAR '\''
    485 	;
    486 
    487 STRING_LITERAL // one or more LITERAL_CHARs between single quotes; NOTE(review): the one-character case has the same shape as CHAR_LITERAL
    488 	:	'\'' LITERAL_CHAR LITERAL_CHAR* '\''
    489 	;
    490 
    491 fragment
    492 LITERAL_CHAR // an escape sequence, or any character other than a single quote or backslash
    493 	:	ESC
    494 	|	~('\''|'\\')
    495 	;
    496 
    497 DOUBLE_QUOTE_STRING_LITERAL // double-quoted text, possibly empty; used by rewrite_template for inline template text
    498 	:	'"' (ESC | ~('\\'|'"'))* '"'
    499 	;
    500 
    501 DOUBLE_ANGLE_STRING_LITERAL // text delimited by double angle brackets; the other template text form in rewrite_template
    502 	:	'<<' .* '>>'
    503 	;
    504 
    505 fragment
    506 ESC	:	'\\' // a backslash followed by one of the listed characters, a four-hex-digit u form, or any other character
    507 		(	'n'
    508 		|	'r'
    509 		|	't'
    510 		|	'b'
    511 		|	'f'
    512 		|	'"'
    513 		|	'\''
    514 		|	'\\'
    515 		|	'>'
    516 		|	'u' XDIGIT XDIGIT XDIGIT XDIGIT
    517 		|	. // unknown, leave as it is
    518 		)
    519 	;
    520 
    521 fragment
    522 XDIGIT : // one hexadecimal digit, either letter case
    523 		'0' .. '9'
    524 	|	'a' .. 'f'
    525 	|	'A' .. 'F'
    526 	;
    527 
    528 INT	:	'0'..'9'+ // one or more decimal digits, no sign
    529 	;
    530 
    531 ARG_ACTION // token wrapper around the recursive fragment below
    532 	:	NESTED_ARG_ACTION
    533 	;
    534 
    535 fragment
    536 NESTED_ARG_ACTION : // square-bracketed text; nested brackets and quoted literals are matched as units
    537 	'['
    538 	(	options {greedy=false; k=1;}
    539 	:	NESTED_ARG_ACTION
    540 	|	ACTION_STRING_LITERAL
    541 	|	ACTION_CHAR_LITERAL
    542 	|	.
    543 	)*
    544 	']'
    545 	//{setText(getText().substring(1, getText().length()-1));}
    546 	; // the commented-out action above would strip the outer brackets from the token text
    547 
    548 ACTION // brace-delimited action; a trailing ? changes the token type to SEMPRED
    549 	:	NESTED_ACTION ( '?' {$type = SEMPRED;} )?
    550 	;
    551 
    552 fragment
    553 NESTED_ACTION : // brace-delimited text; nested braces, comments and quoted literals are matched as units
    554 	'{'
    555 	(	options {greedy=false; k=2;}
    556 	:	NESTED_ACTION
    557 	|	SL_COMMENT
    558 	|	ML_COMMENT
    559 	|	ACTION_STRING_LITERAL
    560 	|	ACTION_CHAR_LITERAL
    561 	|	.
    562 	)*
    563 	'}'
    564    ;
    565 
    566 fragment
    567 ACTION_CHAR_LITERAL // single-quoted character inside an action: one escape or one plain character
    568 	:	'\'' (ACTION_ESC|~('\\'|'\'')) '\''
    569 	;
    570 
    571 fragment
    572 ACTION_STRING_LITERAL // double-quoted string inside an action; also used by SRC for the file name
    573 	:	'"' (ACTION_ESC|~('\\'|'"'))* '"'
    574 	;
    575 
    576 fragment
    577 ACTION_ESC // a backslash plus one character; the two quote characters get their own alternatives
    578 	:	'\\\''
    579 	|	'\\' '"' // ANTLR doesn't like: '\\"'
    580 	|	'\\' ~('\''|'"')
    581 	;
    582 
    583 TOKEN_REF // identifier whose first character is an uppercase letter A-Z
    584 	:	'A'..'Z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    585 	;
    586 
    587 RULE_REF // identifier whose first character is a lowercase letter a-z
    588 	:	'a'..'z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    589 	;
    590 
    591 /** Match the start of an options section.  Don't allow normal
    592  *  action processing on the {...} as it's not an action.
    593  */
    594 OPTIONS // the opening brace is part of this token; whitespace and comments may sit between keyword and brace
    595 	:	'options' WS_LOOP '{'
    596 	;
    597 
    598 TOKENS // start of a tokens section; same shape as OPTIONS, with the opening brace inside the token
    599 	:	'tokens' WS_LOOP '{'
    600 	;
    601 
    602 /** Reset the file and line information; useful when the grammar
    603  *  has been generated so that errors are shown relative to the
    604  *  original file like the old C preprocessor used to do.
    605  */
    606 fragment
    607 SRC	:	'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT // the file and line labels are bound, but no action in this rule uses them
    608 	;
    609 
    610 WS	:	(	' ' // runs of whitespace go to the hidden channel
    611 		|	'\t'
    612 		|	'\r'? '\n' // a carriage return is accepted only directly before a newline
    613 		)+
    614 		{$channel=HIDDEN;}
    615 	;
    616 
    617 fragment
    618 WS_LOOP // any mix of whitespace and comments, possibly empty; used inside OPTIONS and TOKENS
    619 	:	(	WS
    620 		|	SL_COMMENT
    621 		|	ML_COMMENT
    622 		)*
    623 	;
    624 
    625