/*
 [The "BSD license"]
 Copyright (c) 2010 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** ANTLR v3 grammar written in ANTLR v3 with AST construction */
grammar ANTLRv3;

options {
    output=AST;
    ASTLabelType=CommonTree;
}

// Imaginary node types used in rewrites, plus names for keyword/operator literals.
// NOTE: only plain comments may appear here; a doc comment is a real token
// (DOC_COMMENT) and is only accepted before 'grammar' and before a rule.
tokens {
    DOC_COMMENT;
    PARSER;
    LEXER;
    RULE;
    BLOCK;
    OPTIONAL;
    CLOSURE;
    POSITIVE_CLOSURE;
    SYNPRED;
    RANGE;
    CHAR_RANGE;
    EPSILON;
    ALT;
    EOR;
    EOB;
    EOA; // end of alt
    ID;
    ARG;
    ARGLIST;
    RET='returns';
    LEXER_GRAMMAR;
    PARSER_GRAMMAR;
    TREE_GRAMMAR;
    COMBINED_GRAMMAR;
    LABEL; // $x used in rewrite rules
    TEMPLATE;
    SCOPE='scope';
    SEMPRED;
    GATED_SEMPRED; // {p}? =>
    SYN_SEMPRED; // (...) => it's a manually-specified synpred converted to sempred
    BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
    FRAGMENT='fragment';
    TREE_BEGIN='^(';
    ROOT='^';
    BANG='!';
    RANGE='..';
    REWRITE='->';
    AT='@';
    LABEL_ASSIGN='=';
    LIST_LABEL_ASSIGN='+=';
}

@parser::header
{
    package org.antlr.grammar.v3;
}
@lexer::header
{
    package org.antlr.grammar.v3;
}

@members {
    // Grammar type recorded by grammarDef (one of the *_GRAMMAR token types);
    // read again by ebnf when rewriting syntactic predicates.
    int gtype;
}

/** Entry rule: an optional doc comment, an optional grammar-type keyword
 *  (none means a combined grammar), the grammar header and at least one rule.
 *  The root node of the result is created from the recorded grammar type.
 */
grammarDef
    :   DOC_COMMENT?
        (   'lexer'  {gtype=LEXER_GRAMMAR;}    // pure lexer
        |   'parser' {gtype=PARSER_GRAMMAR;}   // pure parser
        |   'tree'   {gtype=TREE_GRAMMAR;}     // a tree parser
        |            {gtype=COMBINED_GRAMMAR;} // merged parser/lexer
        )
        g='grammar' id ';' optionsSpec? tokensSpec? attrScope* action*
        rule+
        EOF
        -> ^( {adaptor.create(gtype,$g)}
              id DOC_COMMENT? optionsSpec? tokensSpec? attrScope* action* rule+
            )
    ;

/** The tokens section; the TOKENS lexer rule already consumed the opening brace. */
tokensSpec
    :   TOKENS tokenSpec+ '}' -> ^(TOKENS tokenSpec+)
    ;

/** One token declaration, optionally aliased to a string or char literal. */
tokenSpec
    :   TOKEN_REF
        (   '=' (lit=STRING_LITERAL|lit=CHAR_LITERAL)   -> ^('=' TOKEN_REF $lit)
        |                                               -> TOKEN_REF
        )
        ';'
    ;

/** A named scope declared at grammar level: 'scope' name followed by an action. */
attrScope
    :   'scope' id ACTION -> ^('scope' id ACTION)
    ;

/** Match stuff like @parser::members {int i;} */
action
    :   '@' (actionScopeName '::')? id ACTION -> ^('@' actionScopeName? id ACTION)
    ;

/** Sometimes the scope names will collide with keywords; allow them as
 *  ids for action scopes.
 */
actionScopeName
    :   id
    |   l='lexer'   -> ID[$l]
    |   p='parser'  -> ID[$p]
    ;

/** An options section; the OPTIONS lexer rule already consumed the opening brace. */
optionsSpec
    :   OPTIONS (option ';')+ '}' -> ^(OPTIONS option+)
    ;

/** A single name=value option. */
option
    :   id '=' optionValue -> ^('=' id optionValue)
    ;

/** Value side of an option; a bare '*' is retyped to STRING_LITERAL. */
optionValue
    :   qid
    |   STRING_LITERAL
    |   CHAR_LITERAL
    |   INT
    |   s='*' -> STRING_LITERAL[$s]  // used for k=*
    ;

/** A single rule definition. The rule name is stored in the dynamic scope
 *  so that nested rules (see ebnf) can read it through $rule::name.
 */
rule
scope {
    String name;
}
    :   DOC_COMMENT?
        ( modifier=('protected'|'public'|'private'|'fragment') )?
        id {$rule::name = $id.text;}
        '!'?
        ( arg=ARG_ACTION )?
        ( 'returns' rt=ARG_ACTION )?
        throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
        ':' altList ';'
        exceptionGroup?
        -> ^( RULE id {modifier!=null?adaptor.create(modifier):null} ^(ARG[$arg] $arg)? ^('returns' $rt)?
              throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
              altList
              exceptionGroup?
              EOR["EOR"]
            )
    ;

/** Match stuff like @init {int i;} */
ruleAction
    :   '@' id ACTION -> ^('@' id ACTION)
    ;

/** A 'throws' clause with one or more comma-separated ids. */
throwsSpec
    :   'throws' id ( ',' id )* -> ^('throws' id+)
    ;

/** Rule-level scope: an inline scope action, a list of named scopes, or both. */
ruleScopeSpec
    :   'scope' ACTION -> ^('scope' ACTION)
    |   'scope' id (',' id)* ';' -> ^('scope' id+)
    |   'scope' ACTION
        'scope' id (',' id)* ';'
        -> ^('scope' ACTION id+ )
    ;

/** A parenthesized subrule with optional options and one or more alternatives.
 *  BLOCK takes its position from '(' and EOB from ')'.
 */
block
    :   lp='('
        ( (opts=optionsSpec)? ':' )?
        altpair ( '|' altpair )*
        rp=')'
        -> ^( BLOCK[$lp,"BLOCK"] optionsSpec? altpair+ EOB[$rp,"EOB"] )
    ;

/** An alternative together with its (possibly empty) rewrite. */
altpair : alternative rewrite ;

/** The top-level alternatives of a rule, wrapped in a manually created BLOCK root. */
altList
@init {
    // must create root manually as it's used by invoked rules in real antlr tool.
    // leave here to demonstrate use of {...} in rewrite rule
    // it's really BLOCK[firstToken,"BLOCK"]; set line/col to previous ( or : token.
    CommonTree blkRoot = (CommonTree)adaptor.create(BLOCK,input.LT(-1),"BLOCK");
}
    :   altpair ( '|' altpair )* -> ^( {blkRoot} altpair+ EOB["EOB"] )
    ;

/** One alternative; an empty alternative is rewritten to ^(ALT EPSILON EOA)
 *  positioned at the token preceding it.
 */
alternative
@init {
    Token firstToken = input.LT(1);
    Token prevToken = input.LT(-1); // either : or | I think
}
    :   element+ -> ^(ALT[firstToken,"ALT"] element+ EOA["EOA"])
    |   -> ^(ALT[prevToken,"ALT"] EPSILON[prevToken,"EPSILON"] EOA["EOA"])
    ;

/** One or more catch handlers with an optional finally clause, or a finally clause alone. */
exceptionGroup
    :   ( exceptionHandler )+ ( finallyClause )?
    |   finallyClause
    ;

/** A 'catch' handler: argument action followed by the handler action. */
exceptionHandler
    :   'catch' ARG_ACTION ACTION -> ^('catch' ARG_ACTION ACTION)
    ;

/** A 'finally' clause with its action. */
finallyClause
    :   'finally' ACTION -> ^('finally' ACTION)
    ;

/** One element of an alternative. When an EBNF suffix follows a labeled
 *  element, atom or tree spec, the element is wrapped in a synthetic
 *  BLOCK/ALT pair under the suffix node.
 */
element
    :   id (labelOp='='|labelOp='+=') atom
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id atom) EOA["EOA"]) EOB["EOB"]))
        |               -> ^($labelOp id atom)
        )
    |   id (labelOp='='|labelOp='+=') block
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id block) EOA["EOA"]) EOB["EOB"]))
        |               -> ^($labelOp id block)
        )
    |   atom
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] atom EOA["EOA"]) EOB["EOB"]) )
        |               -> atom
        )
    |   ebnf
    |   ACTION
    |   SEMPRED ( g='=>' -> GATED_SEMPRED[$g] | -> SEMPRED )
    |   treeSpec
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] treeSpec EOA["EOA"]) EOB["EOB"]) )
        |               -> treeSpec
        )
    ;

/** A terminal, range, negated set or rule reference; ranges, sets and rule
 *  references may carry a '^' or '!' operator, which becomes the subtree root.
 */
atom:   terminal
    |   range
        (   (op='^'|op='!') -> ^($op range)
        |                   -> range
        )
    |   notSet
        (   (op='^'|op='!') -> ^($op notSet)
        |                   -> notSet
        )
    |   RULE_REF ARG_ACTION?
        (   (op='^'|op='!') -> ^($op RULE_REF ARG_ACTION?)
        |                   -> ^(RULE_REF ARG_ACTION?)
        )
    ;

/** A '~' applied to a single terminal or to a block. */
notSet
    :   '~'
        (   notTerminal elementOptions? -> ^('~' notTerminal elementOptions?)
        |   block elementOptions?       -> ^('~' block elementOptions?)
        )
    ;

/** Terminals that may follow '~'. */
notTerminal
    :   CHAR_LITERAL
    |   TOKEN_REF
    |   STRING_LITERAL
    ;

/** Options attached to an element inside angle brackets: either a single
 *  qualified id or a ';'-separated list of name=value pairs.
 */
elementOptions
    :   '<' qid '>'                                 -> ^(OPTIONS qid)
    |   '<' elementOption (';' elementOption)* '>'  -> ^(OPTIONS elementOption+)
    ;

/** A single name=value pair inside element options. */
elementOption
    :   id '=' optionValue -> ^('=' id optionValue)
    ;

/** A tree pattern: '^(' followed by at least two elements. */
treeSpec
    :   '^(' element ( element )+ ')' -> ^(TREE_BEGIN element+)
    ;

/** A character range c1..c2, rewritten to a CHAR_RANGE node positioned at c1. */
range!
    :   c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL elementOptions?
        -> ^(CHAR_RANGE[$c1,".."] $c1 $c2 elementOptions?)
    ;

/** A literal, token reference or wildcard, optionally followed by '^' or '!'
 *  which then becomes the root over the terminal.
 */
terminal
    :   (   CHAR_LITERAL elementOptions?            -> ^(CHAR_LITERAL elementOptions?)
            // Args are only valid for lexer rules
        |   TOKEN_REF ARG_ACTION? elementOptions?   -> ^(TOKEN_REF ARG_ACTION? elementOptions?)
        |   STRING_LITERAL elementOptions?          -> ^(STRING_LITERAL elementOptions?)
        |   '.' elementOptions?                     -> ^('.' elementOptions?)
        )
        (   '^' -> ^('^' $terminal)
        |   '!' -> ^('!' $terminal)
        )?
    ;

/** Matches EBNF blocks (and token sets via block rule) */
ebnf
@init {
    Token firstToken = input.LT(1);
}
@after {
    // Reposition the root of the result at the first token of the block.
    $ebnf.tree.getToken().setLine(firstToken.getLine());
    $ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
}
    :   block
        (   op='?'  -> ^(OPTIONAL[op] block)
        |   op='*'  -> ^(CLOSURE[op] block)
        |   op='+'  -> ^(POSITIVE_CLOSURE[op] block)
        |   '=>'    // syntactic predicate
                    -> {gtype==COMBINED_GRAMMAR &&
                        Character.isUpperCase($rule::name.charAt(0))}?
                       // if lexer rule in combined, leave as pred for lexer
                       ^(SYNPRED["=>"] block)
                    // in real antlr tool, text for SYN_SEMPRED is predname
                    -> SYN_SEMPRED
        |           -> block
        )
    ;

/** A '?', '*' or '+' suffix, rewritten to an OPTIONAL, CLOSURE or
 *  POSITIVE_CLOSURE node built from the suffix token.
 */
ebnfSuffix
@init {
    Token op = input.LT(1);
}
    :   '?' -> OPTIONAL[op]
    |   '*' -> CLOSURE[op]
    |   '+' -> POSITIVE_CLOSURE[op]
    ;



// R E W R I T E  S Y N T A X

/** Zero or more predicated rewrites followed by one final unpredicated
 *  rewrite; the whole rewrite may also be absent.
 */
rewrite
@init {
    Token firstToken = input.LT(1);
}
    :   (rew+='->' preds+=SEMPRED predicated+=rewrite_alternative)*
        rew2='->' last=rewrite_alternative
        -> ^($rew $preds $predicated)* ^($rew2 $last)
    |
    ;

/** Right-hand side of one rewrite: a template, a tree alternative, or nothing
 *  (which yields ^(ALT EPSILON EOA)). Backtracking chooses between them.
 */
rewrite_alternative
options {backtrack=true;}
    :   rewrite_template
    |   rewrite_tree_alternative
    |   /* empty rewrite */ -> ^(ALT["ALT"] EPSILON["EPSILON"] EOA["EOA"])
    ;

/** A parenthesized rewrite alternative. Both BLOCK and EOB are positioned
 *  at the opening parenthesis.
 */
rewrite_tree_block
    :   lp='(' rewrite_tree_alternative ')'
        -> ^(BLOCK[$lp,"BLOCK"] rewrite_tree_alternative EOB[$lp,"EOB"])
    ;

/** One or more rewrite elements wrapped in ALT ... EOA. */
rewrite_tree_alternative
    :   rewrite_tree_element+ -> ^(ALT["ALT"] rewrite_tree_element+ EOA["EOA"])
    ;

/** One element on the right-hand side of a tree rewrite; a suffixed atom or
 *  tree is wrapped in a synthetic BLOCK/ALT pair under the suffix node.
 */
rewrite_tree_element
    :   rewrite_tree_atom
    |   rewrite_tree_atom ebnfSuffix
        -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree_atom EOA["EOA"]) EOB["EOB"]))
    |   rewrite_tree
        (   ebnfSuffix
            -> ^(ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree EOA["EOA"]) EOB["EOB"]))
        |   -> rewrite_tree
        )
    |   rewrite_tree_ebnf
    ;

/** The leaves allowed in a tree rewrite. */
rewrite_tree_atom
    :   CHAR_LITERAL
    |   TOKEN_REF ARG_ACTION? -> ^(TOKEN_REF ARG_ACTION?) // for imaginary nodes
    |   RULE_REF
    |   STRING_LITERAL
    |   d='$' id -> LABEL[$d,$id.text] // reference to a label in a rewrite rule
    |   ACTION
    ;

/** A rewrite block followed by an EBNF suffix; the suffix becomes the root. */
rewrite_tree_ebnf
@init {
    Token firstToken = input.LT(1);
}
@after {
    // Reposition the root of the result at the first token of the block.
    $rewrite_tree_ebnf.tree.getToken().setLine(firstToken.getLine());
    $rewrite_tree_ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
}
    :   rewrite_tree_block ebnfSuffix -> ^(ebnfSuffix rewrite_tree_block)
    ;

/** A tree constructor in a rewrite: '^(' root atom, then zero or more elements. */
rewrite_tree
    :   '^(' rewrite_tree_atom rewrite_tree_element* ')'
        -> ^(TREE_BEGIN rewrite_tree_atom rewrite_tree_element* )
    ;

/** Build a tree for a template rewrite:
      ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
    where ARGLIST is always there even if no args exist.
    ID can be "template" keyword.  If first child is ACTION then it's
    an indirect template ref

    -> foo(a={...}, b={...})
    -> ({string-e})(a={...}, b={...})  // e evaluates to template name
    -> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
    -> {st-expr} // st-expr evaluates to ST
 */
rewrite_template
    :   // -> template(a={...},...) "..."    inline template
        id lp='(' rewrite_template_args ')'
        ( str=DOUBLE_QUOTE_STRING_LITERAL | str=DOUBLE_ANGLE_STRING_LITERAL )
        -> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args $str)

    |   // -> foo(a={...}, ...)
        rewrite_template_ref

    |   // -> ({expr})(a={...}, ...)
        rewrite_indirect_template_head

    |   // -> {...}
        ACTION
    ;

/** -> foo(a={...}, ...) */
rewrite_template_ref
    :   id lp='(' rewrite_template_args ')'
        -> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args)
    ;

/** -> ({expr})(a={...}, ...) */
rewrite_indirect_template_head
    :   lp='(' ACTION ')' '(' rewrite_template_args ')'
        -> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args)
    ;

/** Comma-separated template arguments; an ARGLIST node is produced even when empty. */
rewrite_template_args
    :   rewrite_template_arg (',' rewrite_template_arg)*
        -> ^(ARGLIST rewrite_template_arg+)
    |   -> ARGLIST
    ;

/** One template argument, name={action}; the ARG node is positioned at the name. */
rewrite_template_arg
    :   id '=' ACTION -> ^(ARG[$id.start] id ACTION)
    ;

/** A dot-separated sequence of ids. */
qid :   id ('.' id)* ;

/** Either kind of reference token, retyped to an ID node. */
id  :   TOKEN_REF -> ID[$TOKEN_REF]
    |   RULE_REF  -> ID[$RULE_REF]
    ;

// L E X I C A L   R U L E S

/** Single-line comment, sent to the hidden channel. It must be terminated
 *  by a newline; a comment beginning with the $ANTLR marker carries a SRC directive.
 */
SL_COMMENT
    :   '//'
        (   ' $ANTLR ' SRC // src directive
        |   ~('\r'|'\n')*
        )
        '\r'? '\n'
        {$channel=HIDDEN;}
    ;

/** Block comment. When the character right after the opening delimiter is
 *  another star the token is retyped to DOC_COMMENT and stays visible to the
 *  parser; otherwise it goes to the hidden channel.
 */
ML_COMMENT
    :   '/*' {if (input.LA(1)=='*') $type=DOC_COMMENT; else $channel=HIDDEN;} .* '*/'
    ;

/** A single-quoted literal holding exactly one character or escape. */
CHAR_LITERAL
    :   '\'' LITERAL_CHAR '\''
    ;

/** A single-quoted literal holding one or more characters or escapes. */
STRING_LITERAL
    :   '\'' LITERAL_CHAR LITERAL_CHAR* '\''
    ;

/** One character of a single-quoted literal: an escape, or anything except quote and backslash. */
fragment
LITERAL_CHAR
    :   ESC
    |   ~('\''|'\\')
    ;

/** Double-quoted string (used for inline templates). */
DOUBLE_QUOTE_STRING_LITERAL
    :   '"' (ESC | ~('\\'|'"'))* '"'
    ;

/** Double-angle-bracket string (used for inline templates). */
DOUBLE_ANGLE_STRING_LITERAL
    :   '<<' .* '>>'
    ;

/** Backslash escape: a known escape letter, a four-hex-digit unicode escape,
 *  or any other single character.
 */
fragment
ESC :   '\\'
        (   'n'
        |   'r'
        |   't'
        |   'b'
        |   'f'
        |   '"'
        |   '\''
        |   '\\'
        |   '>'
        |   'u' XDIGIT XDIGIT XDIGIT XDIGIT
        |   . // unknown, leave as it is
        )
    ;

/** One hexadecimal digit. */
fragment
XDIGIT :
        '0' .. '9'
    |   'a' .. 'f'
    |   'A' .. 'F'
    ;

/** One or more decimal digits. */
INT :   '0'..'9'+
    ;

/** A bracketed argument action, including any nested brackets. */
ARG_ACTION
    :   NESTED_ARG_ACTION
    ;

/** Bracket-delimited text; nested brackets and quoted literals are skipped
 *  as units so brackets inside them do not end the action.
 */
fragment
NESTED_ARG_ACTION :
    '['
    (   options {greedy=false; k=1;}
    :   NESTED_ARG_ACTION
    |   ACTION_STRING_LITERAL
    |   ACTION_CHAR_LITERAL
    |   .
    )*
    ']'
    //{setText(getText().substring(1, getText().length()-1));}
    ;

/** A brace-delimited action; a trailing '?' retypes the token to SEMPRED. */
ACTION
    :   NESTED_ACTION ( '?' {$type = SEMPRED;} )?
    ;

/** Brace-delimited text; nested braces, comments and quoted literals are
 *  skipped as units so braces inside them do not end the action.
 */
fragment
NESTED_ACTION :
    '{'
    (   options {greedy=false; k=2;}
    :   NESTED_ACTION
    |   SL_COMMENT
    |   ML_COMMENT
    |   ACTION_STRING_LITERAL
    |   ACTION_CHAR_LITERAL
    |   .
    )*
    '}'
    ;

/** Character literal as it appears inside an action. */
fragment
ACTION_CHAR_LITERAL
    :   '\'' (ACTION_ESC|~('\\'|'\'')) '\''
    ;

/** String literal as it appears inside an action. */
fragment
ACTION_STRING_LITERAL
    :   '"' (ACTION_ESC|~('\\'|'"'))* '"'
    ;

/** Backslash escape inside an action literal. */
fragment
ACTION_ESC
    :   '\\\''
    |   '\\' '"' // ANTLR doesn't like: '\\"'
    |   '\\' ~('\''|'"')
    ;

/** Identifier starting with an upper-case letter. */
TOKEN_REF
    :   'A'..'Z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    ;

/** Identifier starting with a lower-case letter. */
RULE_REF
    :   'a'..'z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    ;

/** Match the start of an options section.  Don't allow normal
 *  action processing on the {...} as it's not an action.
 */
OPTIONS
    :   'options' WS_LOOP '{'
    ;

/** Match the start of a tokens section, including its opening brace. */
TOKENS
    :   'tokens' WS_LOOP '{'
    ;

/** Reset the file and line information; useful when the grammar
 *  has been generated so that errors are shown relative to the
 *  original file like the old C preprocessor used to do.
 */
fragment
SRC :   'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT
    ;

/** Whitespace, sent to the hidden channel. */
WS  :   (   ' '
        |   '\t'
        |   '\r'? '\n'
        )+
        {$channel=HIDDEN;}
    ;

/** Any run of whitespace and comments; used between a section keyword and its brace. */
fragment
WS_LOOP
    :   (   WS
        |   SL_COMMENT
        |   ML_COMMENT
        )*
    ;