/*
 [The "BSD licence"]
 Copyright (c) 2007-2008 Leon Jen-Yuan Su
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:

 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Grammar of gUnit test scripts. The parser actions record everything they
// recognize (grammar names, options, header package, test suites) in the
// GrammarInfo instance handed to the parser constructor.
grammar gUnit;
options {language=Java;}
tokens {
    OK = 'OK';
    FAIL = 'FAIL';
    DOC_COMMENT;
}
@header {package org.antlr.gunit;}
@lexer::header {
package org.antlr.gunit;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
}
@members {
public GrammarInfo grammarInfo;
public gUnitParser(TokenStream input, GrammarInfo grammarInfo) {
    super(input);
    this.grammarInfo = grammarInfo;
}

// Escape letters decoded by unescape() and, at the same index, their values.
private static final String ESCAPE_CODES  = "nrtbf\"'\\";
private static final String ESCAPE_VALUES = "\n\r\t\b\f\"'\\";

// Decodes the escapes listed in ESCAPE_CODES in one left-to-right pass.
// A single pass is required: with chained String.replace calls an escaped
// backslash followed by the letter n was first rewritten as backslash plus
// newline. Any other escape (unicode, unknown) is copied through unchanged.
private static String unescape(String text) {
    StringBuffer decoded = new StringBuffer(text.length());
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        if ( c=='\\' && i+1<text.length() ) {
            int k = ESCAPE_CODES.indexOf(text.charAt(i+1));
            if ( k!=-1 ) {
                decoded.append(ESCAPE_VALUES.charAt(k));
                i++; // the escape letter has been consumed as well
                continue;
            }
        }
        decoded.append(c);
    }
    return decoded.toString();
}
}

// Entry rule: 'gunit <grammar>;' or 'gunit <treeGrammar> walks <grammar>;'
// followed by optional options, an optional header and the test suites.
gUnitDef:   'gunit' g1=id ('walks' g2=id)? ';'
        {
        if ( $g2.text!=null ) {
            grammarInfo.setGrammarName($g2.text);
            grammarInfo.setTreeGrammarName($g1.text);
        }
        else {
            grammarInfo.setGrammarName($g1.text);
        }
        }
        optionsSpec? header? testsuite*
    ;

optionsSpec
    :   OPTIONS (option ';')+ '}'
    ;

// Note: currently, this is the only valid option for setting customized tree adaptor
option  :   id '=' treeAdaptor
        {
        if ( $id.text.equals("TreeAdaptor") ) {
            grammarInfo.setAdaptor($treeAdaptor.text);
        }
        // TODO: need a better error logging strategy
        else System.err.println("Invalid option detected: "+$text);
        }
    ;

treeAdaptor
    :   id EXT*
    ;

// Extracts the package name from the '@header' action text.
header  :   '@header' ACTION
        {
        String headerText = $ACTION.text;
        int pos1 = headerText.indexOf("package");
        // Search for the terminating semicolon only from the package keyword
        // onward; an earlier semicolon used to make substring() throw.
        int pos2 = (pos1==-1) ? -1 : headerText.indexOf(';', pos1);
        if ( pos2!=-1 && pos2>=pos1+8 ) {
            grammarInfo.setGrammarPackage(headerText.substring(pos1+8, pos2).trim()); // substring the package path
        }
        else {
            System.err.println("error(line "+$ACTION.getLine()+"): invalid header");
        }
        }
    ;

testsuite   // gUnit test suite based on individual rule
scope {
boolean isLexicalRule;
}
@init {
gUnitTestSuite ts = null;
$testsuite::isLexicalRule = false;
}
    :   (   r1=RULE_REF ('walks' r2=RULE_REF)?
            {
            if ( $r2==null ) ts = new gUnitTestSuite($r1.text);
            else ts = new gUnitTestSuite($r1.text, $r2.text);
            }
        |   t=TOKEN_REF
            {
            ts = new gUnitTestSuite();
            ts.setLexicalRuleName($t.text);
            $testsuite::isLexicalRule = true;
            }
        )
        ':'
        testcase[ts]+ {grammarInfo.addRuleTestSuite(ts);}
    ;

// TODO : currently gUnit just ignores illegal test for lexer rule, but should also emit a reminding message
testcase[gUnitTestSuite ts] // individual test within a (rule)testsuite
    :   input expect {$ts.addTestCase($input.in, $expect.out);}
    ;

// Test input: a quoted string (escapes decoded), a multi-line string, or a
// file reference. The result also carries the line where the input starts.
input returns [gUnitTestInput in]
@init {
String testInput = null;
boolean inputIsFile = false;
int line = -1;
}
@after {
$in = new gUnitTestInput(testInput, inputIsFile, line);
}
    :   STRING
        {
        testInput = unescape($STRING.text);
        line = $STRING.line;
        }
    |   ML_STRING
        {
        testInput = $ML_STRING.text;
        line = $ML_STRING.line;
        }
    |   file
        {
        testInput = $file.text;
        inputIsFile = true;
        line = $file.line;
        }
    ;

// Expected outcome of a test. For the 'returns' and '->' alternatives the
// result is only assigned when the enclosing suite is not for a lexical rule.
expect returns [AbstractTest out]
    :   OK {$out = new BooleanTest(true);}
    |   FAIL {$out = new BooleanTest(false);}
    |   'returns' RETVAL {if ( !$testsuite::isLexicalRule ) $out = new ReturnTest($RETVAL);}
    |   '->' output {if ( !$testsuite::isLexicalRule ) $out = new OutputTest($output.token);}
    ;

// Expected output token; the text of a quoted string has its escapes decoded.
output returns [Token token]
    :   STRING
        {
        $STRING.setText(unescape($STRING.text));
        $token = $STRING;
        }
    |   ML_STRING {$token = $ML_STRING;}
    |   AST {$token = $AST;}
    |   ACTION {$token = $ACTION;}
    ;

file returns [int line]
    :   id EXT? {$line = $id.line;}
    ;

id returns [int line]
    :   TOKEN_REF {$line = $TOKEN_REF.line;}
    |   RULE_REF {$line = $RULE_REF.line;}
    ;

// L E X I C A L   R U L E S

SL_COMMENT
    :   '//' ~('\r'|'\n')* '\r'? '\n' {$channel=HIDDEN;}
    ;

ML_COMMENT
    :   '/*' {$channel=HIDDEN;} .* '*/'
    ;

// Quoted string; the surrounding double quotes are removed from the token text.
STRING  :   '"' ( ESC | ~('\\'|'"') )* '"' {setText(getText().substring(1, getText().length()-1));}
    ;

// Multi-line string delimited by '<<' and '>>'. The token text has the
// delimiters, one adjacent newline on each side, and the indentation that
// precedes '<<' (removed from every line that starts with it) stripped.
ML_STRING
    :   {// we need to determine the number of spaces or tabs (indentation) for multi-line input
        StringBuffer buf = new StringBuffer();
        int i = -1;
        int c = input.LA(-1);
        while ( c==' ' || c=='\t' ) {
            buf.append((char)c);
            c = input.LA(--i);
        }
        String indentation = buf.reverse().toString();
        }
        '<<' .* '>>'
        {// also determine the appropriate newline separator and get info of the first and last 2 characters (exclude '<<' and '>>')
        String newline = System.getProperty("line.separator");
        String front, end;
        int oldFrontIndex = 2;
        int oldEndIndex = getText().length()-2;
        int newFrontIndex, newEndIndex;
        if ( newline.length()==1 ) {
            front = getText().substring(2, 3);
            end = getText().substring(getText().length()-3, getText().length()-2);
            newFrontIndex = 3;
            newEndIndex = getText().length()-3;
        }
        else {// must be 2, e.g. Windows System which uses \r\n as a line separator
            front = getText().substring(2, 4);
            end = getText().substring(getText().length()-4, getText().length()-2);
            newFrontIndex = 4;
            newEndIndex = getText().length()-4;
        }
        // strip unwanted characters, e.g. '<<' (including a newline after it) or '>>' (including a newline before it)
        String temp = null;
        if ( front.equals(newline) && end.equals(newline) ) {
            // need to handle the special case: <<\n>> or <<\r\n>>
            if ( newline.length()==1 && getText().length()==5 ) temp = "";
            else if ( newline.length()==2 && getText().length()==6 ) temp = "";
            else temp = getText().substring(newFrontIndex, newEndIndex);
        }
        else if ( front.equals(newline) ) {
            temp = getText().substring(newFrontIndex, oldEndIndex);
        }
        else if ( end.equals(newline) ) {
            temp = getText().substring(oldFrontIndex, newEndIndex);
        }
        else {
            temp = getText().substring(oldFrontIndex, oldEndIndex);
        }
        // finally we need to process the indentation line by line
        BufferedReader bufReader = new BufferedReader(new StringReader(temp));
        buf = new StringBuffer();
        String line = null;
        int count = 0;
        try {
            while((line = bufReader.readLine()) != null) {
                if ( line.startsWith(indentation) ) line = line.substring(indentation.length());
                if ( count>0 ) buf.append(newline);
                buf.append(line);
                count++;
            }
            setText(buf.toString());
        }
        catch (IOException ioe) {
            setText(temp);
        }
        }
    ;

TOKEN_REF
    :   'A'..'Z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    ;

RULE_REF
    :   'a'..'z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    ;

EXT :   '.'('a'..'z'|'A'..'Z'|'0'..'9')+;

// Bracketed return value; the outer square brackets are removed from the token text.
RETVAL  :   NESTED_RETVAL {setText(getText().substring(1, getText().length()-1));}
    ;

fragment
NESTED_RETVAL :
    '['
    (   options {greedy=false;}
    :   NESTED_RETVAL
    |   .
    )*
    ']'
    ;

AST :   NESTED_AST (' '? NESTED_AST)*;

fragment
NESTED_AST :
    '('
    (   NESTED_AST
    |   STRING_LITERAL
    |   ~('('|')'|'"')
    )*
    ')'
    ;

OPTIONS :   'options' WS* '{'
    ;

// Brace-delimited action; the outer curly braces are removed from the token text.
ACTION
    :   NESTED_ACTION {setText(getText().substring(1, getText().length()-1));}
    ;

fragment
NESTED_ACTION :
    '{'
    (   options {greedy=false; k=3;}
    :   NESTED_ACTION
    |   STRING_LITERAL
    |   CHAR_LITERAL
    |   .
    )*
    '}'
    ;

fragment
CHAR_LITERAL
    :   '\'' ( ESC | ~('\''|'\\') ) '\''
    ;

fragment
STRING_LITERAL
    :   '"' ( ESC | ~('\\'|'"') )* '"'
    ;

fragment
ESC :   '\\'
        (   'n'
        |   'r'
        |   't'
        |   'b'
        |   'f'
        |   '"'
        |   '\''
        |   '\\'
        |   '>'
        |   'u' XDIGIT XDIGIT XDIGIT XDIGIT
        |   .   // unknown, leave as it is
        )
    ;

fragment
XDIGIT :
        '0' .. '9'
    |   'a' .. 'f'
    |   'A' .. 'F'
    ;

WS  :   (   ' '
        |   '\t'
        |   '\r'? '\n'
        )+
        {$channel=HIDDEN;}
    ;