#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3'
require 'set'
require 'rake'
require 'rake/tasklib'
require 'shellwords'

module ANTLR3

=begin rdoc ANTLR3::CompileTask

A rake task-generating utility concerning ANTLR grammar file
compilation. This is a general utility -- the grammars do
not have to be targeted for Ruby output; it handles all
known ANTLR language targets.

  require 'antlr3/task'

  ANTLR3::CompileTask.define(
    :name => 'grammars', :output_directory => 'lib/parsers'
  ) do | t |
    t.grammar_set( 'antlr/MainParser.g', 'antlr/MainTree.g' )

    t.grammar_set( 'antlr/Template.g' ) do | gram |
      gram.output_directory = 'lib/parsers/template'
      gram.debug = true
    end
  end

Defining the task library creates a rake namespace named after the
:name option (default: 'antlr-grammars') that contains two tasks:

compile:: depends on every file the grammars are expected to generate
clobber:: deletes the generated files and any output directory left empty

Options given to CompileTask.define / CompileTask.new act as defaults for
every grammar set; options given to #grammar_set override them.

TODO: finish documentation

=end

class CompileTask < Rake::TaskLib
  attr_reader :grammar_sets, :options
  attr_accessor :name

  # Builds a task library, yields it for configuration when a block is
  # given, defines the rake tasks, and returns the library.
  #
  # grammar_files:: grammar paths, optionally followed by an options hash
  def self.define( *grammar_files )
    lib = new( *grammar_files )
    yield( lib ) if block_given?
    lib.define
    return( lib )
  end

  # grammar_files:: grammar paths (nested arrays are flattened), optionally
  #                 followed by a trailing options hash. The :name option
  #                 selects the rake namespace; the whole hash is kept as
  #                 the default option set for #grammar_set.
  def initialize( *grammar_files )
    # non-bang flatten: bang methods return nil when nothing changes
    grammar_files = grammar_files.flatten
    options = Hash === grammar_files.last ? grammar_files.pop : {}
    @grammar_sets = []
    @name = options.fetch( :name, 'antlr-grammars' )
    @options = options
    # remember the scope active at construction time so that the task
    # lookups below can rebuild the fully qualified task names
    @namespace = Rake.application.current_scope
    grammar_set( grammar_files ) unless grammar_files.empty?
  end

  # Returns every file that the registered grammar sets expect to generate.
  def target_files
    @grammar_sets.map { | set | set.target_files }.flatten
  end

  # Registers a new GrammarSet built from +grammar_files+. A trailing hash
  # is merged over the library-wide options. The set is yielded when a
  # block is given, and returned.
  def grammar_set( *grammar_files )
    grammar_files = grammar_files.flatten
    overrides = Hash === grammar_files.last ? grammar_files.pop : {}
    set = GrammarSet.new( grammar_files, @options.merge( overrides ) )
    yield( set ) if block_given?
    @grammar_sets << set
    return( set )
  end

  # Returns the rake task named '<scope>:<name>:compile'.
  def compile_task
    Rake::Task[ scoped_task_name( 'compile' ) ]
  end

  # Invokes the compile task.
  def compile!
    compile_task.invoke
  end

  # Returns the rake task named '<scope>:<name>:clobber'.
  def clobber_task
    Rake::Task[ scoped_task_name( 'clobber' ) ]
  end

  # Invokes the clobber task.
  def clobber!
    clobber_task.invoke
  end

  # Defines the 'clobber' and 'compile' tasks, plus the file tasks of every
  # grammar set, inside a namespace named after #name.
  def define
    namespace( @name ) do
      desc( "trash all ANTLR-generated source code" )
      task( 'clobber' ) do
        @grammar_sets.each { | set | set.clean }
      end

      @grammar_sets.each { | set | set.define_tasks }

      desc( "compile ANTLR grammars" )
      task( 'compile' => target_files )
    end
  end

  # Joins the scope captured in #initialize, the library name and +leaf+
  # into a fully qualified task name.
  #
  # Older rake versions expose the current scope as an Array of names;
  # newer ones expose a Rake::Scope, which provides #path but not Array#+.
  def scoped_task_name( leaf )
    scope =
      if @namespace.respond_to?( :path ) then @namespace.path.split( ':' )
      else @namespace.to_a
      end
    ( scope + [ @name, leaf ] ).join( ':' )
  end
  private :scoped_task_name

  # A group of grammar files that share an output directory, a load path,
  # and a set of ANTLR tool / JVM options.
  class GrammarSet
    # The rake DSL (file, sh, rm, rmdir, ...) is only injected into every
    # object by old rake versions; mix it in explicitly when available.
    include Rake::DSL if defined?( Rake::DSL )

    attr_accessor :antlr_jar, :debug,
                  :trace, :profile, :compile_options,
                  :java_options
    attr_reader :load_path, :grammars
    attr_writer :output_directory

    # grammar_files:: array of grammar file paths
    # options::       recognized keys are :output_directory, :load_path,
    #                 :antlr_jar, :debug, :trace, :profile,
    #                 :compile_options and :java_options (the last two may
    #                 be given as an Array or as a shell-style string)
    def initialize( grammar_files, options = {} )
      # the output directory must be known before the load path is built,
      # otherwise a nil entry ends up in the search path
      @output_directory = '.'
      dir = options[ :output_directory ] and @output_directory = dir.to_s

      @load_path = grammar_files.map { | f | File.dirname( f ) }
      @load_path.push( '.', @output_directory )
      if extra_load = options[ :load_path ]
        # splat so each directory becomes its own entry
        @load_path.unshift( *[ extra_load ].flatten )
      end
      @load_path.uniq!

      @antlr_jar = options.fetch( :antlr_jar, ANTLR3.antlr_jar )
      @debug = options.fetch( :debug, false )
      @trace = options.fetch( :trace, false )
      @profile = options.fetch( :profile, false )
      @compile_options = option_list( options[ :compile_options ] )
      @java_options = option_list( options[ :java_options ] )

      # built last: GrammarFile.new resolves imports through #locate and
      # therefore needs the load path to be complete
      @grammars = grammar_files.map do | file |
        GrammarFile.new( self, file )
      end
    end

    # Returns the files all grammars in this set are expected to generate.
    def target_files
      @grammars.map { | gram | gram.target_files }.flatten
    end

    # Directory handed to the ANTLR tool via -fo; falls back to '.'.
    def output_directory
      @output_directory || '.'
    end

    # Declares file tasks for the ANTLR jar and for each grammar. A grammar
    # naming a tokenVocab additionally depends on the matching .tokens file
    # when one can be found.
    def define_tasks
      file( @antlr_jar )

      @grammars.each do | grammar |
        deps = [ @antlr_jar ]
        vocab = grammar.token_vocab
        tokens = vocab && find_tokens_file( vocab, grammar )
        if tokens
          file( tokens )
          deps << tokens
        end
        grammar.define_tasks( deps )
      end
    end

    # Deletes the generated files of every grammar, then removes the output
    # directory if that leaves it empty.
    def clean
      @grammars.each { | grammar | grammar.clean }
      dir = output_directory
      if test( ?d, dir ) and ( Dir.entries( dir ) - %w( . .. ) ).empty?
        rmdir( dir )
      end
    end

    # Resolves the .tokens file for token vocabulary +vocab+: first from a
    # grammar of this set with that name, then from the load path. Emits a
    # warning and returns nil when neither yields a file.
    def find_tokens_file( vocab, grammar )
      provider = @grammars.find { | candidate | candidate.name == vocab }
      provider and return( provider.tokens_file )

      found = locate( "#{ vocab }.tokens" ) and return( found )

      warn( Util.tidy( <<-END, true ) )
      | unable to locate .tokens file `#{ vocab }' referenced in #{ grammar.path }
      | -- ignoring dependency
      END
      return( nil )
    end

    # Returns the path of the first existing file named +file_name+ in the
    # load path, or nil if there is none.
    def locate( file_name )
      @load_path.each do | dir |
        candidate = File.join( dir, file_name )
        return( candidate ) if File.file?( candidate )
      end
      return( nil )
    end

    # Creates the output directory if necessary and runs the ANTLR tool on
    # +grammar+.
    def compile( grammar )
      dir = output_directory
      test( ?d, dir ) or FileUtils.mkpath( dir )
      sh( build_command( grammar ) )
    end

    # Returns the shell command line (a String, with every token escaped)
    # that runs org.antlr.Tool on +grammar+.
    def build_command( grammar )
      parts = [ 'java', '-cp', @antlr_jar ]
      parts.concat( @java_options )
      parts << 'org.antlr.Tool' << '-fo' << output_directory
      parts << '-debug' if @debug
      parts << '-profile' if @profile
      parts << '-trace' if @trace
      parts.concat( @compile_options )
      parts << grammar.path
      return parts.map { | t | escape( t ) }.join( ' ' )
    end

    # Escapes +token+ for use as a single Bourne shell word.
    def escape( token )
      Shellwords.escape( token.to_s )
    end

    # Normalizes an option value into an array of words: arrays are used
    # as given, anything else is split like a shell command line.
    def option_list( value )
      case value
      when Array then value
      else Shellwords.shellwords( value.to_s )
      end
    end
    private :option_list

  end

  # A single grammar file: its declared name, type, target language, token
  # vocabulary and imports, plus the files ANTLR generates for it.
  class GrammarFile
    # see the note in GrammarSet: needed for file, rm and touch
    include Rake::DSL if defined?( Rake::DSL )

    # target language name => extensions of the files generated for it
    LANGUAGES = {
      "ActionScript" => [ ".as" ],
      "CSharp2" => [ ".cs" ],
      "C" => [ ".c", ".h" ],
      "ObjC" => [ ".m", ".h" ],
      "CSharp3" => [ ".cs" ],
      "Cpp" => [ ".cpp", ".h" ],
      "Ruby" => [ ".rb" ],
      "Java" => [ ".java" ],
      "JavaScript" => [ ".js" ],
      "Python" => [ ".py" ],
      "Delphi" => [ ".pas" ],
      "Perl5" => [ ".pm" ]
    }.freeze
    GRAMMAR_TYPES = %w(lexer parser tree combined).freeze

    ##################################################################
    ######## CONSTRUCTOR #############################################
    ##################################################################

    # group::   the owning GrammarSet
    # path::    path of the grammar file, which is read immediately
    # options:: :extra_dependencies is stored on the instance; nothing in
    #           this class consults it afterwards
    #
    # Yields self, when a block is given, after the source has been studied
    # and before imports are resolved.
    def initialize( group, path, options = {} )
      @group = group
      @path = path.to_s
      @imports = []
      @language = 'Java'
      @token_vocab = nil
      @tasks_defined = false
      @extra_dependencies = []
      if extra = options[ :extra_dependencies ]
        @extra_dependencies.concat( [ extra ].flatten )
      end

      study
      yield( self ) if block_given?
      fetch_imports
    end

    ##################################################################
    ######## ATTRIBUTES AND ATTRIBUTE-ISH METHODS ####################
    ##################################################################
    attr_reader :type, :name, :language, :source,
                :token_vocab, :imports, :imported_grammars,
                :path, :group

    # these settings live on the owning group
    [ :output_directory, :load_path, :antlr_jar ].each do | attr |
      class_eval( <<-END )
        def #{ attr }
          @group.#{ attr }
        end
      END
    end

    # Files generated for the lexer: none unless this is a lexer or a
    # combined grammar.
    def lexer_files
      if lexer? then base = @name
      elsif combined? then base = @name + 'Lexer'
      else return( [] )
      end
      return( file_names( base ) )
    end

    # Files generated for the parser: none unless this is a parser or a
    # combined grammar.
    def parser_files
      if parser? then base = @name
      elsif combined? then base = @name + 'Parser'
      else return( [] )
      end
      return( file_names( base ) )
    end

    # Files generated for a tree grammar, otherwise an empty list.
    def tree_parser_files
      return( tree? ? file_names( @name ) : [] )
    end

    # Expands +base+ into one output-directory path per file extension of
    # the grammar's target language.
    def file_names( base )
      LANGUAGES.fetch( @language ).map do | ext |
        File.join( output_directory, base + ext )
      end
    end

    # defines lexer?, parser?, tree? and combined?
    GRAMMAR_TYPES.each do | type |
      class_eval( <<-END )
        def #{ type }?
          @type == #{ type.inspect }
        end
      END
    end

    # File names for a delegate named '<name>_<delegate_suffix>'.
    def delegate_files( delegate_suffix )
      file_names( "#{ name }_#{ delegate_suffix }" )
    end

    # Path of the .tokens file in the output directory.
    def tokens_file
      File.join( output_directory, name + '.tokens' )
    end

    # Returns the .tokens file plus the lexer / parser / tree parser files
    # of this grammar. Unless +all+ is false, the target files of imported
    # grammars are appended.
    def target_files( all = true )
      targets = [ tokens_file ]
      targets.concat( lexer_files )
      targets.concat( parser_files )
      targets.concat( tree_parser_files )

      if all
        @imported_grammars.each do | grammar |
          targets.concat( grammar.target_files )
        end
      end

      return targets
    end

    # Touches the grammar file.
    def update
      touch( @path )
    end

    # Paths of all directly and transitively imported grammar files.
    def all_imported_files
      imported_files = []
      @imported_grammars.each do | grammar |
        imported_files.push( grammar.path, *grammar.all_imported_files )
      end
      return imported_files
    end

    # Deletes every existing target file of this grammar and of its
    # imports; returns the list of deleted paths.
    def clean
      deleted = []
      target_files.each do | target |
        if test( ?f, target )
          rm( target )
          deleted << target
        end
      end

      @imported_grammars.each do | grammar |
        deleted.concat( grammar.clean )
      end

      return deleted
    end

    # Declares, once per instance, a file task for each target file. Every
    # such task depends on +shared_depends+, the grammar file and all
    # imported grammar files, and compiles the grammar through the group.
    def define_tasks( shared_depends )
      return if @tasks_defined

      sources = [ @path, *all_imported_files ]
      sources.each { | f | file( f ) }
      depends = shared_depends + sources

      target_files.each do | target |
        # prevents recursive .tokens file dependencies
        file( target => ( depends - [ target ] ) ) do
          group.compile( self )
        end
      end

      @tasks_defined = true
    end

  private

    # Wraps each imported grammar name in an Imported object; raises when
    # the corresponding .g file is not found on the load path.
    def fetch_imports
      @imported_grammars = @imports.map do | imp |
        file = group.locate( "#{ imp }.g" ) or raise( Util.tidy( <<-END ) )
        | #{ @path }: unable to locate imported grammar file #{ imp }.g
        | search directories ( @load_path ):
        |   - #{ load_path.join( "\n  - " ) }
        END
        Imported.new( self, file )
      end
    end

    # Reads the grammar source and extracts the grammar name and type, the
    # language and tokenVocab options, and the imported grammar names.
    # Raises FormatError when there is no grammar declaration or when the
    # target language is not listed in LANGUAGES.
    def study
      @source = File.read( @path )
      @source =~ /^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/ or
        raise FormatError[ @source, @path ]
      @name = $2
      @type = $1 || 'combined'

      if @source =~ /^\s*options\s*\{(.*?)\}/m
        option_block = $1
        if option_block =~ /\s*language\s*=\s*(\S+)\s*;/
          @language = $1
          LANGUAGES.has_key?( @language ) or raise(
            FormatError.new(
              @source, @path,
              "Unknown ANTLR target language: %p" % [ @language ]
            )
          )
        end
        option_block =~ /\s*tokenVocab\s*=\s*(\S+)\s*;/ and
          @token_vocab = $1
      end

      @source.scan( /^\s*import\s+(\w+\s*(?:,\s*\w+\s*)*);/ ) do | groups |
        @imports.concat( groups.first.strip.split( /\s*,\s*/ ) )
      end
    end
  end # class GrammarFile

  # A grammar pulled in through another grammar's import statement. The
  # group-level settings are read through the importing grammar.
  class GrammarFile::Imported < GrammarFile
    # owner:: the importing GrammarFile
    # path::  path of the imported grammar file
    def initialize( owner, path )
      @owner = owner
      @path = path.to_s
      @imports = []
      @language = 'Java'
      @token_vocab = nil
      study
      fetch_imports
    end

    [ :load_path, :output_directory, :antlr_jar, :verbose, :group ].each do | attr |
      class_eval( <<-END )
        def #{ attr }
          @owner.#{ attr }
        end
      END
    end

    # Delegate file names are derived through the owner, so the owner's
    # name is prefixed to this grammar's.
    def delegate_files( suffix )
      @owner.delegate_files( "#{ @name }_#{ suffix }" )
    end

    # The .tokens file plus the delegate files generated under the owner.
    def target_files
      targets = [ tokens_file ]
      targets.concat( @owner.delegate_files( @name ) )
      return( targets )
    end
  end

  # Raised when a grammar source cannot be interpreted.
  class GrammarFile::FormatError < StandardError
    # explanation used when no specific reason is supplied; single quotes
    # keep the regexp text literal ("\s" in double quotes is a space)
    MISSING_DECLARATION =
      "could not locate a grammar name and type declaration matching\n" +
      '/^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/'

    attr_reader :file, :source

    # Shorthand constructor: FormatError[ source, file ]
    def self.[]( *args )
      new( *args )
    end

    # source:: the offending grammar text, quoted in the message
    # file::   path the text was read from, or nil for inline sources
    # reason:: closing explanation; defaults to the missing-declaration hint
    def initialize( source, file = nil, reason = MISSING_DECLARATION )
      @file = file
      @source = source

      rule = ( "-" * 80 ) + "\n"
      heading =
        if file.nil? then "bad inline grammar source:\n" # inline
        else "bad grammar source in file %p\n" % [ @file ]
        end
      listing = @source.to_s
      # terminate the listing so the closing rule starts on its own line
      listing += "\n" unless listing.empty? || listing.end_with?( "\n" )

      super( heading + rule + listing + rule + reason.to_s )
    end
  end # error GrammarFile::FormatError
end # class CompileTask
end # module ANTLR3