Home | History | Annotate | Download | only in antlr3
      1 #!/usr/bin/ruby
      2 # encoding: utf-8
      3 
      4 require 'antlr3'
      5 require 'set'
      6 require 'rake'
      7 require 'rake/tasklib'
      8 require 'shellwords'
      9 
     10 module ANTLR3
     11 
     12 =begin rdoc ANTLR3::CompileTask
     13 
A rake task-generating utility for ANTLR grammar file
compilation. This is a general-purpose utility -- the grammars do
not have to be targeted for Ruby output; it handles all
known ANTLR language targets.
     18 
     19   require 'antlr3/task'
     20   
     21   ANTLR3::CompileTask.define(
     22     :name => 'grammars', :output_directory => 'lib/parsers'
     23   ) do | t |
     24     t.grammar_set( 'antlr/MainParser.g', 'antlr/MainTree.g' )
     25     
     26     t.grammar_set( 'antlr/Template.g' ) do | gram |
     27       gram.output_directory = 'lib/parsers/template'
     28       gram.debug = true
     29     end
     30   end
     31   
     32 
     33 TODO: finish documentation
     34 
     35 =end
     36 
     37 class CompileTask < Rake::TaskLib
  # the GrammarSet objects registered with this library, and the shared
  # option hash merged into each new set's options
  attr_reader :grammar_sets, :options
  # base name for the generated rake namespace (default: 'antlr-grammars')
  attr_accessor :name
     40   
     41   def self.define( *grammar_files )
     42     lib = new( *grammar_files )
     43     block_given? and yield( lib )
     44     lib.define
     45     return( lib )
     46   end
     47   
     48   def initialize( *grammar_files )
     49     grammar_files = [ grammar_files ].flatten!
     50     options = Hash === grammar_files.last ? grammar_files.pop : {}
     51     @grammar_sets = []
     52     @name = options.fetch( :name, 'antlr-grammars' )
     53     @options = options
     54     @namespace = Rake.application.current_scope
     55     grammar_files.empty? or grammar_set( grammar_files )
     56   end
     57   
     58   def target_files
     59     @grammar_sets.inject( [] ) do | list, set |
     60       list.concat( set.target_files )
     61     end
     62   end
     63   
     64   def grammar_set( *grammar_files )
     65     grammar_files = [ grammar_files ].flatten!
     66     options = @options.merge( 
     67       Hash === grammar_files.last ? grammar_files.pop : {}
     68     )
     69     set = GrammarSet.new( grammar_files, options )
     70     block_given? and yield( set )
     71     @grammar_sets << set
     72     return( set )
     73   end
     74   
     75   def compile_task
     76     full_name = ( @namespace + [ @name, 'compile' ] ).join( ':' )
     77     Rake::Task[ full_name ]
     78   end
     79   
     80   def compile!
     81     compile_task.invoke
     82   end
     83   
     84   def clobber_task
     85     full_name = ( @namespace + [ @name, 'clobber' ] ).join( ':' )
     86     Rake::Task[ full_name ]
     87   end
     88   
     89   def clobber!
     90     clobber_task.invoke
     91   end
     92   
     93   def define
     94     namespace( @name ) do
     95       desc( "trash all ANTLR-generated source code" )
     96       task( 'clobber' ) do
     97         for set in @grammar_sets
     98           set.clean
     99         end
    100       end
    101       
    102       for set in @grammar_sets
    103         set.define_tasks
    104       end
    105       
    106       desc( "compile ANTLR grammars" )
    107       task( 'compile' => target_files )
    108     end
    109   end
    110   
    111 
    112 #class CompileTask::GrammarSet
    113 class GrammarSet
  # path to the ANTLR jar, compile-mode toggles, and extra option lists
  # passed to the java VM / the ANTLR tool
  attr_accessor :antlr_jar, :debug,
                :trace, :profile, :compile_options,
                :java_options
  # directories searched for .tokens files / imported grammars, and the
  # GrammarFile members of this set
  attr_reader :load_path, :grammars
  attr_writer :output_directory
    119   
    120   def initialize( grammar_files, options = {} )
    121     @load_path = grammar_files.map { | f | File.dirname( f ) }
    122     @load_path.push( '.', @output_directory )
    123     
    124     if extra_load = options[ :load_path ]
    125       extra_load = [ extra_load ].flatten
    126       @load_path.unshift( extra_load )
    127     end
    128     @load_path.uniq!
    129     
    130     @grammars = grammar_files.map do | file |
    131       GrammarFile.new( self, file )
    132     end
    133     @output_directory = '.'
    134     dir = options[ :output_directory ] and @output_directory = dir.to_s
    135     
    136     @antlr_jar = options.fetch( :antlr_jar, ANTLR3.antlr_jar )
    137     @debug = options.fetch( :debug, false )
    138     @trace = options.fetch( :trace, false )
    139     @profile = options.fetch( :profile, false )
    140     @compile_options =
    141       case opts = options[ :compile_options ]
    142       when Array then opts
    143       else Shellwords.shellwords( opts.to_s )
    144       end
    145     @java_options =
    146       case opts = options[ :java_options ]
    147       when Array then opts
    148       else Shellwords.shellwords( opts.to_s )
    149       end
    150   end
    151   
    152   def target_files
    153     @grammars.map { | gram | gram.target_files }.flatten
    154   end
    155   
    156   def output_directory
    157     @output_directory || '.'
    158   end
    159   
    160   def define_tasks
    161     file( @antlr_jar )
    162     
    163     for grammar in @grammars
    164       deps = [ @antlr_jar ]
    165       if  vocab = grammar.token_vocab and
    166           tfile = find_tokens_file( vocab, grammar )
    167         file( tfile )
    168         deps << tfile
    169       end
    170       grammar.define_tasks( deps )
    171     end
    172   end
    173   
    174   def clean
    175     for grammar in @grammars
    176       grammar.clean
    177     end
    178     if test( ?d, output_directory ) and ( Dir.entries( output_directory ) - %w( . .. ) ).empty?
    179       rmdir( output_directory )
    180     end
    181   end
    182   
    183   def find_tokens_file( vocab, grammar )
    184     gram = @grammars.find { | gram | gram.name == vocab } and
    185       return( gram.tokens_file )
    186     file = locate( "#{ vocab }.tokens" ) and return( file )
    187     warn( Util.tidy( <<-END, true ) )
    188     | unable to locate .tokens file `#{ vocab }' referenced in #{ grammar.path }
    189     | -- ignoring dependency
    190     END
    191     return( nil )
    192   end
    193   
    194   def locate( file_name )
    195     dir = @load_path.find do | dir |
    196       File.file?( File.join( dir, file_name ) )
    197     end
    198     dir and return( File.join( dir, file_name ) )
    199   end
    200   
    201   def compile( grammar )
    202     dir = output_directory
    203     test( ?d, dir ) or FileUtils.mkpath( dir )
    204     sh( build_command( grammar ) )
    205   end
    206   
    207   def build_command( grammar )
    208     parts = [ 'java', '-cp', @antlr_jar ]
    209     parts.concat( @java_options )
    210     parts << 'org.antlr.Tool' << '-fo' << output_directory
    211     parts << '-debug' if @debug
    212     parts << '-profile' if @profile
    213     parts << '-trace' if @trace
    214     parts.concat( @compile_options )
    215     parts << grammar.path
    216     return parts.map! { | t | escape( t ) }.join( ' ' )
    217   end
    218   
    219   def escape( token )
    220     token = token.to_s.dup
    221     token.empty? and return( %('') )
    222     token.gsub!( /([^A-Za-z0-9_\-.,:\/@\n])/n, "\\\\\\1" )
    223     token.gsub!( /\n/, "'\n'" )
    224     return( token )
    225   end
    226   
    227 end
    228 
    229 class GrammarFile
  # Maps each ANTLR target-language name ( as written in a grammar's
  # `language` option ) to the source-file extension( s ) the ANTLR
  # tool generates for that target.
  LANGUAGES = { 
    "ActionScript" => [ ".as" ],
    "CSharp2" => [ ".cs" ],
    "C" => [ ".c", ".h" ],
    "ObjC" => [ ".m", ".h" ],
    "CSharp3" => [ ".cs" ],
    "Cpp" => [ ".cpp", ".h" ],
    "Ruby" => [ ".rb" ],
    "Java" => [ ".java" ],
    "JavaScript" => [ ".js" ],
    "Python" => [ ".py" ],
    "Delphi" => [ ".pas" ],
    "Perl5" => [ ".pm" ]
  }.freeze
  # recognized grammar declaration types; `combined' covers a bare
  # `grammar Name;' declaration with both lexer and parser rules
  GRAMMAR_TYPES = %w(lexer parser tree combined)
    245   
    246   ##################################################################
    247   ######## CONSTRUCTOR #############################################
    248   ##################################################################
    249   
    250   def initialize( group, path, options = {} )
    251     @group = group
    252     @path = path.to_s
    253     @imports = []
    254     @language = 'Java'
    255     @token_vocab = nil
    256     @tasks_defined = false
    257     @extra_dependencies = []
    258     if extra = options[ :extra_dependencies ]
    259       extra = [ extra ].flatten
    260       @extra_dependencies.concat( extra )
    261     end
    262     
    263     study
    264     yield( self ) if block_given?
    265     fetch_imports
    266   end
    267   
    268   ##################################################################
    269   ######## ATTRIBUTES AND ATTRIBUTE-ISH METHODS ####################
    270   ##################################################################
  # grammar properties extracted by #study, plus construction context
  attr_reader :type, :name, :language, :source,
              :token_vocab, :imports, :imported_grammars,
              :path, :group
  
  # set-wide settings are read through the owning GrammarSet
  for attr in [ :output_directory, :load_path, :antlr_jar ]
    class_eval( <<-END )
      def #{ attr }
        @group.#{ attr }
      end
    END
  end
    282   
    283   def lexer_files
    284     if lexer? then base = @name
    285     elsif combined? then base = @name + 'Lexer'
    286     else return( [] )
    287     end
    288     return( file_names( base ) )
    289   end
    290   
    291   def parser_files
    292     if parser? then base = @name
    293     elsif combined? then base = @name + 'Parser'
    294     else return( [] )
    295     end
    296     return( file_names( base ) )
    297   end
    298   
    299   def tree_parser_files
    300     return( tree? ? file_names( @name ) : [] )
    301   end
    302   
    303   def file_names( base )
    304     LANGUAGES.fetch( @language ).map do | ext |
    305       File.join( output_directory, base + ext )
    306     end
    307   end
    308   
  # defines lexer?, parser?, tree?, and combined? predicates, each true
  # when the grammar declaration is of the corresponding type
  for type in GRAMMAR_TYPES
    class_eval( <<-END )
      def #{ type }?
        @type == #{ type.inspect }
      end
    END
  end
    316   
    317   def delegate_files( delegate_suffix )
    318     file_names( "#{ name }_#{ delegate_suffix }" )
    319   end
    320   
    321   def tokens_file
    322     File.join( output_directory, name + '.tokens' )
    323   end
    324   
    325   def target_files( all = true )
    326     targets = [ tokens_file ]
    327     
    328     for target_type in %w( lexer parser tree_parser )
    329       for file in self.send( :"#{ target_type }_files" )
    330         targets << file
    331       end
    332     end
    333     
    334     if all
    335       for grammar in @imported_grammars
    336         targets.concat( grammar.target_files )
    337       end
    338     end
    339     
    340     return targets
    341   end
    342   
  # Touches the grammar file, bumping its mtime so every generated
  # target looks stale and will be rebuilt on the next compile run.
  def update
    touch( @path )
  end
    346   
    347   def all_imported_files
    348     imported_files = []
    349     for grammar in @imported_grammars
    350       imported_files.push( grammar.path, *grammar.all_imported_files )
    351     end
    352     return imported_files
    353   end
    354   
    355   def clean
    356     deleted = []
    357     for target in target_files
    358       if test( ?f, target )
    359         rm( target )
    360         deleted << target
    361       end
    362     end
    363     
    364     for grammar in @imported_grammars
    365       deleted.concat( grammar.clean )
    366     end
    367     
    368     return deleted
    369   end
    370   
    371   def define_tasks( shared_depends )
    372     unless @tasks_defined
    373       depends = [ @path, *all_imported_files ]
    374       for f in depends
    375         file( f )
    376       end
    377       depends = shared_depends + depends
    378       
    379       target_files.each do | target |
    380         file( target => ( depends - [ target ] ) ) do   # prevents recursive .tokens file dependencies
    381           @group.compile( self )
    382         end
    383       end
    384       
    385       @tasks_defined = true
    386     end
    387   end
    388   
    389 private
    390   
  # Resolves each name collected in @imports to an actual grammar file
  # ( searched via the group's load path ) and wraps it in an Imported
  # grammar object, populating @imported_grammars. Raises when a
  # delegate grammar file cannot be located.
  def fetch_imports
    @imported_grammars = @imports.map do | imp |
      file = group.locate( "#{ imp }.g" ) or raise( Util.tidy( <<-END ) )
      | #{ @path }: unable to locate imported grammar file #{ imp }.g
      | search directories ( @load_path ):
      |   - #{ load_path.join( "\n  - " ) }
      END
      Imported.new( self, file )
    end
  end
    401   
    402   def study
    403     @source = File.read( @path )
    404     @source =~ /^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/ or
    405       raise Grammar::FormatError[ @source, @path ]
    406     @name = $2
    407     @type = $1 || 'combined'
    408     if @source =~ /^\s*options\s*\{(.*?)\}/m
    409       option_block = $1
    410       if option_block =~ /\s*language\s*=\s*(\S+)\s*;/
    411         @language = $1
    412         LANGUAGES.has_key?( @language ) or
    413           raise( Grammar::FormatError, "Unknown ANTLR target language: %p" % @language )
    414       end
    415       option_block =~ /\s*tokenVocab\s*=\s*(\S+)\s*;/ and
    416         @token_vocab = $1
    417     end
    418     
    419     @source.scan( /^\s*import\s+(\w+\s*(?:,\s*\w+\s*)*);/ ) do
    420       list = $1.strip
    421       @imports.concat( list.split( /\s*,\s*/ ) )
    422     end
    423   end
    424 end # class Grammar
    425 
    426 class GrammarFile::Imported < GrammarFile
    427   def initialize( owner, path )
    428     @owner = owner
    429     @path = path.to_s
    430     @imports = []
    431     @language = 'Java'
    432     @token_vocab = nil
    433     study
    434     fetch_imports
    435   end
    436   
    437   for attr in [ :load_path, :output_directory, :antlr_jar, :verbose, :group ]
    438     class_eval( <<-END )
    439       def #{ attr }
    440         @owner.#{ attr }
    441       end
    442     END
    443   end
    444   
    445   def delegate_files( suffix )
    446     @owner.delegate_files( "#{ @name }_#{ suffix }" )
    447   end
    448   
    449   def target_files
    450     targets = [ tokens_file ]
    451     targets.concat( @owner.delegate_files( @name ) )
    452     return( targets )
    453   end
    454 end
    455 
    456 class GrammarFile::FormatError < StandardError
    457   attr_reader :file, :source
    458   
    459   def self.[]( *args )
    460     new( *args )
    461   end
    462   
    463   def initialize( source, file = nil )
    464     @file = file
    465     @source = source
    466     message = ''
    467     if file.nil? # inline
    468       message << "bad inline grammar source:\n"
    469       message << ( "-" * 80 ) << "\n"
    470       message << @source
    471       message[ -1 ] == ?\n or message << "\n"
    472       message << ( "-" * 80 ) << "\n"
    473       message << "could not locate a grammar name and type declaration matching\n"
    474       message << "/^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/"
    475     else
    476       message << 'bad grammar source in file %p\n' % @file
    477       message << ( "-" * 80 ) << "\n"
    478       message << @source
    479       message[ -1 ] == ?\n or message << "\n"
    480       message << ( "-" * 80 ) << "\n"
    481       message << "could not locate a grammar name and type declaration matching\n"
    482       message << "/^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/"
    483     end
    484     super( message )
    485   end
    486 end # error Grammar::FormatError
    487 end # class CompileTask
    488 end # module ANTLR3
    489